-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP auto convert old cons data to new cons data
- Loading branch information
Showing
1 changed file
with
85 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from django.core.management.base import BaseCommand | ||
|
||
import pandas as pd | ||
from mysoc_dataset import get_dataset_df | ||
|
||
from hub.models import Area, AreaData, AreaType, DataSet, DataType | ||
from utils.constituency_mapping import convert_data_geographies | ||
|
||
|
||
class Command(BaseCommand):
    """Create data for the new constituencies from old constituency data.

    For every matching ``DataSet`` the command reads the values stored
    against areas of type ``WMC``, passes them through
    ``convert_data_geographies`` (``PARL10`` -> ``PARL25``), swaps the
    resulting ``PARL25`` codes for GSS codes and stores the values against
    the corresponding areas of type ``WMC23``.
    """

    help = "Create new constituency data from old constituency data"

    # Default so the helper methods also work when called without handle().
    _quiet = False

    # Cache backing the lazily resolved ``new_con_at`` property.
    _new_con_at = None

    @property
    def new_con_at(self):
        """Return the ``AreaType`` with code ``WMC23``, fetched on first use.

        The lookup is deferred so that merely importing this module does not
        run a database query; the result is cached on the instance.
        """
        if self._new_con_at is None:
            self._new_con_at = AreaType.objects.get(code="WMC23")
        return self._new_con_at

    @new_con_at.setter
    def new_con_at(self, value):
        # Keep plain assignment working, as it did for the old class attribute.
        self._new_con_at = value

    def add_arguments(self, parser):
        """Register the ``-q`` / ``--quiet`` command line flag."""
        parser.add_argument(
            "-q", "--quiet", action="store_true", help="Silence progress bars."
        )

    def fetch_parl25_gss_map(self):
        """Load the ``short_code`` -> ``gss_code`` lookup into ``self.parl25_gss_map``.

        The mapping is read from ``parl_constituencies_2025.csv`` in the
        ``2025-constituencies`` dataset repository (latest version).
        """
        df = get_dataset_df(
            repo_name="2025-constituencies",
            package_name="parliament_con_2025",
            version_name="latest",
            file_name="parl_constituencies_2025.csv",
        )
        self.parl25_gss_map = df.set_index("short_code").gss_code.to_dict()

    def apply_parl25_gss_to_df(self, df):
        """Replace the ``PARL25`` column of ``df`` with GSS codes, in place.

        Values missing from ``self.parl25_gss_map`` become ``None``.
        ``fetch_parl25_gss_map`` must have been called first.

        Returns the same (mutated) DataFrame for convenience.
        """
        df["PARL25"] = df["PARL25"].apply(
            lambda name: self.parl25_gss_map.get(name)
        )
        return df

    def get_df_from_dataset(self, ds):
        """Build a DataFrame of old-constituency values for ``ds``.

        Returns a DataFrame with the columns ``PARL10`` (the area's GSS code)
        and ``value``, one row per ``AreaData`` record attached to a ``WMC``
        area. The frame is empty, but still has both columns, when there are
        no such records. Returns ``None`` when ``ds.table`` is not
        ``"areadata"``, as only area data can be converted here.
        """
        if ds.table != "areadata":
            return None

        # select_related avoids one extra query per row for ``d.area``.
        records = AreaData.objects.filter(
            data_type__data_set=ds, area__area_type__code="WMC"
        ).select_related("area")

        # Passing ``columns`` to the constructor (rather than assigning
        # ``df.columns`` afterwards) also works when there are no rows.
        return pd.DataFrame(
            [[d.area.gss, d.value()] for d in records],
            columns=["PARL10", "value"],
        )

    def create_data_for_new_con(self, ds, df):
        """Store the converted values in ``df`` against ``WMC23`` areas.

        ``df`` needs a ``PARL25`` column holding GSS codes and a ``value``
        column; each value is written to the ``float`` field of the matching
        ``AreaData`` row (created if missing). Afterwards the ``WMC23`` area
        type is added to ``ds.areas_available``.

        Raises ``Area.DoesNotExist`` if a row's GSS code has no ``WMC23``
        area, and whatever ``DataType.objects.get`` raises when ``ds`` does
        not have exactly one data type.
        """
        dt = DataType.objects.get(data_set=ds)
        for _, row in df.iterrows():
            area = Area.objects.get(gss=row["PARL25"], area_type__code="WMC23")
            AreaData.objects.update_or_create(
                area=area,
                data_type=dt,
                defaults={
                    "float": row["value"],
                },
            )
        ds.areas_available.add(self.new_con_at)

    def process_datasets(self):
        """Convert every eligible dataset to the new constituencies.

        Eligible datasets have ``unit_distribution="people_in_area"``, a
        category of ``place`` or ``opinion`` and ``is_range=False``. Datasets
        with no convertible old-constituency data are skipped with a notice
        on stderr instead of aborting the whole run.
        """
        sets = DataSet.objects.filter(
            unit_distribution="people_in_area",
            category__in=["place", "opinion"],
            is_range=False,
        )

        for ds in sets:
            if not self._quiet:
                self.stdout.write(str(ds.label))

            df = self.get_df_from_dataset(ds)
            if df is None or df.empty:
                # Nothing to convert; do not mark the dataset as available
                # for the new constituencies.
                self.stderr.write(
                    f"Skipping {ds.label}: no old constituency area data"
                )
                continue

            new_df = convert_data_geographies(
                df=df,
                input_geography="PARL10",
                output_geography="PARL25",
                input_values_type=ds.unit_type,
            )
            new_df = self.apply_parl25_gss_to_df(new_df)
            self.create_data_for_new_con(ds, new_df)

    def handle(self, quiet=False, *args, **options):
        """Entry point: load the GSS lookup, then convert all datasets."""
        self._quiet = quiet
        self.fetch_parl25_gss_map()
        self.process_datasets()