From ad40f1577c30d1fb2375f069a862586588fee744 Mon Sep 17 00:00:00 2001
From: Struan Donald
Date: Tue, 5 Dec 2023 17:02:08 +0000
Subject: [PATCH] WIP auto convert old cons data to new cons data

---
 .../commands/create_new_con_data_from_old.py  | 140 +++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 hub/management/commands/create_new_con_data_from_old.py

diff --git a/hub/management/commands/create_new_con_data_from_old.py b/hub/management/commands/create_new_con_data_from_old.py
new file mode 100644
index 000000000..51c512ead
--- /dev/null
+++ b/hub/management/commands/create_new_con_data_from_old.py
@@ -0,0 +1,140 @@
+from functools import cached_property
+
+from django.core.management.base import BaseCommand
+
+import pandas as pd
+from mysoc_dataset import get_dataset_df
+
+from hub.models import Area, AreaData, AreaType, DataSet, DataType
+from utils.constituency_mapping import convert_data_geographies
+
+
+class Command(BaseCommand):
+    """Re-create old-constituency (WMC) area data for the WMC23 areas."""
+
+    help = "Create new constituency data from old constituency data"
+
+    # Overwritten by handle(); the class-level default keeps the helper
+    # methods usable on their own without going through handle() first.
+    _quiet = False
+
+    @cached_property
+    def new_con_at(self):
+        """Return the ``WMC23`` AreaType, fetched on first access.
+
+        Looked up lazily rather than in the class body so that importing
+        this module does not run a database query.
+        """
+        return AreaType.objects.get(code="WMC23")
+
+    def add_arguments(self, parser):
+        """Register the ``-q``/``--quiet`` flag."""
+        parser.add_argument(
+            "-q", "--quiet", action="store_true", help="Silence progress bars."
+        )
+
+    def fetch_parl25_gss_map(self):
+        """Load the ``short_code`` -> ``gss_code`` lookup for PARL25.
+
+        The lookup is read from ``parl_constituencies_2025.csv`` in the
+        ``2025-constituencies`` dataset and kept on ``self.parl25_gss_map``.
+        """
+        df = get_dataset_df(
+            repo_name="2025-constituencies",
+            package_name="parliament_con_2025",
+            version_name="latest",
+            file_name="parl_constituencies_2025.csv",
+        )
+        self.parl25_gss_map = df.set_index("short_code").gss_code.to_dict()
+
+    def apply_parl25_gss_to_df(self, df):
+        """Swap each ``PARL25`` value in *df* for its GSS code, in place.
+
+        Values with no entry in ``self.parl25_gss_map`` become ``None``.
+        The same, mutated DataFrame is returned.
+        """
+        df["PARL25"] = df["PARL25"].apply(self.parl25_gss_map.get)
+        return df
+
+    def get_df_from_dataset(self, ds):
+        """Return the ``WMC`` AreaData of *ds* as a DataFrame.
+
+        The frame has a ``PARL10`` column (the area's GSS code) and a
+        ``value`` column. It is empty, but keeps both columns, when *ds*
+        is not stored in the ``areadata`` table or has no matching rows.
+        """
+        columns = ["PARL10", "value"]
+        if ds.table != "areadata":
+            return pd.DataFrame(columns=columns)
+
+        # select_related avoids one extra Area query per row for d.area.gss.
+        data = AreaData.objects.filter(
+            data_type__data_set=ds, area__area_type__code="WMC"
+        ).select_related("area")
+        rows = [[d.area.gss, d.value()] for d in data]
+        return pd.DataFrame(rows, columns=columns)
+
+    def create_data_for_new_con(self, ds, df):
+        """Save the converted values in *df* against the ``WMC23`` areas.
+
+        *df* must have a ``PARL25`` column of GSS codes and a ``value``
+        column. Rows whose code is missing or matches no ``WMC23`` area are
+        reported on stderr and skipped. ``WMC23`` is only added to
+        ``ds.areas_available`` if at least one row was saved.
+        """
+        dt = DataType.objects.get(data_set=ds)
+        # Fetch every target area once instead of querying per row.
+        areas = {a.gss: a for a in Area.objects.filter(area_type__code="WMC23")}
+        saved = 0
+        for gss, value in zip(df["PARL25"], df["value"]):
+            area = None if pd.isna(gss) else areas.get(gss)
+            if area is None:
+                self.stderr.write(
+                    f"{ds.label}: no WMC23 area for {gss!r}, skipping row"
+                )
+                continue
+            AreaData.objects.update_or_create(
+                area=area,
+                data_type=dt,
+                defaults={
+                    "float": value,
+                },
+            )
+            saved += 1
+        if saved:
+            ds.areas_available.add(self.new_con_at)
+
+    def process_datasets(self):
+        """Convert each eligible DataSet from PARL10 to PARL25 boundaries.
+
+        Eligible sets are non-range ``people_in_area`` datasets in the
+        ``place`` or ``opinion`` categories. Sets with no old-boundary
+        data are skipped.
+        """
+        sets = DataSet.objects.filter(
+            unit_distribution="people_in_area",
+            category__in=["place", "opinion"],
+            is_range=False,
+        )
+
+        for ds in sets:
+            if not self._quiet:
+                self.stdout.write(str(ds.label))
+            df = self.get_df_from_dataset(ds)
+            if df.empty:
+                # Nothing to convert; avoid handing an empty frame on.
+                continue
+            new_df = convert_data_geographies(
+                df=df,
+                input_geography="PARL10",
+                output_geography="PARL25",
+                input_values_type=ds.unit_type,
+            )
+            new_df = self.apply_parl25_gss_to_df(new_df)
+            self.create_data_for_new_con(ds, new_df)
+
+    def handle(self, quiet=False, *args, **options):
+        """Run the conversion: load the GSS lookup, then each dataset."""
+        self._quiet = quiet
+        self.fetch_parl25_gss_map()
+        self.process_datasets()