Skip to content

Commit

Permalink
WIP auto convert old cons data to new cons data
Browse files Browse the repository at this point in the history
  • Loading branch information
struan committed Dec 5, 2023
1 parent 27202e4 commit ad40f15
Showing 1 changed file with 85 additions and 0 deletions.
85 changes: 85 additions & 0 deletions hub/management/commands/create_new_con_data_from_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from django.core.management.base import BaseCommand

import pandas as pd
from mysoc_dataset import get_dataset_df

from hub.models import Area, AreaData, AreaType, DataSet, DataType
from utils.constituency_mapping import convert_data_geographies


class Command(BaseCommand):
    """Create data for the new ("WMC23") constituencies from old ("WMC") data.

    For every eligible DataSet the existing per-constituency values are read
    into a DataFrame keyed by the old constituency GSS code, handed to
    ``convert_data_geographies`` to be re-expressed on the "PARL25" geography,
    and the result is stored as AreaData rows against the "WMC23" areas.
    """

    help = "Create new constituency data from old constituency data"

    # Cache for the lazily fetched "WMC23" AreaType (see ``new_con_at``).
    _new_con_at = None
    # Default so the helper methods can be called without going via handle().
    _quiet = False

    @property
    def new_con_at(self):
        """Return the AreaType with code "WMC23", fetching it on first use.

        The lookup is deferred rather than done in the class body so that
        merely importing this module does not run a database query.
        """
        if self._new_con_at is None:
            self._new_con_at = AreaType.objects.get(code="WMC23")
        return self._new_con_at

    def add_arguments(self, parser):
        """Register the ``-q/--quiet`` command line flag."""
        parser.add_argument(
            "-q", "--quiet", action="store_true", help="Silence progress bars."
        )

    def fetch_parl25_gss_map(self):
        """Load the 2025 constituencies dataset into ``self.parl25_gss_map``.

        The map is built from the ``short_code`` column (keys) to the
        ``gss_code`` column (values) of ``parl_constituencies_2025.csv``.
        """
        df = get_dataset_df(
            repo_name="2025-constituencies",
            package_name="parliament_con_2025",
            version_name="latest",
            file_name="parl_constituencies_2025.csv",
        )
        self.parl25_gss_map = df.set_index("short_code").gss_code.to_dict()

    def apply_parl25_gss_to_df(self, df):
        """Replace the values of ``df["PARL25"]`` with their GSS codes.

        Each value is looked up in ``self.parl25_gss_map`` (so the column
        presumably holds short codes - confirm against
        ``convert_data_geographies``); values with no entry become ``None``.
        The frame is modified in place and also returned.
        """
        df["PARL25"] = df["PARL25"].apply(self.parl25_gss_map.get)
        return df

    def get_df_from_dataset(self, ds):
        """Return the old-constituency values of ``ds`` as a DataFrame.

        The frame has the columns ``PARL10`` (GSS code of the "WMC" area) and
        ``value``. It is empty, but still has both columns, when the data set
        has no such rows or is not stored in the ``areadata`` table.
        """
        columns = ["PARL10", "value"]

        # Only area data can be read here; previously any other table fell
        # through to the loop below with ``data`` unbound.
        if ds.table != "areadata":
            return pd.DataFrame(columns=columns)

        # select_related avoids one extra query per row for ``d.area.gss``.
        data = AreaData.objects.filter(
            data_type__data_set=ds, area__area_type__code="WMC"
        ).select_related("area")

        rows = [[d.area.gss, d.value()] for d in data]

        # Passing ``columns`` to the constructor also works for zero rows,
        # whereas assigning ``df.columns`` afterwards raises on an empty frame.
        return pd.DataFrame(rows, columns=columns)

    def create_data_for_new_con(self, ds, df):
        """Store the rows of ``df`` as AreaData for the "WMC23" areas.

        ``df`` needs a ``PARL25`` column holding GSS codes and a ``value``
        column. Each value is written to the ``float`` field of the AreaData
        row for that area and the DataType of ``ds``, creating the row if
        needed. Rows whose GSS code is missing or matches no "WMC23" area are
        reported on stderr and skipped. Finally the "WMC23" AreaType is added
        to ``ds.areas_available``.
        """
        dt = DataType.objects.get(data_set=ds)

        # Fetch the target areas once instead of running a query per row.
        areas_by_gss = {
            area.gss: area
            for area in Area.objects.filter(area_type__code="WMC23")
        }

        for _, row in df.iterrows():
            gss = row["PARL25"]
            # apply_parl25_gss_to_df leaves None where it had no mapping.
            area = None if pd.isna(gss) else areas_by_gss.get(gss)
            if area is None:
                self.stderr.write(
                    f"{ds.label}: no WMC23 area for GSS code {gss!r}, skipping row"
                )
                continue

            AreaData.objects.update_or_create(
                area=area,
                data_type=dt,
                defaults={
                    "float": row["value"],
                },
            )

        ds.areas_available.add(self.new_con_at)

    def process_datasets(self):
        """Convert every eligible DataSet to the new constituencies.

        Eligible data sets have ``unit_distribution="people_in_area"``, a
        category of ``place`` or ``opinion`` and ``is_range=False``. Data sets
        that yield no old-constituency rows are skipped.
        """
        sets = DataSet.objects.filter(
            unit_distribution="people_in_area",
            category__in=["place", "opinion"],
            is_range=False,
        )

        for ds in sets:
            if not self._quiet:
                self.stdout.write(str(ds.label))

            df = self.get_df_from_dataset(ds)
            if df.empty:
                if not self._quiet:
                    self.stdout.write("  no old constituency data, skipping")
                continue

            new_df = convert_data_geographies(
                df=df,
                input_geography="PARL10",
                output_geography="PARL25",
                input_values_type=ds.unit_type,
            )
            new_df = self.apply_parl25_gss_to_df(new_df)
            self.create_data_for_new_con(ds, new_df)

    def handle(self, quiet=False, *args, **options):
        """Entry point: load the GSS map, then convert all data sets."""
        self._quiet = quiet
        self.fetch_parl25_gss_map()
        self.process_datasets()

0 comments on commit ad40f15

Please sign in to comment.