diff --git a/hub/management/commands/generate_csv_of_available_datasets.py b/hub/management/commands/generate_csv_of_available_datasets.py
index e21ef0e42..d5df3f552 100644
--- a/hub/management/commands/generate_csv_of_available_datasets.py
+++ b/hub/management/commands/generate_csv_of_available_datasets.py
@@ -4,7 +4,7 @@
 import pandas as pd
 from tqdm import tqdm
 
-from hub.models import Area, DataSet, Person
+from hub.models import DataSet
 
 
 class Command(BaseCommand):
@@ -22,38 +22,6 @@ def handle(self, quiet=False, *args, **options):
         df = self.build_dataframe()
         df.to_csv(self.out_file)
 
-    def get_area_data(self):
-        area_details = []
-        for area in Area.objects.filter(area_type__code="WMC"):
-            try:
-                mp = Person.objects.get(area=area)
-            except Person.DoesNotExist:
-                print(f"Person does not exist for area {area.gss} {area.name}")
-            area_details.append([area.gss, area.name, area.mapit_id, mp.name])
-        return pd.DataFrame(
-            area_details,
-            columns=["Area GSS code", "Area name", "Area MapIt ID", "MP name"],
-        ).set_index("Area GSS code")
-
-    def create_dataset_df(self, data, label, table):
-        df_data = []
-        for datum in data:
-            if table == "areadata":
-                area = datum.area
-            else:
-                area = datum.person.area
-            df_data.append([area.gss, datum.value()])
-        df = pd.DataFrame(df_data, columns=["Area GSS code", label])
-        # Deal with any multiples, by concatenating them into one string
-        df = df.groupby("Area GSS code").agg(
-            {
-                "Area GSS code": "first",
-                label: lambda data_list: ", ".join([str(x) for x in data_list]),
-            }
-        )
-        df = df.set_index("Area GSS code")
-        return df
-
     def build_dataframe(self):
         # Next, iterate through each (filterable) data set in the db
         datasets = []
@@ -72,6 +40,8 @@ def build_dataframe(self):
                     data_set.is_public,
                     "WMC" in areas_available,
                     "WMC23" in areas_available,
+                    "STC" in areas_available,
+                    "DIS" in areas_available,
                 ]
             )
 
@@ -85,6 +55,8 @@ def build_dataframe(self):
                 "Public",
                 "2010 Cons",
                 "2024 Cons",
+                "Single Tier Councils",
+                "District Councils",
             ],
         )
         return df