From ed857c83a70b2b84ad403680aad30610c49b5f41 Mon Sep 17 00:00:00 2001
From: Struan Donald
Date: Thu, 19 Sep 2024 16:46:48 +0200
Subject: [PATCH] rough version of importer that uses CSV config for imports

This should enable adding simple imports by adding rows to the CSV rather
than writing a new script
---
 conf/imports.csv                              |   2 +
 hub/management/commands/import_from_comfig.py | 124 +++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 conf/imports.csv
 create mode 100644 hub/management/commands/import_from_comfig.py

diff --git a/conf/imports.csv b/conf/imports.csv
new file mode 100644
index 000000000..b66902c9c
--- /dev/null
+++ b/conf/imports.csv
@@ -0,0 +1,2 @@
+name,label,description,data_type,category,subcategory,release_date,source_label,source,source_type,data_url,table,default_value,exclude_countries,unit_type,unit_distribution,fill_blanks,is_public,is_filterable,is_shadeable,data_file,uses_gss,constituency_col,data_col,file_type,area_type
+import_test,"Berkshire, Buckinghamshire and Oxfordshire Wildlife Trust members",,integer,movement,places_and_spaces,August 2024,"Data from Berkshire, Buckinghamshire and Oxfordshire Wildlife Trusts",,xlsl,,areadata,10,"Scotland, Northern Ireland",raw,people_in_area,FALSE,FALSE,FALSE,FALSE,berks_bucks_oxon_wildlife_trust_member_counts.xlsx,FALSE,Constituency,Total_Members,excel,WMC23
diff --git a/hub/management/commands/import_from_comfig.py b/hub/management/commands/import_from_comfig.py
new file mode 100644
index 000000000..7b7ce156e
--- /dev/null
+++ b/hub/management/commands/import_from_comfig.py
@@ -0,0 +1,124 @@
+from django.conf import settings
+from django.core.management.base import CommandError
+
+import pandas as pd
+
+from .base_importers import BaseImportFromDataFrameCommand
+
+
+class Command(BaseImportFromDataFrameCommand):
+    """Run a data import described by one row of ``conf/imports.csv``.
+
+    The row is selected with ``--import_name``; it names the data file to
+    load, the columns to read from it and the values used as the data set
+    defaults, so a simple import needs a new CSV row rather than a new
+    management command.
+    """
+
+    help = "Import based on config"
+
+    config_file = settings.BASE_DIR / "conf" / "imports.csv"
+
+    # Config columns copied unchanged into the data set's "defaults" dict.
+    defaults_cols = [
+        "label",
+        "data_type",
+        "category",
+        "subcategory",
+        "release_date",
+        "source_label",
+        "source",
+        "source_type",
+        "data_url",
+        "table",
+        "default_value",
+        "exclude_countries",
+        # "comparators",
+        "unit_type",
+        "unit_distribution",
+        "fill_blanks",
+    ]
+
+    def add_arguments(self, parser):
+        """Add the required ``--import_name`` option to the base arguments."""
+        super().add_arguments(parser)
+
+        parser.add_argument(
+            "--import_name", action="store", required=True, help="Name of import to run"
+        )
+
+    @staticmethod
+    def _as_bool(value):
+        """Interpret a config cell as a flag.
+
+        pandas parses a column holding only TRUE/FALSE as booleans, but a
+        column with mixed content keeps the raw strings, so accept both.
+        Blank cells (NaN) count as False.
+        """
+        if isinstance(value, str):
+            return value.strip().upper() == "TRUE"
+        if pd.isna(value):
+            return False
+        return bool(value)
+
+    def setup(self, import_name):
+        """Load the config row called *import_name* and configure the import.
+
+        Raises:
+            CommandError: if no row in the config file has that name.
+        """
+        df = pd.read_csv(self.config_file)
+
+        # .loc returns a new frame, so the result has to be kept; indexing the
+        # unfiltered frame would always pick the first import in the file.
+        matches = df.loc[df["name"] == import_name]
+        if matches.empty:
+            raise CommandError(
+                f"No import named '{import_name}' in {self.config_file}"
+            )
+        row = matches.iloc[0]
+
+        self.message = f"Importing {row['label']}"
+        # Both attributes name the column holding the constituency.
+        self.cons_row = row["constituency_col"]
+        self.cons_col = row["constituency_col"]
+        self.data_file = settings.BASE_DIR / "data" / row["data_file"]
+        self.file_type = row["file_type"]
+        self.area_type = row["area_type"]
+        self.uses_gss = self._as_bool(row["uses_gss"])
+
+        # NOTE(review): blank CSV cells arrive here as NaN - confirm the base
+        # importer copes with that in the defaults.
+        defaults = {col: row[col] for col in self.defaults_cols}
+
+        self.data_sets = {import_name: {"defaults": defaults, "col": row["data_col"]}}
+
+    def get_dataframe(self):
+        """Read the configured data file into a DataFrame.
+
+        Returns None, after writing to stderr, when the file type is not
+        "csv" or "excel". The constituency column is cast to str.
+        """
+        if self.file_type == "csv":
+            df = pd.read_csv(self.data_file)
+        elif self.file_type == "excel":
+            df = pd.read_excel(self.data_file)
+        else:
+            self.stderr.write(f"Unknown file type: {self.file_type}")
+            return None
+
+        df = df.astype({self.get_cons_col(): "str"})
+        return df
+
+    def handle(
+        self,
+        quiet=False,
+        skip_new_areatype_conversion=False,
+        import_name=None,
+        *args,
+        **options,
+    ):
+        """Configure the import from the config file, then run the base import."""
+        self.setup(import_name)
+
+        super().handle(quiet, skip_new_areatype_conversion, *args, **options)