Skip to content

Commit

Permalink
rough version of importer that uses CSV config for imports
Browse files Browse the repository at this point in the history
This should enable adding simple imports by adding rows to the CSV rather
than writing a new script
  • Loading branch information
struan committed Nov 26, 2024
1 parent 620f61f commit ed857c8
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 0 deletions.
2 changes: 2 additions & 0 deletions conf/imports.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,label,description,data_type,category,subcategory,release_date,source_label,source,source_type,data_url,table,default_value,exclude_countries,unit_type,unit_distribution,fill_blanks,is_public,is_filterable,is_shadeable,data_file,uses_gss,constituency_col,data_col,file_type,area_type
import_test,"Berkshire, Buckinghamshire and Oxfordshire Wildlife Trust members",,integer,movement,places_and_spaces,August 2024,"Data from Berkshire, Buckinghamshire and Oxfordshire Wildlife Trusts",,xlsl,,areadata,10,"Scotland, Northern Ireland",raw,people_in_area,FALSE,FALSE,FALSE,FALSE,berks_bucks_oxon_wildlife_trust_member_counts.xlsx,FALSE,Constituency,Total_Members,excel,WMC23
86 changes: 86 additions & 0 deletions hub/management/commands/import_from_comfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from django.conf import settings

import pandas as pd

from .base_importers import BaseImportFromDataFrameCommand


class Command(BaseImportFromDataFrameCommand):
    """Run a single import whose settings come from a row of the config CSV.

    The row is selected by the ``--import_name`` argument, matched against the
    ``name`` column of ``conf/imports.csv``. The row supplies the data file
    location and type, the constituency and data column names, the area type
    and the values copied into the data set defaults.
    """

    help = "Import based on config"

    # CSV file holding one row of settings per configured import.
    config_file = settings.BASE_DIR / "conf" / "imports.csv"

    # Config columns copied as-is into the data set's "defaults" dict.
    defaults_cols = [
        "label",
        "data_type",
        "category",
        "subcategory",
        "release_date",
        "source_label",
        "source",
        "source_type",
        "data_url",
        "table",
        "default_value",
        "exclude_countries",
        # "comparators",
        "unit_type",
        "unit_distribution",
        "fill_blanks",
    ]

    def add_arguments(self, parser):
        """Add the required ``--import_name`` option to the inherited ones."""
        super().add_arguments(parser)

        parser.add_argument(
            "--import_name", action="store", required=True, help="Name of import to run"
        )

    @staticmethod
    def _parse_bool(value):
        """Return a plain ``bool`` for a TRUE/FALSE cell read from the config.

        ``pandas.read_csv`` turns a column holding only TRUE/FALSE into
        booleans, but leaves it as strings when the column is mixed, so both
        forms are accepted. Empty cells (NaN) count as ``False``.
        """
        if isinstance(value, str):
            return value.strip().upper() == "TRUE"
        if pd.isna(value):
            return False
        return bool(value)

    def setup(self, import_name):
        """Load the config row named ``import_name`` and set import attributes.

        Sets ``message``, ``cons_row``, ``cons_col``, ``data_file``,
        ``file_type``, ``area_type``, ``uses_gss`` and ``data_sets`` on the
        instance.

        Raises:
            CommandError: if no row of the config has that name.
        """
        # Imported locally so the module-level imports stay unchanged.
        from django.core.management.base import CommandError

        config = pd.read_csv(self.config_file)

        # Keep only the rows for the requested import; the first match wins.
        matches = config.loc[config["name"] == import_name]
        if matches.empty:
            raise CommandError(
                f"No import named '{import_name}' found in {self.config_file}"
            )
        row = matches.iloc[0]

        self.message = f"Importing {row['label']}"
        # Both attributes hold the constituency column name.
        # NOTE(review): presumably one of them is what the base class reads -
        # confirm and drop the other.
        self.cons_row = row["constituency_col"]
        self.cons_col = row["constituency_col"]
        self.data_file = settings.BASE_DIR / "data" / row["data_file"]
        self.file_type = row["file_type"]
        self.area_type = row["area_type"]
        self.uses_gss = self._parse_bool(row["uses_gss"])

        # NOTE(review): empty config cells arrive here as NaN - confirm the
        # base importer copes with NaN values in the defaults.
        defaults = {col: row[col] for col in self.defaults_cols}

        self.data_sets = {import_name: {"defaults": defaults, "col": row["data_col"]}}

    def get_dataframe(self):
        """Read the configured data file into a DataFrame.

        Returns:
            The data with the constituency column cast to ``str``, or ``None``
            (after writing a message to stderr) when ``file_type`` is neither
            ``"csv"`` nor ``"excel"``.
        """
        if self.file_type == "csv":
            df = pd.read_csv(self.data_file)
        elif self.file_type == "excel":
            df = pd.read_excel(self.data_file)
        else:
            self.stderr.write(f"Unknown file type: {self.file_type}")
            return None

        df = df.astype({self.get_cons_col(): "str"})
        return df

    def handle(
        self,
        quiet=False,
        skip_new_areatype_conversion=False,
        import_name=None,
        *args,
        **options,
    ):
        """Configure the command from the config row, then run the base import."""
        self.setup(import_name)

        super().handle(quiet, skip_new_areatype_conversion, *args, **options)

0 comments on commit ed857c8

Please sign in to comment.