Skip to content

Commit

Permalink
Fix import script errors due to new constituencies
Browse files (browse the repository at this point in the history)
  • Loading branch information
struan committed Dec 11, 2023
1 parent 8a57599 commit dd35470
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 12 deletions.
7 changes: 5 additions & 2 deletions hub/management/commands/import_area_age_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
from tqdm import tqdm

from hub.models import Area, AreaData, DataSet, DataType
from hub.models import Area, AreaData, AreaType, DataSet, DataType


class Command(BaseCommand):
Expand Down Expand Up @@ -49,13 +49,16 @@ def handle(self, quiet=False, *args, **options):
df = df.loc[df["Date"] == 2020]
if not self._quiet:
self.stdout.write("Importing constituency age distribution")

area_type = AreaType.objects.get(code=self.area_type)
for index, row in tqdm(df.iterrows(), disable=self._quiet, total=df.shape[0]):
age_group = row["Age group"]
gss = row["ONSConstID"]

data_type, created = DataType.objects.update_or_create(
data_set=data_set,
name=f"ages_{age_group}",
area_type=area_type,
defaults={
"data_type": "percent",
"label": f"Ages {age_group}",
Expand All @@ -78,6 +81,6 @@ def handle(self, quiet=False, *args, **options):
)

for name, average in averages.items():
data_type = DataType.objects.get(name=name)
data_type = DataType.objects.get(name=name, area_type=area_type)
data_type.average = average
data_type.save()
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def process_data(self):
# Group by the area, and add the data from there
for area_name, data in tqdm(df.groupby("area")):
try:
area = Area.objects.get(name=area_name)
area = Area.objects.get(name=area_name, area_type__code=self.area_type)
except Area.DoesNotExist:
continue

Expand Down
4 changes: 3 additions & 1 deletion hub/management/commands/import_foe_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def process_data(self):
for index, row in tqdm(df.iterrows(), disable=self._quiet):
json_data, created = AreaData.objects.update_or_create(
data_type=self.data_types["constituency_foe_groups"],
area=Area.objects.get(name=row.constituency),
area=Area.objects.get(
name=row.constituency, area_type__code=self.area_type
),
json=row.groups,
)

Expand Down
1 change: 1 addition & 0 deletions hub/management/commands/import_gbgw_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def add_data_sets(self, df):
data_type, created = DataType.objects.update_or_create(
data_set=data_set,
name="constituency_gbgw_2022_event_count",
area_type=self.get_area_type(),
defaults={
"data_type": "integer",
"label": "Number of Great Big Green Week 2022 events",
Expand Down
14 changes: 11 additions & 3 deletions hub/management/commands/import_hnh_polling_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.conf import settings
from django.db.models import Avg, Max, Min
from django.db.models import Avg, FloatField, Max, Min
from django.db.models.functions import Cast, Coalesce

import pandas as pd
Expand Down Expand Up @@ -28,6 +28,7 @@ class Command(BaseImportFromDataFrameCommand):

help = "Import Hope Not Hate polling data, from February 2023"
message = "Importing Hope Not Hate polling data"
cast_field = FloatField

defaults = {
"data_type": "percent",
Expand Down Expand Up @@ -369,6 +370,7 @@ def extract_and_save_data(self):
data_type, created = DataType.objects.update_or_create(
data_set=data_set,
name=data_type_slug,
area_type=self.get_area_type(),
defaults={
"data_type": "percent",
"label": col["label"],
Expand Down Expand Up @@ -426,7 +428,10 @@ def update_averages(self):
file["data_set_name"], col["slug"]
)
self.log(f" {data_type_slug}")
data_type = DataType.objects.get(name=data_type_slug)
data_type = DataType.objects.get(
name=data_type_slug,
area_type__code=file.get("area_type", "WMC"),
)
average = (
AreaData.objects.filter(data_type=data_type)
.annotate(
Expand All @@ -450,7 +455,10 @@ def update_max_min(self):
file["data_set_name"], col["slug"]
)
self.log(f" {data_type_slug}")
data_type = DataType.objects.get(name=data_type_slug)
data_type = DataType.objects.get(
name=data_type_slug,
area_type__code=file.get("area_type", "WMC"),
)
base = (
AreaData.objects.filter(data_type=data_type)
.annotate(
Expand Down
12 changes: 10 additions & 2 deletions hub/management/commands/import_mp_job_titles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
from tqdm import tqdm

from hub.models import Area, DataSet, DataType, Person, PersonData
from hub.models import Area, AreaType, DataSet, DataType, Person, PersonData

CONSTITUENCY_CORRECTIONS_DICT = {
"Beverly and Holderness": "Beverley and Holderness",
Expand All @@ -24,6 +24,9 @@ def handle(self, quiet=False, *args, **options):
self._quiet = quiet
self.import_results()

def get_area_type(self):
    """Look up and return the ``AreaType`` whose code is ``"WMC"``.

    The lookup goes through ``AreaType.objects.get``, so any exception that
    call raises propagates to the caller unchanged.
    """
    # NOTE(review): "WMC" presumably denotes Westminster constituencies;
    # confirm against the AreaType fixtures.
    wmc_area_type = AreaType.objects.get(code="WMC")
    return wmc_area_type

def get_df(self):
df = pd.read_csv("data/mp_job_titles.csv", usecols=["Constituency", "Title"])
df = df.query(
Expand Down Expand Up @@ -51,10 +54,12 @@ def create_data_type(self):
"comparators": DataSet.string_comparators(),
},
)
mp_job_titles_ds.areas_available.add(self.get_area_type())

mp_job_titles, created = DataType.objects.update_or_create(
data_set=mp_job_titles_ds,
name="job_titles",
area_type=self.get_area_type(),
defaults={"data_type": "text"},
)

Expand All @@ -65,9 +70,12 @@ def get_results(self):
df = self.get_df()
results = {}
print("Matching MPs with titles")
area_type = self.get_area_type()
for index, row in df.iterrows():
try:
area = Area.objects.get(name__iexact=row.Constituency)
area = Area.objects.get(
name__iexact=row.Constituency, area_type=area_type
)
results[mps.get(area=area)] = row.Title
except Area.DoesNotExist:
print(f"Constituency: {row.Constituency} not found.")
Expand Down
3 changes: 2 additions & 1 deletion hub/management/commands/import_nt_property_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ def process_data(self):
self.stdout.write("Importing National Trust property data")

# Group by the area, and add the data from there
area_type = self.get_area_type()
for area_name, data in tqdm(df.groupby("area")):
try:
area = Area.objects.get(name=area_name)
area = Area.objects.get(name=area_name, area_type=area_type)
except Area.DoesNotExist:
continue

Expand Down
4 changes: 3 additions & 1 deletion hub/management/commands/import_renewables_polling_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd

from hub.management.commands.base_importers import BaseImportFromDataFrameCommand
from hub.models import AreaData, DataSet, DataType
from hub.models import AreaData, AreaType, DataSet, DataType

SUBCATEGORIES_DICT = {
"would-change-party": "voting",
Expand Down Expand Up @@ -91,6 +91,7 @@ def get_dataframe(self):

def add_data_sets(self, df):
order = 1
area_type = AreaType.objects.get(code=self.area_type)
for column in df.columns:
if column in ("id-name", "gss", "constituency-name"):
continue
Expand Down Expand Up @@ -125,6 +126,7 @@ def add_data_sets(self, df):
data_type, created = DataType.objects.update_or_create(
data_set=data_set,
name=column,
area_type=area_type,
defaults={
"data_type": "percent",
"label": label,
Expand Down
3 changes: 2 additions & 1 deletion hub/management/commands/import_wi_group_locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ def process_data(self):
self.stdout.write("Importing women's institute group data")

# Group by the area, and add the data from there
area_type = self.get_area_type()
for area_name, data in tqdm(df.groupby("area")):
try:
area = Area.objects.get(name=area_name)
area = Area.objects.get(name=area_name, area_type=area_type)
except Area.DoesNotExist:
continue

Expand Down

0 comments on commit dd35470

Please sign in to comment.