Skip to content

Commit

Permalink
update last election data import to use parliament api for by elections
Browse files Browse the repository at this point in the history
API data is updated in a timely manner compared to the spreadsheet we
were relying on.

Fixes #445
  • Loading branch information
struan committed Feb 20, 2024
1 parent 93f9527 commit 62addcb
Showing 1 changed file with 82 additions and 48 deletions.
130 changes: 82 additions & 48 deletions hub/management/commands/import_last_election_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import re
from datetime import date

from django.conf import settings
from django.core.management.base import BaseCommand

import numpy as np
import pandas as pd
import requests
from dateutil import parser
from tqdm import tqdm

from hub.models import Area, AreaData, DataSet, DataType
from hub.models import Area, AreaData, AreaType, DataSet, DataType


class Command(BaseCommand):
Expand All @@ -19,9 +22,8 @@ class Command(BaseCommand):
general_election_source_file = (
settings.BASE_DIR / "data" / "2019_general_election.csv"
)
# https://commonslibrary.parliament.uk/research-briefings/cbp-9225/
# https://researchbriefings.files.parliament.uk/documents/CBP-9225/Data-file.xlsx
by_election_source_file = settings.BASE_DIR / "data" / "byelections.xlsx"

by_election_api_url = "https://lda.data.parliament.uk/electionresults.json?_sort=-election.date&_pageSize=40"

area_type = "WMC"

Expand Down Expand Up @@ -82,6 +84,9 @@ def handle(self, quiet=False, *args, **options):
self.data_types = self.create_data_types()
self.import_results(df)

def get_area_type(self):
return AreaType.objects.get(code=self.area_type)

def get_general_election_data(self):
df = pd.read_csv(
self.general_election_source_file,
Expand Down Expand Up @@ -113,50 +118,76 @@ def get_general_election_data(self):
return df

def get_by_elections_data(self):
gss_lookup = {
row["Constituency name"].lower(): row["ONS_id"]
for index, row in pd.read_excel(
self.by_election_source_file,
skiprows=1,
usecols=["ONS_id", "Constituency name"],
).iterrows()
}
df = pd.read_excel(
self.by_election_source_file,
sheet_name=1,
skiprows=1,
usecols=["Party", "Constituency", "Date of By-election", "Votes_by"],
)
df["Constituency"] = df["Constituency"].apply(
lambda name: gss_lookup[name.lower()]
)
df.columns = ["party", "gss", "date", "votes"]
first_second_parties = []
for gss, data in df.groupby("gss"):
votes = sorted(data.votes.tolist())
first = votes.pop() # NOQA
second = votes.pop() # NOQA
first_party = data.query("votes == @first").iloc[0, 0]
second_party = data.query("votes == @second").iloc[0, 0]
first_second_parties.append([gss, first_party, second_party])
first_second_parties_df = pd.DataFrame(
first_second_parties, columns=["gss", "first_party", "second_party"]
).set_index("gss")
df.party = df.party.apply(
lambda x: self.party_translate_down_dict.get(x, "other")
)
df = (
df.groupby(["gss", "date"], group_keys=True)
.apply(
lambda x: x.groupby("party", group_keys=False).sum(numeric_only=True).T
)
.fillna(0)
.reset_index()
.drop(columns="level_2")
)
df = df.set_index("gss")
df.date = df.date.astype(str)
df = df.join(first_second_parties_df)
response = requests.get(self.by_election_api_url)

by_election_data = {}

if response.status_code == 200:
elections = response.json()
election_ids = []
for election in elections["result"]["items"]:
desc = election["election"]["label"]["_value"]
if desc == "2019 General Election":
break
election_id = election["_about"]
election_id = election_id.replace(
"http://data.parliament.uk/resources/", ""
)
election_ids.append(election_id)

for election_id in election_ids:
uri = (
f"https://lda.data.parliament.uk/electionresults/{election_id}.json"
)
election_response = requests.get(uri)
if election_response.status_code == 200:
election_data = election_response.json()
election_data = election_data["result"]["primaryTopic"]
label = election_data["election"]["label"]["_value"]
election_date = re.match(r"(\d+-\w+-\d+) .*", label)
election_date = election_date.group(1)
election_date = parser.parse(election_date)
cons = election_data["constituency"]["label"]["_value"]
a = Area.get_by_name(cons)

result = {
"date": election_date.date().isoformat(),
# "uri": uri,
# "label": election_data["election"]["label"]["_value"],
# "Constituency": a.gss,
# "Constituency name": cons,
}

for candidate in election_data["candidate"]:
party = candidate["party"]["_value"].lower()
party_name = self.party_translate_up_dict.get(party, "other")

# consolidate the minor parties
if party_name == "other":
count = result.get("other", 0)
count = count + candidate["numberOfVotes"]
result["other"] = count
else:
result[party] = candidate["numberOfVotes"]

sorted_results = sorted(
election_data["candidate"],
key=lambda c: c["numberOfVotes"],
reverse=True,
)
result["first_party"] = self.party_translate_up_dict.get(
sorted_results[0]["party"]["_value"].lower(),
sorted_results[0]["party"]["_value"].lower(),
)
result["second_party"] = self.party_translate_up_dict.get(
sorted_results[1]["party"]["_value"].lower(),
sorted_results[1]["party"]["_value"].lower(),
)

by_election_data[a.gss] = result

df = pd.DataFrame.from_dict(by_election_data, orient="index")

return df

def get_last_election_df(self):
Expand Down Expand Up @@ -195,6 +226,7 @@ def create_data_types(self):
"comparators": DataSet.in_comparators(),
},
)
second_party_ds.areas_available.add(self.get_area_type())

second_party, created = DataType.objects.update_or_create(
data_set=second_party_ds,
Expand All @@ -217,6 +249,8 @@ def create_data_types(self):
},
)

last_election_ds.areas_available.add(self.get_area_type())

last_election, created = DataType.objects.update_or_create(
data_set=last_election_ds,
name="last_election",
Expand Down

0 comments on commit 62addcb

Please sign in to comment.