Skip to content

Commit

Permalink
fetch and save recent relevant MP written answers
Browse files Browse the repository at this point in the history
This fetches yesterday's written-answers (wrans) XML from TheyWorkForYou,
parses out relevant questions (defined by department), and updates each
MP's stored list to include their three most recent questions.
  • Loading branch information
struan committed Sep 17, 2024
1 parent dc7dc4d commit 1ca5bd4
Showing 1 changed file with 138 additions and 0 deletions.
138 changes: 138 additions & 0 deletions hub/management/commands/import_mp_wrans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from collections import defaultdict
from datetime import date, timedelta
from functools import cache

from django.core.management.base import BaseCommand

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

from hub.models import AreaType, DataSet, DataType, Person, PersonData


class Command(BaseCommand):
    help = "Import relevant MP written questions"

    # Only questions answered by these departments count as relevant.
    departments = ["Department for Environment, Food and Rural Affairs"]

    # TheyWorkForYou publishes one scraped-XML file of written answers per day.
    # (A duplicate assignment pointing at the JSON getWrans API was removed —
    # it was dead code immediately overwritten by this value.)
    wrans_api_url = (
        "https://www.theyworkforyou.com/pwdata/scrapedxml/wrans/answers{date}.xml"
    )

    def handle(self, quiet=False, *args, **options):
        """Fetch yesterday's written answers and store the relevant ones per MP."""
        self._quiet = quiet
        # Relevant votes
        if not quiet:
            print("Getting recent relevant wrans from parliament")
        wrans = self.get_all_relevant_wrans()
        # The fetch returns None on a non-200 response; bail out rather than
        # crashing on wrans.items() in import_results.
        if wrans is None:
            return

        self.data_types = self.create_data_types()
        self.import_results(wrans)

    def get_existing_data(self):
        """Return a map of MP external_id -> previously stored question list.

        NOTE(review): previously decorated with @functools.cache, which leaks
        instances when applied to methods (ruff B019). It is only called once
        per run, so plain computation is equivalent.
        """
        lookup = {}

        for data in PersonData.objects.filter(
            data_type__name="mp_wrans"
        ).select_related("person"):
            lookup[data.person.external_id] = data.value()

        return lookup

    def get_all_relevant_wrans(self):
        """Download and parse yesterday's wrans XML from TheyWorkForYou.

        Returns a dict mapping MP external_id -> list of question dicts
        (department, area, date, link, id, name), or None if the file
        could not be fetched.
        """
        yesterday = date.today() - timedelta(1)
        api_url = self.wrans_api_url.format(date=yesterday.isoformat())
        response = requests.get(api_url)
        if response.status_code != 200:
            print(f"API didn't work - returned code: {str(response.status_code)}")
            return None

        soup = BeautifulSoup(response.text, "xml")
        wrans = defaultdict(list)
        mp = None
        question = {}
        department = None
        for tag in soup.publicwhip.children:
            if tag.name == "major-heading":
                department = tag.text.strip()
            if tag.name == "minor-heading":
                # A new question is starting: save the previous one if it was
                # relevant and we identified the asking MP.
                if question.get("department") in self.departments and mp is not None:
                    wrans[mp].append(question)
                qid = tag["id"].replace("uk.org.publicwhip/wrans/", "")
                question = {
                    "department": department,
                    "area": tag.text.strip(),
                    "date": yesterday.isoformat(),
                    "link": f"https://www.theyworkforyou.com/wrans/?id={qid}",
                    "id": qid,
                }
                mp = None
            if tag.name == "ques":
                mp = tag["person_id"].replace("uk.org.publicwhip/person/", "")
                question["name"] = tag["speakername"]

        # The loop above only saves a question when the *next* minor-heading
        # arrives, so without this the last question in the file was dropped.
        if question.get("department") in self.departments and mp is not None:
            wrans[mp].append(question)

        return wrans

    def create_data_types(self):
        """Ensure the mp_wrans DataSet/DataType exist and return the types."""
        data_types = {}
        ds, created = DataSet.objects.update_or_create(
            name="mp_wrans",
            defaults={
                "data_type": "json",
                "description": "Recent relevant Written Questions in Parliament",
                "label": "MP written questions",
                "release_date": date.today(),
                "source_label": "Data from UK Parliament.",
                "source": "https://parliament.uk/",
                "table": "people__persondata",
                "subcategory": "vote",
                "comparators": DataSet.in_comparators(),
            },
        )

        # WMC23 = 2023-boundary Westminster constituencies.
        ds.areas_available.add(AreaType.objects.get(code="WMC23"))

        data_type, created = DataType.objects.update_or_create(
            data_set=ds,
            name="mp_wrans",
            label="MP written questions",
            defaults={"data_type": "json"},
        )
        data_types["mp_wrans"] = data_type

        return data_types

    def import_results(self, wrans):
        """Merge newly fetched questions with stored ones, keeping the 3 newest."""
        if not self._quiet:
            print("Adding MP data on Written Answers")

        data = self.get_existing_data()

        for mp_id, questions in tqdm(wrans.items(), disable=self._quiet):
            mp = Person.objects.get(external_id=mp_id)
            if data.get(mp_id):
                # New questions come first so the truncation below keeps the
                # most recent three; dedupe by question id.
                questions.extend(data[mp_id])
                q_to_add = []
                for question in questions:
                    if question["id"] not in [q["id"] for q in q_to_add]:
                        q_to_add.append(question)
            else:
                q_to_add = questions

            q_to_add = q_to_add[:3]
            person_data, created = PersonData.objects.update_or_create(
                person=mp,
                data_type=self.data_types["mp_wrans"],
                defaults={
                    "json": q_to_add,
                },
            )

0 comments on commit 1ca5bd4

Please sign in to comment.