-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fetch and save recent relevant MP written answers
This fetches yesterday's written answers ("wrans") XML from TheyWorkForYou, parses out the relevant questions (relevance is defined by the answering department), and updates each MP's stored list of questions so that it holds their three most recent ones.
- Loading branch information
Showing
1 changed file
with
138 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
from collections import defaultdict | ||
from datetime import date, timedelta | ||
from functools import cache | ||
|
||
from django.core.management.base import BaseCommand | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
from tqdm import tqdm | ||
|
||
from hub.models import AreaType, DataSet, DataType, Person, PersonData | ||
|
||
|
||
class Command(BaseCommand):
    """Import recent written questions asked by MPs to selected departments.

    The command downloads yesterday's written-answers XML from TheyWorkForYou,
    keeps the questions filed under one of ``departments``, and stores up to
    ``max_questions`` questions per MP as ``mp_wrans`` person data. Questions
    already stored for an MP are retained behind the newly fetched ones.
    """

    help = "Import relevant MP written questions"

    # Only questions filed under these major headings are imported.
    departments = ["Department for Environment, Food and Rural Affairs"]

    # Scraped XML for one day of written answers; ``{date}`` is an ISO date.
    wrans_api_url = (
        "https://www.theyworkforyou.com/pwdata/scrapedxml/wrans/answers{date}.xml"
    )

    # Maximum number of questions retained per MP.
    max_questions = 3

    def handle(self, quiet=False, *args, **options):
        """Fetch yesterday's relevant questions and store them per MP.

        Args:
            quiet: When true, suppress progress output.
        """
        self._quiet = quiet
        if not quiet:
            print("Getting recent relevant wrans from parliament")
        wrans = self.get_all_relevant_wrans()
        if wrans is None:
            # The fetch failed and has already been reported; there is
            # nothing to import, so leave the database untouched.
            return

        self.data_types = self.create_data_types()
        self.import_results(wrans)

    @cache
    def get_existing_data(self):
        """Return the stored ``mp_wrans`` values keyed by person external id.

        The result is memoised with ``functools.cache``, so the query runs once
        per command instance.
        """
        lookup = {}

        for data in PersonData.objects.filter(
            data_type__name="mp_wrans"
        ).select_related("person"):
            lookup[data.person.external_id] = data.value()

        return lookup

    def _keep_if_relevant(self, wrans, mp, question):
        """Record ``question`` under ``mp`` if it has an asker and a tracked department."""
        if mp is not None and question.get("department") in self.departments:
            wrans[mp].append(question)

    def get_all_relevant_wrans(self):
        """Download and parse yesterday's written answers.

        Returns:
            A ``defaultdict(list)`` mapping a person id (the ``person_id``
            attribute with its ``uk.org.publicwhip/person/`` prefix removed) to
            the list of relevant question dicts, in document order. ``None`` is
            returned when the download does not answer with HTTP 200.
        """
        yesterday = (date.today() - timedelta(1)).isoformat()
        api_url = self.wrans_api_url.format(date=yesterday)
        # A timeout stops the command hanging forever on a stalled connection.
        response = requests.get(api_url, timeout=30)
        if response.status_code != 200:
            print(f"API didn't work - returned code: {response.status_code}")
            return None

        wrans = defaultdict(list)
        soup = BeautifulSoup(response.text, "xml")
        root = soup.publicwhip
        if root is None:
            # No <publicwhip> root element, so there are no questions to read.
            return wrans

        mp = None
        question = {}
        department = None
        for tag in root.children:
            if tag.name == "major-heading":
                department = tag.text.strip()
            elif tag.name == "minor-heading":
                # A new heading starts a new question, so file the previous one.
                self._keep_if_relevant(wrans, mp, question)
                wran_id = tag["id"].replace("uk.org.publicwhip/wrans/", "")
                question = {
                    "department": department,
                    "area": tag.text.strip(),
                    "date": yesterday,
                    "link": f"https://www.theyworkforyou.com/wrans/?id={wran_id}",
                    "id": wran_id,
                }
                mp = None
            elif tag.name == "ques":
                mp = tag["person_id"].replace("uk.org.publicwhip/person/", "")
                question["name"] = tag["speakername"]

        # The final question has no following heading to trigger its filing.
        self._keep_if_relevant(wrans, mp, question)

        return wrans

    def create_data_types(self):
        """Create or update the ``mp_wrans`` data set and data type.

        Returns:
            A dict mapping ``"mp_wrans"`` to its ``DataType``.
        """
        data_types = {}
        ds, _ = DataSet.objects.update_or_create(
            name="mp_wrans",
            defaults={
                "data_type": "json",
                "description": "Recent relevant Written Questions in Parliament",
                "label": "MP written questions",
                "release_date": date.today(),
                "source_label": "Data from UK Parliament.",
                "source": "https://parliament.uk/",
                "table": "people__persondata",
                "subcategory": "vote",
                "comparators": DataSet.in_comparators(),
            },
        )

        ds.areas_available.add(AreaType.objects.get(code="WMC23"))

        data_type, _ = DataType.objects.update_or_create(
            data_set=ds,
            name="mp_wrans",
            label="MP written questions",
            defaults={"data_type": "json"},
        )
        data_types["mp_wrans"] = data_type

        return data_types

    def import_results(self, wrans):
        """Merge fetched questions with stored ones and save them per MP.

        New questions come first, followed by previously stored ones; duplicate
        ids are dropped and only the first ``max_questions`` entries are kept.
        People that cannot be found by external id are skipped.

        Args:
            wrans: Mapping of person external id to a list of question dicts,
                as returned by ``get_all_relevant_wrans``. The lists are not
                modified.
        """
        if not self._quiet:
            print("Adding MP data on Written Answers")

        if not wrans:
            return

        existing = self.get_existing_data()

        for mp_id, questions in tqdm(wrans.items(), disable=self._quiet):
            try:
                mp = Person.objects.get(external_id=mp_id)
            except Person.DoesNotExist:
                # The askers in the feed are not guaranteed to be in the hub.
                if not self._quiet:
                    print(f"No person found with external id {mp_id}; skipping")
                continue

            seen_ids = set()
            q_to_add = []
            for question in [*questions, *(existing.get(mp_id) or [])]:
                if question["id"] in seen_ids:
                    continue
                seen_ids.add(question["id"])
                q_to_add.append(question)

            PersonData.objects.update_or_create(
                person=mp,
                data_type=self.data_types["mp_wrans"],
                defaults={
                    "json": q_to_add[: self.max_questions],
                },
            )