Skip to content

Commit

Permalink
Merge pull request #5 from AdrianoKF/4-infection-data-parsing-error
Browse files (browse the repository at this point in the history)
Update infection data parser for new web page layout
  • Loading branch information
AdrianoKF authored Sep 17, 2021
2 parents 35d5232 + 62904f4 commit 2b453f4
Showing 1 changed file with 6 additions and 9 deletions.
15 changes (6 additions & 9 deletions) in custom_components/home_assistant_covid19_augsburg/crawler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,8 +107,8 @@ async def crawl_incidence(self) -> IncidenceData:
)
soup = await self._fetch(url)

match = soup.find(class_="frame--type-textpic")
text = match.p.text
match = soup.find(id="c1067628")
text = match.text.strip()
_log.debug(f"Infection data text: {text}")

matches = re.search(r"(\d+,\d+)\sNeuinfektion", text)
Expand All @@ -120,18 +120,15 @@ async def crawl_incidence(self) -> IncidenceData:
incidence = parse_num(matches.group(1), t=float)
_log.debug(f"Parsed incidence: {incidence}")

text = match.h2.text
matches = re.search(r"\((\d+)\. (\w+).*\)", text)
match = soup.find(id="c1052517")
text = match.text.strip()
matches = re.search(r"Stand: (\d+)\. (\w+) (\d{4})", text)
if not matches:
raise ValueError(f"Could not extract date from scraped web page, {text=}")

date = parse_date(matches.group(1), matches.group(2))
date = parse_date(matches.group(1), matches.group(2), matches.group(3))
_log.debug(f"Parsed date: {date}")

match = match.find_next_sibling(class_="frame--type-textpic")
text = match.text
_log.debug(f"Infection counts text: {text}")

regexes = [
r"Insgesamt: (?P<total_cases>[0-9.]+)",
r"genesen: (?P<num_recovered>[0-9.]+)",
Expand Down

0 comments on commit 2b453f4

Please sign in to comment.