Skip to content

Commit

Permalink
Update Moray scraper (#1789)
Browse files Browse the repository at this point in the history
* add Moray Council

* fix Moray scraper

* fix docs

* run update_docu_links

* undo bin_id

* Update moray_gov_uk.py

* reformatting

---------

Co-authored-by: Dewet Diener <[email protected]>
Co-authored-by: 5ila5 <[email protected]>
  • Loading branch information
3 people authored Feb 19, 2024
1 parent f8cfbcd commit 12947aa
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -1,32 +1,29 @@
import logging
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from waste_collection_schedule import Collection # type: ignore[attr-defined]

_LOGGER = logging.getLogger(__name__)

TITLE = "Moray Council"
DESCRIPTION = "Source for Moray Council, UK."
URL = "https://moray.gov.uk"
TEST_CASES = {
"test_str": {"id": "00013734"},
"test_int": {"id": 60216},
"Test_001": {"id": "00013734"},
"Test_002": {"id": 60216},
}
TEXT_MAP = {
"images/green_bin.png": "Refuse (Green)",
"images/brown_bin.png": "Garden and Kitchen Waste (Brown)",
"images/purple_bin.png": "Cans and Plastic (Purple)",
"images/blue_bin.png": "Paper and Card (Blue)",
"images/orange_box_glass_bag.png": "Glass (Orange)",
"G": "Green Refuse Bin",
"B": "Brown Garden and Kitchen Waste Bin",
"P": "Purple Cans and Plastic Bin",
"C": "Blue Paper and Card Bin",
"O": "Glass Container",
}
ICON_MAP = {
"images/green_bin.png": "mdi:trash-can",
"images/brown_bin.png": "mdi:compost",
"images/purple_bin.png": "mdi:recycle",
"images/blue_bin.png": "mdi:newspaper-variant-multiple",
"images/orange_box_glass_bag.png": "mdi:bottle-wine",
"G": "mdi:trash-can",
"B": "mdi:recycle",
"P": "mdi:house",
"C": "mdi:bulb",
"O": "mdi:glass",
}


Expand All @@ -35,28 +32,38 @@ def __init__(self, id):
self._id = str(id).zfill(8)

def fetch(self):
response = requests.Session().get(
f"https://bindayfinder.moray.gov.uk/cal_2024_view.php?id={self._id}"
year = datetime.today().year
response = requests.get(
f"https://bindayfinder.moray.gov.uk/cal_{year}_view.php",
params={"id": self._id},
)
response.raise_for_status()
if response.status_code != 200:
# fall back to known good calendar URL
response = requests.get(
"https://bindayfinder.moray.gov.uk/cal_2024_view.php",
params={"id": self._id},
)
soup = BeautifulSoup(response.text, "html.parser")

entries = []

for month in soup.findAll("div", class_="cal_month_box"):
parsed_date = None
for div in month.findAll("div"):
if "disp_day_area" in div["class"]:
parsed_date = datetime.strptime(div.text, "%a %d %B %Y").date()
elif "disp_bins_cont" in div["class"]:
for i in div.findAll("img"):
for month_container in soup.findAll("div", class_="month-container"):
for div in month_container.findAll("div"):
if "month-header" in div["class"]:
month = div.text
elif div["class"] and div["class"][0] in ["B", "GPOC", "GBPOC"]:
bins = div["class"][0]
dom = int(div.text)
parsed_date = datetime.strptime(
f"{dom} {month} {year}", "%d %B %Y"
).date()
for i in bins:
entries.append(
Collection(
date=parsed_date,
t=TEXT_MAP.get(i["src"]),
icon=ICON_MAP.get(i["src"]),
t=TEXT_MAP.get(i),
icon=ICON_MAP.get(i),
)
)
if not entries:
_LOGGER.warning(f"No collection days found at {response.url}")

return entries
6 changes: 3 additions & 3 deletions doc/source/moray_gov_uk.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ waste_collection_schedule:
sources:
- name: moray_gov_uk
args:
id: "01234567"
id: UNIQUE_PROPERTY_ID
```
### Configuration Variables
Expand All @@ -18,8 +18,8 @@ waste_collection_schedule:
*(string) (required)*
#### How to find your unique property `id`
Your `id` is the 8-digit number at the end of the URL shown when you look up your collection schedule on the [Moray Council Bin Day Finder](https://bindayfinder.moray.gov.uk/) website.
#### How to find your `id`
Your `id` is the number at the end of the URL shown when you look up your collection schedule on the [Moray Council Bin Day Finder](https://bindayfinder.moray.gov.uk/) website.

For example: _https://bindayfinder.moray.gov.uk/disp_bins.php?id=`00027199`_

0 comments on commit 12947aa

Please sign in to comment.