Merge branch 'master' into montreal_est_scraper

opencivicdata · Oct 29, 2024 · 707e503 · 707e503
2 parents 6f52701 + a370b4c
commit 707e503
Show file tree

Hide file tree

Showing 148 changed files with 1,311 additions and 1,310 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"  # Dependabot ecosystem for GitHub Actions version updates
+    directory: "/"
+    schedule:
+      interval: "daily"  # check for new versions every day
diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
@@ -0,0 +1,35 @@
+# The pull_request_target workflow trigger is dangerous. Do not add unrelated logic to this workflow.
+# https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
+# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target
+name: Auto-merge
+on: pull_request_target
+permissions:
+  pull-requests: write  # to approve the PR
+  contents: write  # to merge the PR
+jobs:
+  dependabot:  # approve and enable squash auto-merge on PRs opened by dependabot[bot]
+    if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' }}
+    runs-on: ubuntu-latest
+    steps:  # major-version bumps are left for manual review, except for GitHub Actions updates
+      - id: dependabot-metadata  # provides the update-type and package-ecosystem outputs tested below
+        uses: dependabot/fetch-metadata@v2
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+      - if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' || steps.dependabot-metadata.outputs.package-ecosystem == 'github_actions' }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh pr review --approve ${{ github.event.pull_request.html_url }}
+      - if: ${{ steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major' || steps.dependabot-metadata.outputs.package-ecosystem == 'github_actions' }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh pr merge --auto --squash ${{ github.event.pull_request.html_url }}
+  precommit:  # same approve + auto-merge flow for PRs opened by pre-commit-ci[bot], with no update-type filter
+    if: ${{ github.event.pull_request.user.login == 'pre-commit-ci[bot]' }}
+    runs-on: ubuntu-latest
+    steps:
+      - env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh pr review --approve ${{ github.event.pull_request.html_url }}
+      - env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh pr merge --auto --squash ${{ github.event.pull_request.html_url }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,16 +1,17 @@
 ci:
   autoupdate_schedule: quarterly
+  skip: [pip-compile]
+default_language_version:
+  python: python3.10  # 2-space indent, matching the rest of this file
 repos:
-  - repo: https://github.com/psf/black
-    rev: 24.3.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.9
     hooks:
-      - id: black
-  - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+      - id: ruff
+      - id: ruff-format
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.4.18
     hooks:
-      - id: flake8
-        additional_dependencies: [flake8-comprehensions]
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
+      - id: pip-compile
+        name: pip-compile requirements.in
+        args: [requirements.in, -o, requirements.txt]
diff --git a/ca/people.py b/ca/people.py
@@ -59,7 +59,7 @@ def scrape_people(self, rows, gender):
                 photo_response = self.get(photo)
                 if (
                     photo_response.status_code == 200
-                    and hashlib.sha1(photo_response.content).hexdigest() not in IMAGE_PLACEHOLDER_SHA1
+                    and hashlib.sha1(photo_response.content).hexdigest() not in IMAGE_PLACEHOLDER_SHA1  # noqa: S324 # non-cryptographic
                 ):
                     m.image = photo
 
@@ -119,7 +119,7 @@ def scrape_people(self, rows, gender):
             ):
                 note = "constituency"
                 if i:
-                    note += " ({})".format(i + 1)
+                    note += f" ({i + 1})"
 
                 address = constituency_office_el.xpath("./p[1]")[0]
                 address = address.text_content().strip().splitlines()

diff --git a/ca_ab/people.py b/ca_ab/people.py
@@ -24,7 +24,7 @@
 
 
 def get_party(abbr):
-    """Return full party name from abbreviation"""
+    """Return full party name from abbreviation."""
     return PARTIES[abbr]
 
 
@@ -59,8 +59,8 @@ def scrape(self):
         field_names = next(reader)
         for name in OFFICE_FIELDS:
             assert field_names.count(name) == 2
-            field_names[field_names.index(name)] = "{} 1".format(name)
-            field_names[field_names.index(name)] = "{} 2".format(name)
+            field_names[field_names.index(name)] = f"{name} 1"
+            field_names[field_names.index(name)] = f"{name} 2"
         rows = [dict(zip_longest(field_names, row)) for row in reader]
         assert len(rows), "No members found"
         for mla in rows:
@@ -76,8 +76,8 @@ def scrape(self):
             row_xpath = '//td[normalize-space()="{}"]/..'.format(
                 mla["Constituency Name"],
             )
-            (detail_url,) = index.xpath("{}//a/@href".format(row_xpath))
-            (photo_url,) = index.xpath("{}//img/@src".format(row_xpath))
+            (detail_url,) = index.xpath(f"{row_xpath}//a/@href")
+            (photo_url,) = index.xpath(f"{row_xpath}//img/@src")
             district = mla["Constituency Name"]
             if district == "Calgary-Bhullar-McCall":
                 district = "Calgary-McCall"
@@ -108,10 +108,10 @@ def scrape(self):
 
             for suffix, note in addresses:
                 for key, contact_type in (("Phone", "voice"), ("Fax", "fax")):
-                    value = mla["{} Number {}".format(key, suffix)]
+                    value = mla[f"{key} Number {suffix}"]
                     if value and value != "Pending":
                         p.add_contact(contact_type, value, note)
-                address = ", ".join(filter(bool, [mla["{} {}".format(field, suffix)] for field in ADDRESS_FIELDS]))
+                address = ", ".join(filter(bool, [mla[f"{field} {suffix}"] for field in ADDRESS_FIELDS]))
                 if address:
                     p.add_contact("address", address, note)
 

diff --git a/ca_ab_grande_prairie/__init__.py b/ca_ab_grande_prairie/__init__.py
@@ -17,7 +17,7 @@ def get_organizations(self):
         for seat_number in range(1, 9):
             organization.add_post(
                 role="Councillor",
-                label="{} (seat {})".format(self.division_name, seat_number),
+                label=f"{self.division_name} (seat {seat_number})",
                 division_id=self.division_id,
             )
 

diff --git a/ca_ab_grande_prairie/people.py b/ca_ab_grande_prairie/people.py
@@ -1,7 +1,30 @@
-from utils import CSVScraper
+from utils import CanadianPerson as Person
+from utils import CanadianScraper
 
+COUNCIL_PAGE = "https://cityofgp.com/city-government/mayor-city-council/council-members"
 
-class GrandePrairiePersonScraper(CSVScraper):
-    # https://data.cityofgp.com/Community/City-Council-Contact-Information/vcfc-gi78
-    csv_url = "https://data.cityofgp.com/api/views/vcfc-gi78/rows.csv?accessType=DOWNLOAD"
-    many_posts_per_area = True
+
+class GrandePrairiePersonScraper(CanadianScraper):
+    def scrape(self):  # yields one Person per entry found on COUNCIL_PAGE
+        seat_number = 1  # next councillor seat label to hand out
+        page = self.lxmlize(COUNCIL_PAGE)
+        councillors = page.xpath('//div[contains(@class, "council-bios")]//div[@class="views-row"]')
+
+        assert len(councillors), "No councillors found"
+        for councillor in councillors:
+            role, name = councillor.xpath(".//h3")[0].text_content().split(" ", 1)  # first word is the role
+            if role == "Councillor":
+                district = f"Grande Prairie (seat {seat_number})"
+                seat_number += 1
+            else:
+                district = "Grande Prairie"  # any non-councillor role gets the city-wide district
+            email = self.get_email(councillor)
+            phone = self.get_phone(councillor)
+            image = councillor.xpath(".//img/@src")[0]
+
+            p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
+            p.add_contact("email", email)
+            p.add_contact("voice", phone, "legislature")
+            p.add_source(COUNCIL_PAGE)
+
+            yield p
diff --git a/ca_ab_grande_prairie_county_no_1/__init__.py b/ca_ab_grande_prairie_county_no_1/__init__.py
@@ -16,8 +16,8 @@ def get_organizations(self):
         for division_number in range(1, 10):
             organization.add_post(
                 role="Councillor",
-                label="Division {}".format(division_number),
-                division_id="{}/division:{}".format(self.division_id, division_number),
+                label=f"Division {division_number}",
+                division_id=f"{self.division_id}/division:{division_number}",
             )
 
         yield organization
diff --git a/ca_ab_lethbridge/__init__.py b/ca_ab_lethbridge/__init__.py
@@ -17,7 +17,7 @@ def get_organizations(self):
         for seat_number in range(1, 9):
             organization.add_post(
                 role="Councillor",
-                label="{} (seat {})".format(self.division_name, seat_number),
+                label=f"{self.division_name} (seat {seat_number})",
                 division_id=self.division_id,
             )
 

diff --git a/ca_ab_lethbridge/people.py b/ca_ab_lethbridge/people.py
@@ -8,7 +8,7 @@
 class LethbridgePersonScraper(CanadianScraper):
     def scrape_mayor(self):
         page = self.lxmlize(MAYOR_PAGE)
-        paragraph = page.xpath("//p[1]")[0].text_content().split()
+        paragraph = page.xpath("//h4[contains(., 'Mayor')]/following-sibling::p")[0].text_content().split()
         name = " ".join([paragraph[0], paragraph[1]])
 
         p = Person(primary_org="legislature", name=name, district="Lethbridge", role="Mayor")
@@ -24,7 +24,7 @@ def scrape_person(self, url, seat_number):
         p = Person(
             primary_org="legislature",
             name=name,
-            district="Lethbridge (seat {})".format(seat_number + 1),
+            district=f"Lethbridge (seat {seat_number + 1})",
             role="Councillor",
         )
 

diff --git a/ca_ab_wood_buffalo/__init__.py b/ca_ab_wood_buffalo/__init__.py
@@ -17,16 +17,16 @@ def get_organizations(self):
         for seat_number in range(1, 7):
             organization.add_post(
                 role="Councillor",
-                label="Ward 1 (seat {})".format(seat_number),
-                division_id="{}/ward:1".format(self.division_id),
+                label=f"Ward 1 (seat {seat_number})",
+                division_id=f"{self.division_id}/ward:1",
             )
         for seat_number in range(1, 3):
             organization.add_post(
                 role="Councillor",
-                label="Ward 2 (seat {})".format(seat_number),
-                division_id="{}/ward:2".format(self.division_id),
+                label=f"Ward 2 (seat {seat_number})",
+                division_id=f"{self.division_id}/ward:2",
             )
-        organization.add_post(role="Councillor", label="Ward 3", division_id="{}/ward:3".format(self.division_id))
-        organization.add_post(role="Councillor", label="Ward 4", division_id="{}/ward:4".format(self.division_id))
+        organization.add_post(role="Councillor", label="Ward 3", division_id=f"{self.division_id}/ward:3")
+        organization.add_post(role="Councillor", label="Ward 4", division_id=f"{self.division_id}/ward:4")
 
         yield organization
diff --git a/ca_ab_wood_buffalo/people.py b/ca_ab_wood_buffalo/people.py
@@ -33,13 +33,13 @@ def scrape(self):
         for ward in wards:
             area = ward.text_content().split("–", 1)[1].strip()
             councillors = ward.xpath("./following-sibling::table[1]/tbody/tr/td/h3")
-            assert len(councillors), "No councillors found for {}".format(area)
+            assert len(councillors), f"No councillors found for {area}"
             for councillor in councillors:
                 name = councillor.text_content()
 
                 if area in ("Ward 1", "Ward 2"):
                     seat_numbers[area] += 1
-                    district = "{} (seat {})".format(area, seat_numbers[area])
+                    district = f"{area} (seat {seat_numbers[area]})"
                 else:
                     district = area
 

diff --git a/ca_bc_abbotsford/people.py b/ca_bc_abbotsford/people.py
@@ -19,12 +19,12 @@ def scrape(self):
         ]
 
         assert len(councillors), "No councillors found"
-        assert len(councillors) == len(contact_data), "Expected {}, got {}".format(len(councillors), len(contact_data))
+        assert len(councillors) == len(contact_data), f"Expected {len(councillors)}, got {len(contact_data)}"
         for councillor, contact in zip(councillors, contact_data):
             text = councillor.xpath(".//h3/a")[0].text_content()
             if text.startswith("Councill"):
                 role = "Councillor"
-                district = "Abbotsford (seat {})".format(councillor_seat_number)
+                district = f"Abbotsford (seat {councillor_seat_number})"
                 councillor_seat_number += 1
             else:
                 role = "Mayor"

diff --git a/ca_bc_burnaby/people.py b/ca_bc_burnaby/people.py
@@ -12,39 +12,27 @@ def scrape(self):
         councillors = page.xpath("//a[@class='biography__link']/@href")
         assert len(councillors), "No councillors found"
         for person_url in councillors:
-
-            def decode_email(e):
-                de = ""
-                k = int(e[:2], 16)
-
-                for i in range(2, len(e) - 1, 2):
-                    de += chr(int(e[i : i + 2], 16) ^ k)
-
-                return de
-
             page = self.lxmlize(person_url)
 
             role, name = page.xpath("//h1/span")[0].text_content().strip().split(" ", 1)
             photo_url = page.xpath('//img[@typeof="foaf:Image"]/@src')[0]
 
             contact_node = page.xpath('//div[@class="contact"]')[0]
 
-            email = page.xpath('//div[@class = "contact__detail contact__detail--email"]/a/@href')[0]
-            decoded_email = decode_email(email.split("#", 1)[1])  # cloudflare encrypts the email data
-
+            email = self.get_email(contact_node)
             phone = self.get_phone(contact_node, area_codes=[604, 778])
 
             if role == "Mayor":
                 district = "Burnaby"
             else:
-                district = "Burnaby (seat {})".format(councillor_seat_number)
+                district = f"Burnaby (seat {councillor_seat_number})"
                 councillor_seat_number += 1
 
             p = Person(primary_org="legislature", name=name, district=district, role=role, image=photo_url)
             p.add_source(COUNCIL_PAGE)
             p.add_source(person_url)
             if email:
-                p.add_contact("email", decoded_email)
+                p.add_contact("email", email)
             if phone:
                 p.add_contact("voice", phone, "legislature")
             yield p
diff --git a/ca_bc_coquitlam/people.py b/ca_bc_coquitlam/people.py
@@ -7,18 +7,16 @@
 
 
 class CoquitlamPersonScraper(CanadianScraper):
-
     def scrape(self):
         def build_email(script):
             w = re.findall(r'w = "(.*?)"', script)[0]
             x = re.findall(r'x = "(.*?)"', script)[0]
-            email = w + "@" + x
-            return email
+            return w + "@" + x
 
         councillor_seat_number = 1
 
         page = self.lxmlize(COUNCIL_PAGE, user_agent="Mozilla/5.0")
-        councillors = page.xpath('//table[@id="cityDirectoryDepartmentDetails"]/tr')
+        councillors = page.xpath('//table[contains(@id, "cityDirectoryDepartmentDetails")]/tr')
         assert len(councillors), "No councillors found"
         for councillor in councillors:
             name = " ".join(
@@ -36,7 +34,7 @@ def build_email(script):
             if role == "Mayor":
                 district = "Coquitlam"
             else:
-                district = "Coquitlam (seat {})".format(councillor_seat_number)
+                district = f"Coquitlam (seat {councillor_seat_number})"
                 councillor_seat_number += 1
 
             p = Person(primary_org="legislature", name=name, district=district, role=role)

diff --git a/ca_bc_langley/people.py b/ca_bc_langley/people.py
@@ -15,7 +15,7 @@ def scrape(self):
             page = self.lxmlize(url)
             name = page.xpath("//h1")[0].text_content().strip()
 
-            district = "Langley (seat {})".format(seat_number)
+            district = f"Langley (seat {seat_number})"
             seat_number += 1
             email = self.get_email(page)
             phone = self.get_phone(page)
@@ -34,7 +34,7 @@ def scrape(self):
         address_block = page.xpath('//p/a[@rel="noopener noreferrer"]/parent::p')[0].text_content()
         line1 = address_block[address_block.find("Facility") + 8 : address_block.find("Langley,")]
         line2 = address_block[address_block.find("Langley,") : address_block.find("Phone") - 1]
-        address = ", ".join([line1, line2])
+        address = f"{line1}, {line2}"
         p = Person(primary_org="legislature", name=name, role="Mayor", district="Langley")
         p.add_contact("email", email)
         p.add_contact("voice", phone, "legislature")