Scrape the correct CSV file; remove webpage scraping

opencivicdata · Nov 4, 2024 · 0b80560
1 parent b85cd51
commit 0b80560
Showing 1 changed file with 5 additions and 50 deletions.
diff --git a/ca_on_guelph/people.py b/ca_on_guelph/people.py
@@ -1,52 +1,7 @@
-from utils import CanadianPerson as Person
-from utils import CanadianScraper
+from utils import CSVScraper
 
-COUNCIL_PAGE = "https://guelph.ca/city-hall/mayor-and-council/city-council/"
-MAYOR_PAGE = "https://guelph.ca/city-hall/mayor-and-council/mayors-office/"
 
-
-class GuelphPersonScraper(CanadianScraper):
-    def scrape(self):
-        page = self.lxmlize(COUNCIL_PAGE)
-
-        councillor_nodes = page.xpath('.//div[@class="thumbnail"]')[1:]
-        assert len(councillor_nodes), "No councillors found"
-
-        for councillor_node in councillor_nodes:
-            district = councillor_node.xpath(".//h2/text()")[0].split("Councillors")[0].strip()
-
-            councillors = councillor_node.xpath(".//div/div")
-            for councillor in councillors:
-                role_and_name = councillor.xpath(".//h3/text()")
-                if not role_and_name:
-                    continue
-
-                role_and_name = councillor.xpath(".//h3/text()")[0]
-                role, name = role_and_name.split(" ", 1)
-                phone = councillor.xpath(".//p/text()")[1].strip()
-                email = self.get_email(councillor)
-                image = councillor.xpath(".//img/@src")[0]
-
-                p = Person(primary_org="legislature", name=name, district=district, role=role, image=image)
-                p.add_contact("email", email)
-                if phone:
-                    p.add_contact("voice", phone, "legislature")
-                p.add_source(COUNCIL_PAGE)
-
-        yield self.scrape_mayor(MAYOR_PAGE)
-
-    def scrape_mayor(self, url):
-        page = self.lxmlize(url)
-
-        mayor_node = page.xpath('.//div[@class="entry-content"]/p')[-1]
-        name = mayor_node.xpath(".//text()")[0].strip().split("Mayor ")[1]
-        phone = self.get_phone(mayor_node)
-        email = self.get_email(mayor_node)
-        image = mayor_node.xpath('//img[contains(@alt, "Mayor")]/@src')[0]
-
-        p = Person(primary_org="legislature", name=name, district="Guelph", role="Mayor", image=image)
-        p.add_contact("voice", phone, "legislature")
-        p.add_contact("email", email)
-        p.add_source(MAYOR_PAGE)
-
-        return p
+class GuelphPersonScraper(CSVScraper):
+    # https://explore.guelph.ca/documents/5ec8d85028c94e83be12a9f01d14eb7f/about
+    csv_url = "https://gismaps.guelph.ca/OpenData/guelph-city-council.csv"
+    many_posts_per_area = True