Skip to content

Commit

Permalink
Stop scraping councillor with incomplete information
Browse files: browse the repository at this point in the history.
  • Loading branch information
rafe-murray authored and jpmckinney committed May 31, 2024
1 parent 9a9e458 commit dcaab20
Showing 1 changed file with 0 additions and 22 deletions.
22 changes: 0 additions & 22 deletions ca_on_chatham_kent/people.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,28 +50,6 @@ def scrape(self):

page = self.lxmlize(COUNCIL_PAGE)

# getting 1 councillor with incomplete information
if seat_numbers["Ward 3"] < 2:
councillor_url = page.xpath(
'//a[contains(@href, "Councillors-by-Ward.aspx")]/following-sibling::ul[1]/li/a/@href'
)[-1]
councillor_page = self.lxmlize(councillor_url)

name = councillor_page.xpath("//h1")[0].text_content()
ward = "Ward 3"
seat_numbers[ward] += 1
district = "{} (seat {})".format(ward, seat_numbers[ward])

p = Person(primary_org="legislature", name=name, district=district, role="Councillor")
p.add_source(councillor_url)
address = councillor_page.xpath("//hr/following-sibling::*")[1].text_content()
p.add_contact("address", address, "legislature")
email = self.get_email(councillor_page)
p.add_contact("email", email)
phone = self.get_phone(councillor_page)
p.add_contact("voice", phone, "legislature")
yield p

mayor_url = page.xpath('//@href[contains(., "Mayor-")]')[0]
page = self.lxmlize(mayor_url)
contact_page = self.lxmlize(MAYOR_CONTACT_PAGE)
Expand Down

0 comments on commit dcaab20

Please sign in to comment.