-
Notifications
You must be signed in to change notification settings - Fork 21
/
parse-postcodes_2010.rb
executable file
·83 lines (69 loc) · 2.14 KB
/
parse-postcodes_2010.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env ruby
# frozen_string_literal: true
$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/lib"
require "rubygems"
require "mechanize"
require "people"
# HTTP client handed to the scraping helpers below.
agent = Mechanize.new

puts "Reading Australia post office data..."
# NOTE(review): CSV is not required explicitly in this file; presumably it is
# loaded by one of the requires above - confirm.
# The first row of the file is a header, so it is dropped before the first
# column of every remaining row is collected into a sorted, duplicate-free list.
postcode_rows = CSV.readlines("data/pc-full_20100629.csv").drop(1)
valid_postcodes = postcode_rows.map(&:first).uniq.sort
# Collects one division name per result row of a search results page.
#
# The text of the 4th and 5th cell of each row is read; exactly one of the two
# is expected to be filled in, and that value is the one kept for the row.
#
# @param page an object responding to +search+ whose results respond to
#   +inner_text+ (presumably a Mechanize page - confirm against caller)
# @return [Array<String>] the chosen cell text for every row, in page order
# @raise [RuntimeError] if the two columns differ in length, or if a row has
#   text in both columns
def extract_divisions_from_page(page)
  current_names = page.search("div/table/tr/td[4]").map(&:inner_text)
  redistributed_names = page.search("div/table/tr/td[5]").map(&:inner_text)
  unless current_names.size == redistributed_names.size
    raise "expected same number of divisions as redistributed divisions"
  end

  current_names.zip(redistributed_names).map do |current_name, redistributed_name|
    # A blank 4th cell means the row's value (possibly blank too) comes from the 5th.
    next redistributed_name if current_name == ""
    next current_name if redistributed_name == ""

    raise "don't expect both columns to have values"
  end
end
# Tells whether the results page has a table nested inside another table.
#
# NOTE(review): presumably the nested table is the paging control of the
# results grid - confirm against the live page markup.
#
# @param page an object responding to +at+
# @return whatever +page.at+ returns for the selector; callers only rely on
#   its truthiness
def other_pages?(page)
  nested_table = page.at("table table")
  nested_table
end
# Looks up every division listed for a postcode on the AEC locality search.
#
# The first results page is fetched with +agent+. When +other_pages?+ reports
# more pages, the "aspnetForm" form is re-submitted with an increasing
# "Page$N" event argument until a page yields no divisions. Progress is
# printed to standard output along the way.
#
# @param agent an object responding to +get(url)+ (a Mechanize agent in this script)
# @param postcode the postcode interpolated into the search URL
# @return [Array<String>] division names with duplicates removed, sorted
def extract_divisions_for_postcode(agent, postcode)
  current_page = agent.get("http://apps.aec.gov.au/esearch/LocalitySearchResults.aspx?filter=#{postcode}&filterby=Postcode")
  puts "Postcode #{postcode}..."
  page_number = 1
  puts " Page #{page_number}..."
  collected = extract_divisions_from_page(current_page)

  if other_pages?(current_page)
    page_number += 1
    loop do
      puts " Page #{page_number}..."
      # Paging is driven by posting the page's own form back with the grid as
      # the event target and the wanted page number as the event argument.
      postback = current_page.form_with(name: "aspnetForm")
      postback["__EVENTTARGET"] = "ctl00$ContentPlaceHolderBody$gridViewLocalities"
      postback["__EVENTARGUMENT"] = "Page$#{page_number}"
      current_page = postback.submit
      batch = extract_divisions_from_page(current_page)
      collected += batch
      # An empty page is taken to mean we ran past the last page of results.
      break if batch.empty?

      page_number += 1
    end
  end

  # Remove duplicates and sort
  collected.uniq.sort
end
# Write one "postcode,division" row per division found for each valid postcode.
# The block form of File.open closes (and therefore flushes) the output file
# when the block exits, including when a scrape raises part-way through, so
# rows already written are not left sitting in an unflushed buffer.
File.open("data/postcodes_2010.csv", "w") do |file|
  file.puts("Postcode,Electoral division name")
  # NOTE(review): this emits a row holding only a comma right after the
  # header; kept exactly as before - confirm that consumers expect it.
  file.puts(",")
  valid_postcodes.each do |postcode|
    divisions = extract_divisions_for_postcode(agent, postcode)
    if divisions.empty?
      puts " * No divisions *"
    else
      puts " #{divisions.join(', ')}"
      divisions.each do |division|
        file.puts "#{postcode},#{division}"
      end
    end
  end
end