forked from belowtheline/site_2014
-
Notifications
You must be signed in to change notification settings - Fork 0
/
feed_gvts.py
97 lines (75 loc) · 2.94 KB
/
feed_gvts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
import json
import os
import os.path
import zipfile
from bs4 import BeautifulSoup
# Root directory for generated data; inhale() writes group files beneath it
# and checks candidate records against it.
DATA_DIR = 'newdata'

# Contest name as it appears in the feed's ContestName element -> state id.
STATES = {
    'Australian Capital Territory': 'state/act',
    'New South Wales': 'state/nsw',
    'Victoria': 'state/vic',
    'Queensland': 'state/qld',
    'Western Australia': 'state/wa',
    'South Australia': 'state/sa',
    'Northern Territory': 'state/nt',
    'Tasmania': 'state/tas',
}

# Table for unicode.translate()/str.translate(): mapping a code point to None
# deletes it, so spaces, hyphens and apostrophes are dropped from ids.
ID_TRANSLATION = dict.fromkeys(map(ord, u" -'"))
def division_name(name):
    """Return the ``division/...`` id for an electoral division name.

    The name is lower-cased and every character listed in ID_TRANSLATION
    is removed before the ``division/`` prefix is added.
    """
    slug = name.lower().translate(ID_TRANSLATION)
    return 'division/' + slug
def candidate_id(first, last):
    """Return the ``people/<last>-<first>`` id for a candidate.

    Both name parts are lower-cased and stripped of the characters listed
    in ID_TRANSLATION; note the id puts the last name first.
    """
    given, family = (
        part.lower().translate(ID_TRANSLATION) for part in (first, last)
    )
    return 'people/{}-{}'.format(family, given)
def group_id(state, code):
    """Return the ``group/<state>-<code>`` id for a group.

    ``state`` is a slash-separated id such as the values of STATES; only the
    part after the first slash is used. ``code`` is lower-cased.
    """
    abbreviation = state.split('/')[1]
    ticket_code = code.lower()
    return 'group/{}-{}'.format(abbreviation, ticket_code)
def jsonify(obj):
    """Serialise ``obj`` to JSON text with sorted keys and 4-space indent.

    Explicit separators are passed so no trailing whitespace follows the
    commas at line ends.
    """
    options = {
        'sort_keys': True,
        'indent': 4,
        'separators': (',', ': '),
    }
    return json.dumps(obj, **options)
def _find_gvt_member(names):
    """Return the first archive member name that holds the GVT media feed.

    Raises ValueError when no name matches, instead of silently falling
    through to whatever member happened to be listed last.
    """
    prefix = 'xml/aec-mediafeed-groupvotingtickets'
    for name in names:
        if name.startswith(prefix):
            return name
    raise ValueError(
        'no member starting with {!r} in preload archive'.format(prefix))


def _candidate_ref(candidate):
    """Return the people/ id for a Candidate element.

    The element's CandidateName text is split on the first ', ' into
    last name and first name.
    """
    last, first = candidate.find('CandidateName').text.split(', ', 1)
    return candidate_id(first, last)


def inhale(preload):
    """Write one JSON file per group from the group voting ticket feed.

    preload -- the AEC preload Zip archive; passed straight to
               zipfile.ZipFile.

    For every Group in every Contest of the feed, a file named after
    group_id() is written under DATA_DIR containing the group's code, name,
    candidate ids and tickets. Each ticket is a list of candidate ids
    ordered by ascending preference number.

    Raises ValueError if the archive has no group voting ticket feed,
    KeyError if a contest name is not in STATES, and AssertionError if a
    group candidate has no existing record file under DATA_DIR.
    """
    # makedirs also creates DATA_DIR itself when it is missing.
    subdir = os.path.join(DATA_DIR, 'group')
    if not os.path.isdir(subdir):
        os.makedirs(subdir)

    # Read the feed fully inside the with-block so the archive is closed
    # even if locating or reading the member fails.
    with zipfile.ZipFile(preload) as archive:
        feed = archive.read(_find_gvt_member(archive.namelist()))
    gvts = BeautifulSoup(feed, 'xml')

    for contest in gvts.find_all('Contest'):
        state = STATES[contest.find('ContestName').text]

        for group in contest.find_all('Group'):
            data = {
                'code': group.find('Ticket').text,
                'name': group.find('GroupName').text,
                'candidates': [],
                'tickets': [],
            }

            # recursive=False: only the group's own candidates, not the
            # Candidate elements nested inside its tickets.
            for candidate in group.find_all('Candidate', recursive=False):
                person = _candidate_ref(candidate)
                record = os.path.join(DATA_DIR, person) + '.json'
                # Explicit raise (same exception type as the former assert)
                # so the check still runs under ``python -O``.
                if not os.path.exists(record):
                    raise AssertionError(
                        'no candidate record at {!r}'.format(record))
                data['candidates'].append(person)

            for ticket in group.find_all('GroupVotingTicket'):
                by_preference = {}
                for candidate in ticket.find_all('Candidate'):
                    person = _candidate_ref(candidate)
                    preference = int(candidate.find('Preference').text)
                    by_preference[preference] = person
                data['tickets'].append(
                    [by_preference[p] for p in sorted(by_preference)])

            filename = os.path.join(DATA_DIR, group_id(state, data['code']))
            filename += '.json'
            with open(filename, 'w') as handle:
                handle.write(jsonify(data))
if __name__ == '__main__':
    # Command-line entry point: takes the preload archive path as the single
    # positional argument and hands it to inhale().
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument('preload', nargs=1, help="AEC preload Zip archive")
    # nargs=1 yields a one-element list; unpack its only item.
    (archive_path,) = cli.parse_args().preload
    inhale(archive_path)