forked from everypolitician/everypolitician-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile.rb
113 lines (97 loc) · 3.44 KB
/
Rakefile.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
require 'fileutils'
require 'iso_country_codes'
require 'pathname'
require 'pry'
require 'tmpdir'
require 'json'
# Country list as returned by IsoCountryCodes.for_select. name_to_iso_code
# treats each entry as a pair: the first element is compared against a
# country name and the last element is returned as the code.
ISO = IsoCountryCodes.for_select

# Paths derived (via pathmap '%d') from every data/*/*/Rakefile.rb, skipping
# any whose directory contains a file named WIP.
@HOUSES = FileList['data/*/*/Rakefile.rb']
          .map { |rakefile| rakefile.pathmap '%d' }
          .reject { |house_dir| File.exist? "#{house_dir}/WIP" }
# Look up the code for a country name in ISO.
#
# An entry whose name equals +name+ wins; otherwise the first entry whose
# name starts with +name+ is used.
#
# @param name [String] country name to look up
# @return the last element of the matching ISO entry
# @raise [RuntimeError] if no entry matches
def name_to_iso_code(name)
  exact = ISO.find { |iname, _| iname == name }
  return exact.last if exact

  # Fall back to a prefix match only when there is no exact match.
  prefixed = ISO.find { |iname, _| iname.start_with? name }
  return prefixed.last if prefixed

  raise "Can't find country code for #{name}"
end
def json_from(json_file)
statements = 0
json = JSON.load(File.read(json_file), lambda { |h|
statements += h.values.select { |v| v.class == String }.count if h.class == Hash
}, symbolize_names: true)
return json, statements
end
# Serialize +json+ with JSON.pretty_generate and write it to +file+.
#
# @param file [String] path to write to
# @param json [Object] data to serialize
# @return whatever File.write returns for the written text
def json_write(file, json)
  pretty = JSON.pretty_generate(json)
  File.write(file, pretty)
end
# List the legislative periods in +json+ that have a CSV file under +h+.
#
# Every event classified as 'legislative period' is modified in place: its
# :classification and :organization_id keys are removed, :slug defaults to
# the last '/'-separated segment of :id, and :csv is set to the expected
# file path. Only periods whose :csv file exists are returned, ordered by
# :start_date (as a string), latest first.
#
# @param json [Hash] data with an :events list
# @param h [String] directory expected to hold the term-<slug>.csv files
# @return [Array<Hash>] the matching (mutated) event hashes
def terms_from(json, h)
  periods = json[:events].select { |event| event[:classification] == 'legislative period' }
  latest_first = periods.sort_by { |period| period[:start_date].to_s }.reverse

  latest_first.each do |period|
    period.delete :classification
    period.delete :organization_id
    period[:slug] ||= period[:id].split('/').last
    period[:csv] = h + "/term-#{period[:slug]}.csv"
  end

  latest_first.select { |period| File.exist? period[:csv] }
end
# Return the name of the single organization classified as 'legislature'.
#
# @param json [Hash] data with an :organizations list
# @return the :name of that organization
# @raise [RuntimeError] unless exactly one such organization exists
def name_from(json)
  legislatures = json[:organizations].select { |org| org[:classification] == 'legislature' }
  return legislatures.first[:name] if legislatures.count == 1

  raise "Wrong number of legislatures (#{legislatures})"
end
desc 'Install country-list locally'
task 'countries.json' do
  # By default we build every country, but if EP_COUNTRY_REFRESH is set
  # we only build any country that contains that string. For example:
  #    EP_COUNTRY_REFRESH=Latvia be rake countries.json
  to_build = ENV['EP_COUNTRY_REFRESH'] || 'data'
  countries = @HOUSES.group_by { |h| h.split('/')[1] }.select do |_, hs|
    hs.any? { |h| h.include? to_build }
  end

  # Start from the existing list so that countries which are not rebuilt in
  # this run are kept. A missing or unreadable file means an empty list; the
  # list must be an Array for the update and sort steps below to work.
  data = begin
    json_from('countries.json').first
  rescue StandardError => e
    warn "Could not read countries.json (#{e.message}); starting from an empty list"
    nil
  end
  data = [] unless data.is_a?(Array)

  # URL template, filled in with a commit sha and a path inside the repository.
  remote_source = 'https://cdn.rawgit.com/everypolitician/everypolitician-data/%s/%s'

  countries.each do |c, hs|
    # Optional per-country overrides (keys read below: 'name', 'iso_code').
    meta_file = hs.first + '/../meta.json'
    meta = File.exist?(meta_file) ? JSON.load(File.read(meta_file)) : {}
    meta ||= {}

    name = meta['name'] || c.tr('_', ' ')
    slug = c.tr('_', '-')
    country = {
      name: name,
      # Deprecated — will be removed soon!
      country: name,
      code: (meta['iso_code'] || name_to_iso_code(name)).upcase,
      slug: slug,
      legislatures: hs.map { |h|
        json_file = h + '/ep-popolo-v1.0.json'
        name_file = h + '/names.csv'
        popolo, statement_count = json_from(json_file)

        # Abbreviated hash and author timestamp of the last commit touching
        # this directory. The command is passed as an argument list, so the
        # path is never interpreted by a shell.
        git_log = IO.popen(['git', '--no-pager', 'log', '--format=%h|%at', '-1', '--', h], &:read)
        sha, lastmod = git_log.chomp.split('|')

        lname = name_from(popolo)
        lslug = h.split('/').last.tr('_', '-')
        {
          name: lname,
          slug: lslug,
          sources_directory: "#{h}/sources",
          popolo: json_file,
          popolo_url: remote_source % [sha, json_file],
          names: name_file,
          lastmod: lastmod,
          person_count: popolo[:persons].size,
          sha: sha,
          legislative_periods: terms_from(popolo, h).each { |t| t[:csv_url] = remote_source % [sha, t[:csv]] },
          statement_count: statement_count,
        }
      },
    }

    # Replace the existing entry for this country, or append it when the
    # country is not in the list yet.
    position = data.find_index { |known| known[:name] == country[:name] }
    if position
      data[position] = country
    else
      data << country
    end
  end

  json_write('countries.json', data.sort_by { |entry| entry[:name] })
end
require 'rake/testtask'

# Register a Rake::TestTask (no task name is given, so Rake's default name
# applies) that runs every test/*_test.rb file verbosely with 'test' added
# to the task's libs list.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.libs << 'test'
  test_task.test_files = FileList['test/*_test.rb']
end