forked from fitnr/acris-download
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
186 lines (141 loc) · 5.76 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# ACRIS downloader
# Make tasks for downloading real estate property transactions from NYC's open data site
# Copyright (C) 2015-16 Neil Freeman
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
API = https://data.cityofnewyork.us/api/views
# Download slugs for data files on https://data.cityofnewyork.us
real_property_legals = 8h5j-fqxa
real_property_master = bnx9-e6tj
real_property_parties = 636b-3b5g
real_property_references = pwkr-dpni
real_property_remarks = 9p4w-7npp
personal_property_legals = uqqa-hym2
personal_property_master = sv7x-dduq
personal_property_parties = nbbg-wtuz
personal_property_references = 6y3e-jcrc
personal_property_remarks = fuzi-5ks9
country_codes = j2iz-mwzu
document_control_codes = 7isb-wh4c
property_type_codes = 94g4-w6xz
ucc_collateral_codes = q9kp-jvxv
PERSONAL_BASIC = personal_property_legals \
personal_property_master \
personal_property_parties \
REAL_BASIC = real_property_legals \
real_property_master \
real_property_parties
PERSONAL_REF = personal_property_references personal_property_remarks
REAL_REF = real_property_references real_property_remarks
EXTRAS = country_codes \
document_control_codes \
property_type_codes \
ucc_collateral_codes
DATA = $(EXTRAS) \
$(PERSONAL_BASIC) \
$(PERSONAL_REF) \
$(REAL_BASIC) \
$(REAL_REF)
RAWS = $(foreach a,$(DATA),data/$a.raw)
IDX_document_control_codes = doctype
IDX_country_codes = countrycode
IDX_property_type_codes = propertytype
IDX_ucc_collateral_codes = ucccollateralcode
IDX_personal_property_legals = documentid
IDX_personal_property_master = documentid
IDX_personal_property_parties = documentid
IDX_personal_property_references = documentid
IDX_personal_property_remarks = documentid
IDX_real_property_legals = documentid
IDX_real_property_master = documentid
IDX_real_property_parties = documentid
IDX_real_property_references = documentid
IDX_real_property_remarks = documentid
DATABASE = acris
HOST = localhost
PASSFLAG = -p
MYSQL = mysql -u '$(USER)' $(PASSFLAG)$(PASS) -h $(HOST) $(MYSQLFLAGS)
PSQL = psql -U "$(USER)" $(PSQLFLAGS)
SQLITE = sqlite3 $(DATABASE).db
CSVSQLFLAGS = --tables $* --no-constraints --no-inference
CURLFLAGS = -GL
.PHONY: clean install download \
sqlite sqlite_% \
psql psql_% \
mysql mysql_%
download: $(foreach a,$(REAL_BASIC) $(EXTRAS),data/$a.csv)
sqlite: $(foreach a,$(REAL_BASIC),sqlite_$a) | sqlite_extras
sqlite_real_complete: $(foreach a,$(REAL_REF),sqlite_$a) | sqlite
sqlite_personal: $(foreach a,$(PERSONAL_BASIC),sqlite_$a) | sqlite_extras
sqlite_personal_complete: $(foreach a,$(PERSONAL_REF),sqlite_$a) | sqlite_personal
sqlite_extras: $(foreach a,$(EXTRAS),sqlite_$a)
mysql: $(foreach a,$(REAL_BASIC),mysql_$a) | mysql_extras
mysql_real_complete: $(foreach a,$(REAL_REF),mysql_$a) | mysql
mysql_personal: $(foreach a,$(PERSONAL_BASIC),mysql_$a) | mysql_extras
mysql_personal_complete: $(foreach a,$(PERSONAL_REF),mysql_$a) | mysql_personal
mysql_extras: $(foreach a,$(EXTRAS),mysql_$a)
psql: $(foreach a,$(REAL_BASIC),psql_$a) | psql_extras
psql_real_complete: $(foreach a,$(REAL_REF),psql_$a) | psql
psql_personal: $(foreach a,$(PERSONAL_BASIC),psql_$a) | psql_extras
psql_personal_complete: $(foreach a,$(PERSONAL_REF),psql_$a) | psql_personal
psql_extras: $(foreach a,$(EXTRAS),psql_$a)
# MySQL
mysql_%: data/%.csv | mysql_create
$(MYSQL) $(DATABASE) \
-e "DROP TABLE IF EXISTS $*;"
head -n1000 $< | \
csvsql -i mysql --tables $* | \
mysql $(DATABASE) -u $(USER) -p$(PASS) -h $(HOST)
$(MYSQL) $(DATABASE) \
-e "ALTER TABLE $* ADD INDEX $*_idx ($(IDX_$*))"
$(MYSQL) $(DATABASE) -u $(USER) -p$(PASS) -h $(HOST) --local-infile \
-e "LOAD DATA LOCAL INFILE '$<' INTO TABLE $(DATABASE).$* \
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' LINES TERMINATED BY '\n' \
IGNORE 1 LINES;"
mysql_create: ; $(MYSQL) -e "CREATE DATABASE IF NOT EXISTS $(DATABASE)"
# SQLite
sqlite_%: data/%.csv
head -n1000 $< | \
csvsql $(CSVSQLFLAGS) --db sqlite:///$(DATABASE).db
$(SQLITE) "CREATE INDEX $*_idx ON $* ($(IDX_$*))"
tail -n+2 $< | $(SQLITE) -separator , '.import /dev/stdin $*'
# Postgres
psql_%: data/%.csv | psql_create
head -n1000 $< | \
csvsql $(CSVSQLFLAGS) --db postgresql://$(USER):$(PASS)@$(HOST)/$(DATABASE)
tail -n+2 $< | \
$(PSQL) $(DATABASE) -c "COPY $* FROM STDIN DELIMITER ',' CSV QUOTE '\"';"
psql_create:
$(PSQL) -c "CREATE DATABASE $(DATABASE)" || echo "$(DATABASE) probably exists"
# Data download
# Try to get pretty column names by deleting spaces, periods, slashes, replacing '%' with 'perc'.
# replace MM/DD/YYYY with YYYY-MM-DD
data/%.csv: data/%.raw
{ \
head -n1 $< | \
awk '{ gsub(/[ \.\/]/, ""); sub("%", "perc"); sub("\#", "nbr"); print; }' | \
tr '[:upper:]' '[:lower:]'; \
tail -n+2 $< | \
sed -e 's/,\([01][0-9]\)\/\([0123][0-9]\)\/\([0-9]\{4\}\)/,\3-\1-\2/g'; \
} > $@
.INTERMEDIATE: data/%.raw
$(RAWS): data/%.raw: | data
curl $(CURLFLAGS) -o $@ $(API)/$($*)/rows.csv -d accessType=DOWNLOAD
data: ; mkdir -p $@
mysql_clean: | clean
$(MYSQL) -e "DROP DATABASE IF EXISTS $(DATABASE)"
sqlite_clean: | clean
rm -rf data $(DATABASE).db
psql_clean: | clean
$(PSQL) -c "DROP DATABASE $(DATABASE)"
clean: ; rm -rf data
install: requirements.txt
pip install $(INSTALLFLAGS) --requirement=$<