This repository has been archived by the owner on Jun 14, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
odourcollect_downloader.py
292 lines (253 loc) · 10.3 KB
/
odourcollect_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import os
from csv import QUOTE_NONNUMERIC
import json
import pandas as pd
from pydantic import (BaseModel, conint, root_validator, validator, ValidationError)
from typing import Optional
from datetime import date, datetime, time
import requests
# URL of the OdourCollect API endpoint that lists odour observations.
OC_ENDPOINT = 'https://odourcollect.eu/api/odor/list'

# Odour type ID -> 'Category|Type' label. IDs are assigned sequentially from 1
# in the order the categories and their types are listed below.
TYPE_LIST = dict(enumerate(
    (
        f'{category}|{odour}'
        for category, odours in (
            ('Waste', (
                'Fresh waste',
                'Decomposed waste',
                'Leachate',
                'Biogas',
                'Biofilter',
                'Ammonia',
                'Amines',
                'Other',
                "I don't know",
            )),
            ('Waste Water', (
                'Waste water',
                'Rotten eggs',
                'Sludge',
                'Chlorine',
                'Other',
                "I don't know",
            )),
            ('Agriculture / Livestock', (
                'Dead animal',
                'Cooked meat',
                'Organic fertilizers (manure/slurry)',
                'Animal feed',
                'Cabbage soup',
                'Rotten eggs',
                'Ammonia',
                'Amines',
                'Other',
                "I don't know",
            )),
            ('Food Industries', (
                'Fat / Oil',
                'Coffee',
                'Cocoa',
                'Milk / Dairy',
                'Animal food',
                'Ammonia',
                'Malt / Hop',
                'Fish',
                'Bakeries',
                'Raw meat',
                'Ammines',
                'Cabbage soup',
                'Rotten eggs',
                'Bread / Cookies',
                'Alcohol',
                'Aroma / Flavour',
                'Other',
                "I don't know",
            )),
            ('Industrial', (
                'Cabbage soup',
                'Oil / Petrochemical',
                'Gas',
                'Asphalt / Rubber',
                'Chemical',
                'Ammonia',
                'Leather',
                'Metal',
                'Plastic',
                'Sulphur',
                'Alcohol',
                'Ketone / Ester / Acetate / Ether',
                'Amines',
                'Glue / Adhesive',
            )),
            ('Urban', (
                'Urine',
                'Traffic',
                'Sewage',
                'Waste bin',
                'Waste truck',
                'Sweat',
                'Cannabis',
                'Fresh grass',
                'Humidity / Wet soil',
                'Flowers',
                'Food',
                'Chimney (burnt wood)',
                'Paint',
                'Fuel',
                'Other',
                "I don't know",
            )),
            ('Nice', (
                'Flowers',
                'Food',
                'Bread / Cookies',
                'Fruit',
                'Fresh grass',
                'Forest / Trees / Nature',
                'Mint / Rosemary / Lavander',
                'Sea',
                'Perfume',
                'Chimney (burnt wood)',
                'Wood',
                'New book',
                'Other',
                "I don't know",
            )),
            ('No Odour', ('No Odour',)),
            ('Other', ('NA',)),
        )
        for odour in odours
    ),
    start=1,
))
# Odour category ID (1-9) -> human-readable description of the category.
CATEGORY_LIST = dict(enumerate(
    (
        'Waste related odours',
        'Waste water related odours',
        'Agriculture and livestock related odours',
        'Food Industries related odours',
        'Industry related odours',
        'Urban odours',
        'Nice odours',
        'Other odours not fitting elsewhere',
        'No odour observations (for testing, for reporting the end of an odour, etc.)',
    ),
    start=1,
))
# Annoy ID (1-9) -> hedonic tone value (-4 .. 4); ID 5 is the neutral value 0.
ANNOY_ID_TO_REAL_NUMBER = {annoy_id: annoy_id - 5 for annoy_id in range(1, 10)}

# Annoy ID (1-9) -> textual description of the hedonic tone.
ANNOY_ID_TO_DESCRIPTION = dict(enumerate(
    (
        'Extremely unpleasant',
        'Very unpleasant',
        'Unpleasant',
        'Slightly unpleasant',
        'Neutral',
        'Slightly pleasant',
        'Pleasant',
        'Very pleasant',
        'Extremely pleasant',
    ),
    start=1,
))
# Intensity ID (1-7) -> intensity value (0 .. 6).
INTENSITY_ID_TO_REAL_NUMBER = {
    intensity_id: intensity_id - 1 for intensity_id in range(1, 8)
}

# Intensity ID (1-7) -> textual description of the intensity.
INTENSITY_ID_TO_DESCRIPTION = dict(enumerate(
    (
        'Not perceptible',
        'Very weak',
        'Weak',
        'Noticeable',
        'Strong',
        'Very strong',
        'Extremely strong',
    ),
    start=1,
))
# Duration ID (0-3) -> textual description of how long the odour lasted.
DURATION_LIST = dict(enumerate((
    '(No odour)',
    'Punctual',
    'Continuous in the last hour',
    'Continuous throughout the day',
)))
def day_of_week(whatdate: date) -> str:
    """Return the English weekday name ("Monday" .. "Sunday") of *whatdate*."""
    # date.weekday() numbers the days 0 (Monday) to 6 (Sunday), which matches
    # the order of the tuple being indexed.
    return (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )[whatdate.weekday()]
class Odour(BaseModel):
    """Pydantic model for a single odour observation.

    NOTE(review): this model is not instantiated anywhere in the visible code;
    the field meanings below are inferred from the field names and should be
    confirmed against whatever code consumes it.
    """
    id: int  # observation identifier
    userid: int  # presumably the ID of the user who reported the observation
    category: str  # odour category label
    type: str  # odour type label
    intensity: str  # intensity, as text
    annoy: str  # annoy / hedonic tone, as text
    duration: str  # duration, as text
    observeddatetime: datetime  # presumably when the odour was observed
    observedtimeonly: time  # presumably the time-of-day part of the observation
    latitude: float  # GPS latitude of the observation
    longitude: float  # GPS longitude of the observation
class OdourType(BaseModel):
    """Pydantic model pairing an odour type ID with its category and type labels.

    NOTE(review): presumably mirrors one entry of TYPE_LIST ('Category|Type');
    the model is not used in the visible code, so confirm against callers.
    """
    id: int  # odour type identifier
    category: str  # category label
    type: str  # type label
class OdourIntensity(BaseModel):
    """Pydantic model describing one odour intensity level.

    NOTE(review): presumably combines INTENSITY_ID_TO_REAL_NUMBER and
    INTENSITY_ID_TO_DESCRIPTION for one ID; not used in the visible code.
    """
    id: int  # intensity identifier
    value: int  # numeric intensity value
    desc: str  # textual description
class OdourAnnoy(BaseModel):
    """Pydantic model describing one annoy (hedonic tone) level.

    NOTE(review): presumably combines ANNOY_ID_TO_REAL_NUMBER and
    ANNOY_ID_TO_DESCRIPTION for one ID; not used in the visible code.
    """
    id: int  # annoy identifier
    value: int  # numeric annoy value
    desc: str  # textual description
class OdourDuration(BaseModel):
    """Pydantic model describing one odour duration option.

    NOTE(review): presumably mirrors one entry of DURATION_LIST; not used in
    the visible code.
    """
    id: int  # duration identifier
    desc: str  # textual description
class OCRequest(BaseModel):
    """Filter criteria for a request of OdourCollect observations.

    Every field is optional; ``None`` means the criterion is not filtered on.
    Building an instance fails with pydantic's ``ValidationError`` when a field
    is out of range, when a minimum is greater than its matching maximum, or
    when ``date_init`` is later than ``date_end``.
    """
    # OdourCollect's odour type (called "category" here). 0 = All, 1-9 = filters
    type: Optional[conint(ge=0, le=9)] = None
    # OdourCollect's odour subtype (called "type" here). 0 = All, 1-89 = filters
    subtype: Optional[conint(ge=0, le=89)] = None
    # OdourCollect's "hedonic tone", from -4 to 4. 0 = neutral.
    minAnnoy: Optional[conint(ge=-4, le=4)] = None
    maxAnnoy: Optional[conint(ge=-4, le=4)] = None
    # "intensity" in OdourCollect, from 0 to 6
    minIntensity: Optional[conint(ge=0, le=6)] = None
    maxIntensity: Optional[conint(ge=0, le=6)] = None
    date_init: Optional[date] = None  # yyyy-mm-dd
    date_end: Optional[date] = None  # yyyy-mm-dd

    @root_validator()
    def validate_ocrequest(cls, values):
        """Reject requests whose lower bound exceeds the matching upper bound.

        Raises:
            ValueError: if a min/max pair or the date range is inverted
                (pydantic reports it as a ``ValidationError``).
        """
        # The keys must match the field names exactly (they are case-sensitive).
        range_checks = (
            ('minAnnoy', 'maxAnnoy',
             'Min annoy can\'t be greater than max annoy'),
            ('minIntensity', 'maxIntensity',
             'Min intensity can\'t be greater than max intensity'),
            ('date_init', 'date_end',
             'Starting date can\'t be later than ending date'),
        )
        for lower_key, upper_key, message in range_checks:
            lower = values.get(lower_key)
            upper = values.get(upper_key)
            # Compare against None explicitly: 0 is a legitimate bound
            # (neutral hedonic tone, lowest intensity) and must be checked.
            if lower is not None and upper is not None and lower > upper:
                raise ValueError(message)
        return values
class GPScoords(BaseModel):
    """A latitude/longitude pair, validated to lie within the valid GPS ranges.

    Building an instance fails with pydantic's ``ValidationError`` when ``lat``
    is outside [-90, 90] or ``long`` is outside [-180, 180].
    """
    lat: float
    long: float

    @validator('lat')
    def validate_lat(cls, v):
        """Return *v* unchanged if it is a valid latitude, else raise ValueError."""
        if v < -90.0 or v > 90.0:
            # Validators must raise ValueError; pydantic wraps it into the
            # ValidationError seen by callers. ValidationError itself cannot
            # be constructed from a plain message.
            raise ValueError(f'Incorrect GPS latitude value detected: {v}')
        return v

    @validator('long')
    def validate_long(cls, v):
        """Return *v* unchanged if it is a valid longitude, else raise ValueError."""
        if v < -180.0 or v > 180.0:
            raise ValueError(f'Incorrect GPS longitude value detected: {v}')
        return v
def build_df(json_response) -> pd.DataFrame:
    """Turn the raw JSON text of an OdourCollect response into a DataFrame.

    The JSON document must be an object whose "content" key holds the list of
    observation records. Each record is expected to provide the fields
    ``id``, ``id_user``, ``published_at``, ``id_odor_type``, ``id_odor_annoy``,
    ``id_odor_intensity``, ``latitude`` and ``longitude``.

    Args:
        json_response: JSON text as returned by the API.

    Returns:
        A DataFrame with the columns ``id``, ``user``, ``published_at``,
        ``type``, ``hedonic_tone``, ``intensity``, ``latitude`` and
        ``longitude``, where the type, hedonic tone and intensity IDs have been
        replaced by their textual descriptions.

    Raises:
        SystemExit: with code 2 if the response is not valid JSON or has no
            "content" key; with code 1 if it contains no observations.
    """
    try:
        payload = json.loads(json_response)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
        print('Received data is not valid JSON:')
        print(json_response)
        raise SystemExit(2) from None
    try:
        observationslist = payload['content']
    except (KeyError, TypeError):
        # TypeError: the document is valid JSON but not an object (e.g. a list).
        print('Received JSON data does not have a "content" key:')
        print(json_response)
        raise SystemExit(2) from None
    if not observationslist:
        print('No data for criteria specified')
        raise SystemExit(1)

    ocdf = pd.DataFrame(observationslist)

    # DATA TRANSFORMS
    # USERS: store the user ID as text so it is clearly categoric, not numeric
    ocdf['id_user'] = ocdf['id_user'].apply(str)
    ocdf.rename(columns={'id_user': 'user'}, inplace=True)

    # TYPE_LIST labels have the form 'Category|Type'; split them into two columns.
    ocdf['category'] = ocdf['id_odor_type']
    ocdf.replace(inplace=True, to_replace={'category': TYPE_LIST})
    ocdf[['category', 'type']] = ocdf['category'].str.split('|', n=1, expand=True)

    ocdf['hedonic_tone'] = ocdf['id_odor_annoy']
    ocdf.replace(inplace=True, to_replace={'hedonic_tone': ANNOY_ID_TO_DESCRIPTION})

    ocdf['intensity'] = ocdf['id_odor_intensity']
    ocdf.replace(inplace=True, to_replace={'intensity': INTENSITY_ID_TO_DESCRIPTION})

    # And reorder fields (this also drops the raw ID columns and 'category')
    ocdf = ocdf[
        ['id', 'user', 'published_at', 'type', 'hedonic_tone', 'intensity', 'latitude', 'longitude']]
    return ocdf
def get_oc_data(timeout: float = 60.0) -> pd.DataFrame:
    """Download the observations from OdourCollect and return them as a DataFrame.

    Sends a POST request (without filters) to ``OC_ENDPOINT`` and hands the
    response body to ``build_df``.

    Args:
        timeout: seconds to wait for the server before giving up; without a
            timeout the request could block indefinitely.

    Returns:
        The DataFrame produced by ``build_df``.

    Raises:
        SystemExit: with code 1 if the request fails or the server does not
            answer with HTTP 200 (``build_df`` may also exit, see there).
    """
    try:
        r = requests.post(OC_ENDPOINT, verify=True, timeout=timeout)
    except requests.RequestException as err:
        print(f'Could not get data from OdourCollect: {err}')
        raise SystemExit(1) from None
    if r.status_code != 200:
        print(f'Unexpected HTTP code received: {r.status_code}')
        raise SystemExit(1)
    return build_df(r.text)
if __name__ == '__main__':
    # Script entry point: download the data and store it as odourcollect.csv.
    print('Getting latest OdourCollect data...')
    df = get_oc_data()
    print('Saving obtained data to file odourcollect.csv...')
    # Write to a temporary file first so that a failure while writing cannot
    # leave a truncated odourcollect.csv behind.
    df.to_csv('odourcollect-temp.csv', quoting=QUOTE_NONNUMERIC, index=False)
    # os.replace overwrites an existing destination in a single step, so there
    # is no moment at which odourcollect.csv is missing (unlike remove + rename).
    os.replace('odourcollect-temp.csv', 'odourcollect.csv')