# config.py
# NOTE: the text that preceded the module here was web-page residue (repository
# UI strings and a 1..384 line-number gutter), not part of the Python source.
import os
import osmnx as ox
# ---------------------------------------------------------------------------
# Global run switches
# ---------------------------------------------------------------------------
verbose = 2
pickle_protocol = 5  # presumably the protocol number handed to pickle -- confirm at call sites

# Debug toggles.
DEBUG = False
DEBUG_TRUNCATE = False

# Switches for visualising intermediate steps, one per component.
MASTER_VISUALISE_EACH_STEP = False
MASTER_VISUALISE_EACH_STEP_INSIDE_RN_class = False
MASTER_VISUALISE_EACH_STEP_INSIDE_PrepNetwork_class = True
MASTER_VISUALISE_EACH_STEP_INSIDE_ShapAnalysisScript = False
MASTER_VISUALISE_FEATURE_DIST_INSIDE_ShapAnalysisScript = True

# Congestion scenario label; only the three values checked below are accepted.
CONGESTION_TYPE = "RECURRENT"
assert CONGESTION_TYPE in ("RECURRENT", "NON-RECURRENT", "NON-RECURRENT-MMM")
# Root folder of the project on each of the two known machines.
# Alternative local copy used previously:
#   "/Users/nishant/Downloads/WCS_from_server_local_copy_withy_new_plots"
BASE_FOLDER_local = "/Users/nishant/Documents/GitHub/WCS"
BASE_FOLDER_server = "/home/niskumar/WCS"
delete_results_folder = True

# The machine is recognised from the first component of the working
# directory: "/home/..." means server, everything else is treated as local.
cur_dir = os.getcwd()
_cwd_parts = cur_dir.split("/")
# A working directory without any "/" has no second component; indexing [1]
# directly would raise IndexError, so fall back to an empty marker instead.
_cwd_root = _cwd_parts[1] if len(_cwd_parts) > 1 else ""

# Exactly one of the two flags is True. A working directory under neither
# "/home" nor "/Users" keeps the local defaults, and BASE_FOLDER is bound in
# that case too (it used to be left undefined, failing with NameError below).
RUNNING_ON_SERVER = _cwd_root == "home"
RUNNING_ON_LOCAL = not RUNNING_ON_SERVER
BASE_FOLDER = BASE_FOLDER_server if RUNNING_ON_SERVER else BASE_FOLDER_local

# Paths derived from the selected root.
home_folder_path = BASE_FOLDER
osmnx_cache_folder = os.path.join(BASE_FOLDER, "cache_osmnx")
log_file = os.path.join(BASE_FOLDER, "log-main.txt")
LOGGING_ENABLED = False
# Turn on the osmnx cache and point it at the cache folder inside BASE_FOLDER.
ox.settings.use_cache = True
ox.settings.cache_folder = osmnx_cache_folder
####################################
###### DELETE FILES CONFIG ########
####################################
# One "delete existing pickles" switch per prefix (rn / scl / sd / ps / td);
# presumably each prefix maps to the section of the same prefix in this file.
# All five are off.
(
    rn_delete_existing_pickled_objects,
    scl_delete_existing_pickle_objects,
    sd_delete_existing_pickle_objects,
    ps_delete_existing_pickle_objects,
    td_delete_existing_pickle_objects,
) = (False,) * 5
#####################################
############## PLOTS ################
#####################################
ppl_smallest_sample = -1
ppl_use_all = True
# When all data is used, the sample size must stay at the -1 value.
if ppl_use_all:
    assert ppl_smallest_sample == -1, "ppl_use_all=True requires ppl_smallest_sample == -1"
ppl_parallel_overall = 1
ppl_plot_FI = True
ppl_CV_splits = 7
ppl_plot_corr = False
ppl_hist = False
ppl_hist_bins = 10
# 0: None; 1: Divide by Max; 2: StandardScaler(); 3: Divide by max; followed by StandardScaler()
ppl_scaling_for_EDA = 2
assert ppl_scaling_for_EDA in [0, 1, 2, 3], "ppl_scaling_for_EDA must be one of 0, 1, 2, 3"
ppl_list_of_baselines = []  # ["Lasso()", "LinearRegression()", "Ridge()"], "LinearRegression()"
# ppl_feature_importance_via_coefficients = False
# if ppl_feature_importance_via_coefficients:
#     assert model in ["LR", "RIDGE", "LASSO"]
ppl_feature_importance_via_NL_models = False
# if ppl_feature_importance_via_NL_models:
#     assert model in ["RFR", "GBM"]
ppl_list_of_NL_models = ["RandomForestRegressor()"]  # , "GradientBoostingRegressor()"
# assert (ppl_feature_importance_via_NL_models != ppl_feature_importance_via_coefficients) or (
#     ppl_feature_importance_via_coefficients == False and ppl_feature_importance_via_NL_models == False
# )
ppl_list_of_correlations = ["pearson", "spearman"]  # , "kendall", "spearman"]
####################################
###### ROAD NETWORK Class ######
####################################
# format: city: [N, E, S, W]
# The table is written once; it used to be duplicated verbatim in the server
# and the local branch.
# NOTE(review): "Capetown" and "London" do not follow the N, E, S, W order of
# the other entries (their first latitude is smaller than the third and their
# second longitude is smaller than the fourth), so they look like S, W, N, E.
# The values are left untouched -- confirm how the consumer reads them.
rn_city_wise_bboxes = {
    "Singapore": [1.51316, 104.135278, 1.130361, 103.566667],
    "Zurich": [47.434666, 8.625441, 47.32022, 8.448006],
    "Mumbai": [19.270177, 72.979731, 18.893957, 72.776333],
    "Auckland": [-36.681247, 174.925937, -36.965932, 174.63532],
    "Istanbul": [41.671, 29.9581, 40.7289, 27.9714],
    "MexicoCity": [19.592757, -98.940303, 19.048237, -99.364924],
    "Bogota": [4.837015, -73.996423, 4.4604, -74.223689],
    "NewYorkCity": [40.916178, -73.700181, 40.477399, -74.25909],
    "Capetown": [-34.462, 18.1107, -33.3852, 19.0926],
    "London": [51.28676, -0.510375, 51.691874, 0.334015],
    # "Tokyo": [35.0721, 139.1704, 35.9707, 140.5547], # @Tokyo removed because no data present in here-api at the time of our study
    # "TokyoCore": [35.0721, 139.1704, 35.9707, 140.5547],
}
# Only one city is kept per run; which one depends on the machine.
if RUNNING_ON_SERVER:
    single_city = "Auckland"
elif RUNNING_ON_LOCAL:
    single_city = "Istanbul"
# Narrow the table to the selected city (a one-entry dict).
rn_city_wise_bboxes = {single_city: rn_city_wise_bboxes[single_city]}
# rn_city_wise_tz_code can be commented out since we don't need to recompute the speed_data_object everytime.
# City -> time-zone name. The ten shared entries are written once; they used
# to be duplicated in the server and the local branch.
rn_city_wise_tz_code = {
    "Singapore": "Asia/Singapore",
    "Zurich": "Europe/Zurich",
    "Mumbai": "Asia/Kolkata",
    "Auckland": "Pacific/Auckland",
    "Istanbul": "Europe/Istanbul",
    "MexicoCity": "America/Mexico_City",
    "Bogota": "America/Bogota",
    "NewYorkCity": "America/New_York",
    "Capetown": "Africa/Johannesburg",
    "London": "Europe/London",
}
# "Tokyo" is present only in the local (non-server) configuration.
if RUNNING_ON_LOCAL and not RUNNING_ON_SERVER:
    rn_city_wise_tz_code["Tokyo"] = "Asia/Tokyo"
# Cities to process: the keys of the bounding-box table.
rn_master_list_of_cities = [*rn_city_wise_bboxes]
rn_basemap_zoom_level = 13

# Filtering: with rn_do_not_filter on, the per-city exemption list must be empty.
rn_do_not_filter = True
rn_do_not_filter_list = []  # ["Zurich"]
assert (not rn_do_not_filter) or len(rn_do_not_filter_list) == 0

rn_plotting_enabled = False
rn_plotting_for_truncated_graphs = False
rn_plot_basemap = False

# Truncation strategy; restricted to the three names checked below.
rn_truncate_method = "GPD_CUSTOM"
assert rn_truncate_method in ("GPD_DUMMY_NODES_SMART_TRUNC", "OSMNX_RETAIN_EDGE", "GPD_CUSTOM")

# File-name fragments.
rn_prefix_geojson_files = "gdam_410_"
rn_postfix_geojson_files = ".geojson"
rn_post_fix_road_network_object_file = "_road_network_object_square.pkl"
rn_base_map_filename = "_base_osm_truncated.png"

rn_compute_full_city_features = False
rn_add_edge_speed_and_tt = True
rn_no_stats_marker = "Empty"

# Any value other than 100 requires the "_small" pickle postfix.
rn_percentage_of_city_area = 100
assert (
    rn_percentage_of_city_area == 100
    or rn_post_fix_road_network_object_file == "_road_network_object_small.pkl"
)
# Side of the square taken around the city centre: 15 implies 15X15 sq.km.
# Istanbul uses 75, every other city 50.
# (disabled variant: single_city == "London" -> 25)
rn_square_from_city_centre = 75 if single_city == "Istanbul" else 50

# A value other than -1 means the square filter is in use; then
# we cannot have two filtering techniques (the percentage filter must stay at
# 100 -- basically it is not needed) and the pickle postfix must be "_square".
_square_filter_in_use = rn_square_from_city_centre != -1
assert (not _square_filter_in_use) or rn_percentage_of_city_area == 100
assert (not _square_filter_in_use) or (
    rn_post_fix_road_network_object_file == "_road_network_object_square.pkl"
)

rn_simplify = False
print("Loaded config file: Prefix for name of road network object pickle file!")
####################################
######### Scale Class ##########
####################################
# Number of parallel jobs: 45 when the server root is selected, otherwise 5.
scl_n_jobs_parallel = 45 if BASE_FOLDER == BASE_FOLDER_server else 5
scl_temp_file_counter = True
# if rn_truncate_method == "GDF_CUSTOM":
#     assert scl_n_jobs_parallel == 1
# The scale stage works on the same city list as the road-network stage.
scl_master_list_of_cities = rn_master_list_of_cities
scl_list_of_depths = [1]
# Earlier hand-picked seed list, kept for reference:
# scl_list_of_seeds = [15, 30, 50, 60, 80, 110, 130]  # also tried: 150, 170,
#     40, 45, 50, 55, 60, 65, 70, 80, 85, 90, 95, 100, 120
# test_small
running_extended_scales = 0  # 0 implies single scale (1 sq.km), 1 implies 3 scales (1 sqkm, 4 swkm and 0.25 sqkm.) and 2 implies all from 20 to 100
# Any other value used to leave scl_list_of_seeds unbound; reject it up front.
assert running_extended_scales in [0, 1, 2], "running_extended_scales must be 0, 1 or 2"

# Seed lists per scale mode. The local and the server branch held identical
# values, so the machine no longer takes part in the selection; only the city
# does (Istanbul has its own set).
_scl_seeds_istanbul = {
    0: [75],
    1: [37, 75, 150],
    2: [30, 45, 60, 90, 105, 120, 135, 37, 75, 150],
}
_scl_seeds_other_cities = {
    0: [50],
    1: [25, 50, 100],
    2: [20, 30, 40, 60, 70, 80, 90, 25, 50, 100],
}
_scl_seed_table = _scl_seeds_istanbul if single_city == "Istanbul" else _scl_seeds_other_cities
# Copy so that consumers mutating the list do not alter the table.
scl_list_of_seeds = list(_scl_seed_table[running_extended_scales])

scl_error_percentage_tolerance = 1
scl_basemap_zoom_level = 19
####################################
######## Tile Class configs ########
####################################
# tls_garbage_Test_Speed set to True for debugging, otherwise MUST BE FALSE;
# if set to True, will return all zeros; Zeros are used to ensure that a
# wrongly set config is easy to spot
tls_garbage_Test_Speed = False
tls_betweenness_features = True
tls_number_of_lanes = True
tls_add_edge_speed_and_tt = False
tls_add_max_speed = True
tls_add_metered_intersections = True
if tls_add_edge_speed_and_tt:
    # the data must be present in the road network to begin with
    assert rn_add_edge_speed_and_tt, "tls_add_edge_speed_and_tt requires rn_add_edge_speed_and_tt"
tls_missing_lanes_default_value = 2
####################################
###### Speed Data Class ########
####################################
sd_base_folder_path = "speed_data"
sd_seg_file_path_within_city = "segments.csv"
sd_jf_file_path_within_city = "jf.csv"
# Granularity of the raw series is in minutes, as implied by the
# 60 / sd_raw_speed_data_gran points-per-hour term below; the target
# granularity presumably uses the same unit -- confirm.
sd_raw_speed_data_gran = 10
sd_target_speed_data_gran = 60
sd_temporal_combination_method = "mean"
assert sd_temporal_combination_method in ["mean", "max", "variance"]
sd_start_datetime_str = "2022-09-02T00:00:01"
sd_end_datetime_str = "2022-09-29T23:59:59"
# NOTE(review): the start/end strings above span 28 calendar days while the
# sanity-check value below is 30 -- confirm which one is intended.
sd_total_number_of_days = 30  # additional config item for sanity check
# Expected points per segment: days * 24 hours * points per hour. Tied to
# sd_total_number_of_days instead of a second hard-coded 30 so the two cannot
# drift apart. The result is a float because of the true division.
sd_total_number_of_data_points_for_each_segment = (
    sd_total_number_of_days * 24 * (60 / sd_raw_speed_data_gran)
)
####################################
###### PreProcess Speed ########
####################################
ps_spatial_combination_method = "length_weighted_mean"
assert ps_spatial_combination_method in ["mean", "max", "length_weighted_mean", "variance"]
# List of hour groups; each inner list is one group. The local and the server
# branch held the same value, so it is defined once, independent of machine.
ps_tod_list = [
    # [9, 10],
    # [6, 7, 8, 9, 10],  # peak hour morning
    # [16, 17, 18, 19, 20],  # peak hour evening (was labelled "morning")
    list(range(1, 25)),  # hours 1..24
]
assert isinstance(ps_tod_list, list)
ps_set_all_speed_zero = False
####################################
##### TRAIN DATA Class configs #####
####################################
# Same hour groups (same list object) as the speed pre-processing stage.
td_tod_list = ps_tod_list

# Scaling choices.
td_standard_scaler = True
td_min_max_scaler = False

td_SIMPLIFY_GRAPH = True
td_reuse_Graph_features = True

# Diagnostic plots.
td_plot_raw_variance_before_scaling = True
td_viz_y_hist = True

## drop features showing high collinearity, some of them were removed initially
# Candidates that were dropped at some point (all currently disabled):
#   "self_loop_proportion", "streets_per_node_count_1",
#   "streets_per_node_count_4", "street_length_avg", "street_segment_count",
#   "streets_per_node_count_2", "mean_lanes"
td_drop_feature_lists = []
# Spatial cross-validation layout; one of the three names checked below.
SHAP_mode_spatial_CV = "horizontal"
assert SHAP_mode_spatial_CV in ("vertical", "horizontal", "grid")

shift_tile_marker = 3

SHAP_sort_features_alphabetical_For_heatmaps = False
# Poor GoF; no longer used
SHAP_additive_regression_model = False
# True only when fast PDPs are needed for a large number of scenarios;
# this must be False for normal runs
FAST_GEN_PDPs_for_multiple_runs = True
# Folder names encode the congestion type, the square size and the tile shift.
# Both names share the same suffix, so it is built once.
_run_suffix = (
    f"tmean_smean_{rn_square_from_city_centre}x{rn_square_from_city_centre}"
    f"_shifting_{shift_tile_marker}"
)
network_folder = f"{CONGESTION_TYPE}R3network_{_run_suffix}"
warnings_folder = "warnings"
# Earlier naming scheme, kept for reference:
# "results_50x50_max_" + ("full" if ppl_smallest_sample == -1 else str(ppl_smallest_sample)) + "_data" + "-fi-max-max"
results_folder = f"{CONGESTION_TYPE}R3results_network_{_run_suffix}"

# Scratch folder for pickle files, created when this module is imported.
temp_folder_for_robust_pickle_files = os.path.join(BASE_FOLDER, "pickle_slack_temp")
if not os.path.exists(temp_folder_for_robust_pickle_files):
    # exist_ok covers a concurrent process creating the folder between the
    # check and the call; makedirs also creates BASE_FOLDER if it is missing.
    os.makedirs(temp_folder_for_robust_pickle_files, exist_ok=True)
SHAP_ScalingOfInputVector = True
SHAP_use_all_features_including_highly_correlated = False

# Hyper-parameter search: random search and grid search are mutually
# exclusive, so the grid-search flag is derived from the random-search flag.
SHAP_random_search_CV = False
SHAP_Grid_search_CV = not SHAP_random_search_CV
# Bound unconditionally (it used to exist only when random search was on), so
# code that reads it never fails on a missing attribute. Presumably the
# iteration count for random search -- confirm at the call site.
SHAP_HPT_num_iters = 50

SHAP_values_disabled = False  # True only when we are interested in capturing the just the GoF for a large number of scenarios
SHAP_disable_all_plots_for_server = True
SHAP_GBM_ENABLED = False
SHAP_generate_beeswarm = True
# To ensure that we don't overwrite the network folder of max with mean or vice-versa
# When we use mean, we use mean in the network folder and
# When we use max, we use max in the network folder
# assert ps_spatial_combination_method == sd_temporal_combination_method
# assert sd_temporal_combination_method in network_folder