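"""Parse a JSON file of data sources and build a cache manifest of URLs
that need updating, with a per-source refresh interval in days."""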
import argparse
import json


def match_freq(update_frequency):
    """Map an update_frequency label to a refresh interval in days.

    Returns None for unrecognized labels.
    """
    match update_frequency:
        case "<5 Minutes" | "Hourly" | "daily" | "Daily" | "Nightly" | "Live":
            update_delta = 1
        case "weekly" | "Weekly" | "About weekly at least" | "At least once per week" | "weekly or more often":
            update_delta = 7
        case "<2 Weeks":
            update_delta = 14
        case "monthly" | "Monthly" | "As new shootings occur":
            update_delta = 30
        case "quarterly" | "Quarterly":  # one quarter, about 91 days
            update_delta = 91
        case "Irregularly every few months upon complaint or request.":
            update_delta = 121
        case "BiAnnually" | "semi-annually":  # twice a year, about 182 days
            update_delta = 182
        case "annually" | "Annually":
            update_delta = 365
        case _:
            update_delta = None
    return update_delta
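
# A quick sanity check of the mapping (labels are illustrative):
#   match_freq("Weekly")          -> 7
#   match_freq("not a label")     -> None  (callers must handle None)
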
# Parse command-line arguments
parser = argparse.ArgumentParser(description='Parse a JSON file and cache URLs that need to be updated.')
parser.add_argument('json_file', help='Path to the JSON file to parse')
parser.add_argument('info_file', help='Path to the cache file to write')
args = parser.parse_args()
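# Example invocation (file names are hypothetical):
#   python url_cache_builder.py data_sources.json url_cache.json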
# Load JSON file
with open(args.json_file, 'r') as f:
data = json.load(f)
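
# Fields read from each entry (inferred from the loop below; the export
# format itself is assumed, not documented here):
#   source_url        -- URL of the data source; entries without one are skipped
#   update_frequency  -- free-text label, translated to days by match_freq
#   agency_described  -- object whose "name" identifies the agency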
# Parse entries and record URLs that need to be cached
url_cache_info = []
for entry in data:
    source_url = entry.get('source_url')
    if source_url is None:
        continue
    update_delta = match_freq(entry.get('update_frequency'))
    # Guard against entries missing the agency_described object entirely.
    agency_name = (entry.get('agency_described') or {}).get('name')
    url_cache_info.append({'agency_name': agency_name,
                           'source_url': source_url,
                           'update_delta': update_delta,
                           'last_cached': None})
# Write the cache manifest to the output file
with open(args.info_file, 'w') as f:
    json.dump(url_cache_info, f, indent=4)
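
# Shape of the resulting info_file (values are hypothetical):
# [
#     {
#         "agency_name": "Example Police Department",
#         "source_url": "https://example.gov/records",
#         "update_delta": 7,
#         "last_cached": null
#     }
# ]
# A downstream cache job would presumably compare last_cached against
# update_delta (in days) to decide when each source_url is due for a re-fetch.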