-
Notifications
You must be signed in to change notification settings - Fork 0
/
cache.py
97 lines (83 loc) · 3.06 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Generate three cache files: test.json, airport_dict.json and airlines_us.json."""
import json
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Cache the travelpayouts airlines/routes dump as test.json (skip if present).
if not os.path.exists('test.json'):
    url = "https://travelpayouts-travelpayouts-flight-data-v1.p.rapidapi.com/data/en-GB/airlines.json"
    # SECURITY NOTE(review): the RapidAPI key is hardcoded in source; it should
    # be moved to an environment variable and rotated.
    headers = {
        "X-Access-Token": "undefined",
        "X-RapidAPI-Key": "6ba009b4d2msh4e252de979d76a6p11dc08jsn1f59c660d315",
        "X-RapidAPI-Host": "travelpayouts-travelpayouts-flight-data-v1.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers)
    # Fail loudly rather than silently caching an HTML error page as JSON.
    response.raise_for_status()
    json_object = response.json()
    print(type(json_object))
    print(len(json_object))
    with open("test.json", "w") as outfile:
        json.dump(json_object, outfile)
# Build airport_dict.json from the Wikipedia list of US airports (skip if present).
# Resulting mapping: IATA code -> [state, city, airport name].
if not os.path.exists('airport_dict.json'):
    wikiurl = "https://en.wikipedia.org/wiki/List_of_airports_in_the_United_States"
    response = requests.get(wikiurl)
    print(response.status_code)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': "wikitable"})
    df = pd.read_html(str(table))[0]

    airport_dict = {}
    state = ""
    for _, row in df.iterrows():
        # Rows with a blank "Airport" cell are state-header rows: their "City"
        # column carries the state name for the rows that follow.
        if pd.isnull(row["Airport"]):
            state = row["City"]
            continue
        # Skip airports without an IATA code — they can't be keyed.
        if pd.isnull(row["IATA"]):
            continue
        airport_dict[row["IATA"]] = [state, row["City"], row["Airport"]]

    with open("airport_dict.json", "w") as outfile:
        json.dump(airport_dict, outfile, indent=4)
# Build airlines_us.json (skip if present): for every direct US route found in
# test.json, scrape the distance text from airmilescalculator.com and attach a
# {destination_iata: distance_lines} map as the last element of each airport's
# entry. Airports that end up with no routes are dropped.
if not os.path.exists('airlines_us.json'):
    # Original code leaked both file handles; use context managers instead.
    with open('airport_dict.json') as f:
        airport_dict = json.load(f)
    for entry in airport_dict.values():
        entry.append({})
    with open('test.json') as f:
        data = json.load(f)

    i = 0
    for route in data:
        dep = route["departure_airport_iata"]
        arr = route["arrival_airport_iata"]
        # Only direct flights between known US airports are of interest.
        if dep not in airport_dict or arr not in airport_dict or route["transfers"] > 0:
            continue
        # Distance already scraped for this pair (symmetric fill below).
        if arr in airport_dict[dep][-1]:
            continue
        i += 1
        if i % 10 == 0:
            print(i)  # coarse progress indicator
        url = "https://www.airmilescalculator.com/distance/" + arr + "-to-" + dep
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        number = [div.getText().strip("\n")
                  for div in soup.find_all('div', {'class': "numberline"})]
        airport_dict[dep][-1][arr] = number
        # Distance is symmetric: record the reverse direction too.
        if dep not in airport_dict[arr][-1]:
            airport_dict[arr][-1][dep] = number

    # Drop airports that gained no routes at all.
    empty_keys = [key for key, entry in airport_dict.items() if not entry[-1]]
    for key in empty_keys:
        airport_dict.pop(key, None)
    print(len(airport_dict))

    with open("airlines_us.json", "w") as outfile:
        json.dump(airport_dict, outfile, indent=4)