-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
44 lines (30 loc) · 1.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Tournament years passed to get_matches() to build the historical data set.
# Each year must appear exactly once: a repeated entry is fetched twice and
# its matches end up duplicated in the concatenated history table.
years = [
    1930, 1934, 1938, 1950, 1954, 1958, 1962, 1966, 1970, 1974, 1978,
    1982, 1986, 1990, 1994, 1998, 2002, 2006, 2010, 2014, 2018,
]
def get_matches(year, timeout=30):
    """Scrape the match boxes listed on a FIFA World Cup Wikipedia page.

    Args:
        year: Tournament year, interpolated into the article URL
            ``https://en.wikipedia.org/wiki/{year}_FIFA_World_Cup``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with one row per complete ``div.footballbox`` element
        and the columns ``home``, ``score``, ``away`` and ``year``. Cell
        text is taken verbatim from the page (no whitespace stripping).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    web = f'https://en.wikipedia.org/wiki/{year}_FIFA_World_Cup'
    # Without a timeout a stalled connection would hang the whole scrape.
    response = requests.get(web, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty table that would be written to CSV unnoticed.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    home = []
    score = []
    away = []
    for match in soup.find_all('div', class_='footballbox'):
        home_cell = match.find('th', class_='fhome')
        score_cell = match.find('th', class_='fscore')
        away_cell = match.find('th', class_='faway')
        if home_cell is None or score_cell is None or away_cell is None:
            # find() returns None when a cell is absent; skip such boxes so
            # the three lists stay aligned and no AttributeError is raised.
            continue
        home.append(home_cell.get_text())
        score.append(score_cell.get_text())
        away.append(away_cell.get_text())

    dict_football = {'home': home, 'score': score, 'away': away}
    df_football = pd.DataFrame(dict_football)
    # Scalar assignment tags every row with the tournament it came from.
    df_football['year'] = year
    return df_football
# Historical data: scrape each tournament listed in `years`, stack the
# per-year frames into one table with a fresh 0..n-1 index, and save it.
fifa = list(map(get_matches, years))
df_fifa = pd.concat(fifa, ignore_index=True)
df_fifa.to_csv('fifa_worldcup_history_data.csv', index=False)

# Fixture: the 2022 tournament page is scraped on its own and written to a
# separate file.
df_fixture = get_matches(2022)
df_fixture.to_csv('fifa_worldcup_fixture_data.csv', index=False)