Skip to content

Commit

Permalink
Update to use new NHL API
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanbraun committed Jan 31, 2024
1 parent 88b9f74 commit 472188e
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 76 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ If I were using Windows, it might look like this:
Set these aside for now and we'll pick them up in chapter 2.

## Changelog
### v0.1.0 (2024-01-31)
Major update to the API chapter. Rewrote everything to use the new NHL API
since the old one stopped working.

### v0.0.11 (2023-11-15)
Fixed some typos.

Expand Down
280 changes: 205 additions & 75 deletions code/05_03_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,123 +3,152 @@
from pandas import DataFrame, Series
import pandas as pd

#######
# teams
#######
#################
# teams/standings
#################

# after looking at url in browser, get what you need in python
teams_url = 'https://statsapi.web.nhl.com/api/v1/teams'
teams_resp = requests.get(teams_url)
standings_url = 'https://api-web.nhle.com/v1/standings/2024-01-21'
standings_resp = requests.get(standings_url)

teams_json = teams_resp.json()
standings_json = standings_resp.json()

# with open('./data/json/teams.json') as f:
# teams_json = json.load(f)
# with open('./data/json/standings.json') as f:
# standings_json = json.load(f)

teams_json
teams_json.keys()
standings_json
standings_json.keys()

type(teams_json['teams'])
type(standings_json['standings'])
standings_json['standings'][0]

teams_json['teams'][0]
canucks_nested = standings_json['standings'][0]

nj_nested = teams_json['teams'][0]
canucks_flat = {key: value for key, value in canucks_nested.items() if
type(value) not in (dict, list)}
canucks_flat

nj_flat = {key: value for key, value in nj_nested.items()
if type(value) is not dict}
nj_flat
canucks_nested['teamName']

nj_nested['venue']
canucks_flat['teamName'] = canucks_nested['teamName']['default']

nj_flat['venue_name'] = nj_nested['venue']['name']
nj_flat['venue_city'] = nj_nested['venue']['city']

nj_flat['franchise_id'] = nj_nested['franchise']['franchiseId']
nj_flat['division_id'] = nj_nested['division']['id']
nj_flat['convference_id'] = nj_nested['conference']['id']
{key: value for key, value in canucks_nested.items() if
type(value) in (dict, list)}

nj_flat
canucks_flat['placeName'] = canucks_nested['placeName']['default']
canucks_flat['teamCommonName'] = canucks_nested['teamCommonName']['default']
canucks_flat['teamAbbrev'] = canucks_nested['teamAbbrev']['default']

def flatten_team(nested):
flat = {key: value for key, value in nested.items() if type(value) is not dict}

flat['venue_name'] = nested['venue']['name']
flat['venue_city'] = nested['venue']['city']
flat['franchise_id'] = nested['franchise']['franchiseId']
flat['division_id'] = nested['division']['id']
flat['convference_id'] = nested['conference']['id']
flat = {key: value for key, value in nested.items() if type(value) not in (dict, list)}
flat['teamName'] = nested['teamName']['default']
flat['placeName'] = nested['placeName']['default']
flat['teamCommonName'] = nested['teamCommonName']['default']
flat['teamAbbrev'] = nested['teamAbbrev']['default']

return flat

df_teams = DataFrame([flatten_team(x) for x in teams_json['teams']])

df_teams.head()
df_teams = DataFrame([flatten_team(x) for x in standings_json['standings']])
df_teams[['teamAbbrev', 'wins', 'losses', 'ties', 'goalFor', 'goalAgainst']].head(10)

#########
# rosters
#########
rosters_url = 'https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster'
rosters_resp = requests.get(rosters_url)
rosters_json = rosters_resp.json()

# with open('./data/json/rosters.json') as f:
# rosters_json = json.load(f)
tor_roster_url = 'https://api-web.nhle.com/v1/roster/TOR/20232024'
tor_roster_resp = requests.get(tor_roster_url)
tor_roster_json = tor_roster_resp.json()

# with open('./data/json/tor_roster.json') as f:
# tor_roster_json = json.load(f)

# specific instance
nj = rosters_json['teams'][0]
nj_roster = nj['roster']['roster']
jb = nj_roster[0]
tor_roster_json.keys()

jb
forward1 = tor_roster_json['forwards'][0]
forward1

jb_flat = {}
jb_flat['person_id'] = jb['person']['id']
jb_flat['name'] = jb['person']['fullName']
jb_flat['jersey'] = jb['jerseyNumber']
jb_flat['position'] = jb['position']['code']
forward1_flat = {key: value for key, value in forward1.items()
if type(value) not in (dict, list)}
forward1_flat['firstName'] = forward1['firstName']['default']
forward1_flat['lastName'] = forward1['lastName']['default']
forward1_flat['birthCity'] = forward1['birthCity']['default']
forward1_flat['birthStateProvince'] = forward1['birthStateProvince']['default']

jb_flat
forward1_flat

def flatten_player(nested):
flat = {}
flat['person_id'] = nested['person']['id']
flat['name'] = nested['person']['fullName']
flat['position'] = nested['position']['code']
flat = {key: value for key, value in nested.items() if type(value) not in
(dict, list)}
flat['firstName'] = nested['firstName']['default']
flat['lastName'] = nested['lastName']['default']
flat['birthCity'] = nested['birthCity']['default']
flat['birthStateProvince'] = nested['birthStateProvince']['default']

return flat

df_nj = DataFrame([flatten_player(x) for x in nj_roster])
# commented out because it raises a KeyError (not every player has a
# 'birthStateProvince' field)
# df_fwd = DataFrame([flatten_player(x) for x in tor_roster_json['forwards']])

def flatten_player2(nested):
    """Flatten one player dict from a roster response into a single level.

    Fields whose value is not a dict or list are copied over unchanged.
    'firstName', 'lastName' and 'birthCity' are dicts in the raw data; each is
    replaced by its 'default' entry. 'birthStateProvince' gets the same
    treatment, but only when the key is present, so players without it
    flatten without raising a KeyError.

    Args:
        nested: dict describing a single player.

    Returns:
        A new flat dict; `nested` itself is not modified.

    Raises:
        KeyError: if 'firstName', 'lastName' or 'birthCity' is missing.
    """
    # isinstance (rather than an exact type() match) also drops dict/list
    # subclasses, which are just as nested as the plain types
    flat = {key: value for key, value in nested.items()
            if not isinstance(value, (dict, list))}

    flat['firstName'] = nested['firstName']['default']
    flat['lastName'] = nested['lastName']['default']
    flat['birthCity'] = nested['birthCity']['default']

    # optional field: only some players have it
    if 'birthStateProvince' in nested:
        flat['birthStateProvince'] = nested['birthStateProvince']['default']

    return flat

df_fwd = DataFrame([flatten_player2(x) for x in tor_roster_json['forwards']])

def roster_by_team1(team):
roster_url = f'https://api-web.nhle.com/v1/roster/{team}/20232024'
roster_resp = requests.get(roster_url)
roster_json = roster_resp.json()

def process_roster1(team_dict):
roster = team_dict['roster']['roster']
df = DataFrame([flatten_player(x) for x in roster])
return df
df_fwd = DataFrame([flatten_player2(x) for x in roster_json['forwards']])
df_def = DataFrame([flatten_player2(x) for x in roster_json['defensemen']])
df_g = DataFrame([flatten_player2(x) for x in roster_json['goalies']])

df_nj2 = process_roster1(nj)
df_nj2.head()
return pd.concat([df_fwd, df_def, df_g], ignore_index=True)

df_tor = roster_by_team1('TOR')
df_tor.drop('headshot', axis=1)

df_sea = process_roster1(rosters_json['teams'][-1])
df_sea.head()
df_chi = roster_by_team1('CHI')
df_chi.drop('headshot', axis=1).head()

def process_roster2(team_dict):
roster = team_dict['roster']['roster']
df = DataFrame([flatten_player(x) for x in roster])
df['team_id'] = team_dict['id']
df['team_name'] = team_dict['name']
return df
def roster_by_team2(team):
roster_url = f'https://api-web.nhle.com/v1/roster/{team}/20232024'
roster_resp = requests.get(roster_url)
roster_json = roster_resp.json()

df_nj3 = process_roster2(nj)
df_nj3.head()
df_fwd = DataFrame([flatten_player2(x) for x in roster_json['forwards']])
df_def = DataFrame([flatten_player2(x) for x in roster_json['defensemen']])
df_g = DataFrame([flatten_player2(x) for x in roster_json['goalies']])

league_rosters = pd.concat([process_roster2(x) for x in rosters_json['teams']],
ignore_index=True)
df_all = pd.concat([df_fwd, df_def, df_g], ignore_index=True)
df_all['team'] = team
return df_all

league_rosters.sample(5)
df_sea = roster_by_team2('SEA')
df_sea.drop('headshot', axis=1).head()

league_rosters = pd.concat([roster_by_team2(x) for x in
df_teams['teamAbbrev']], ignore_index=True)

league_rosters[['id', 'firstName', 'lastName', 'positionCode', 'team']].sample(5)

###################
# player stats data
###################
player_id = 8471675 # sidney crosby
stats_url = f'https://statsapi.web.nhl.com/api/v1/people/{player_id}/stats?stats=yearByYear'
stats_url = f'https://api-web.nhle.com/v1/player/{player_id}/game-log/20232024/2'

# print it out to view in browser
stats_url

crosby_resp = requests.get(stats_url)
crosby_json = crosby_resp.json()
Expand Down Expand Up @@ -151,3 +180,104 @@ def hist_stats_by_player_year(player_id):

ovechkin_stats = hist_stats_by_player_year(8471214)
ovechkin_stats[['season', 'team', 'league', 'assists', 'goals', 'games']].head()

teams_json['data'][0]

nj_nested = teams_json['teams'][0]

nj_flat = {key: value for key, value in nj_nested.items()
if type(value) is not dict}
nj_flat

nj_nested['venue']

nj_flat['venue_name'] = nj_nested['venue']['name']
nj_flat['venue_city'] = nj_nested['venue']['city']

nj_flat['franchise_id'] = nj_nested['franchise']['franchiseId']
nj_flat['division_id'] = nj_nested['division']['id']
nj_flat['convference_id'] = nj_nested['conference']['id']

nj_flat

def flatten_team(nested):
    """Flatten one team dict (old `teams` response layout) into one level.

    Every field whose value is not a dict is copied over unchanged. Selected
    values are then pulled out of the nested 'venue', 'franchise', 'division'
    and 'conference' dicts and stored under flat keys.

    Args:
        nested: dict describing a single team; must contain the 'venue',
            'franchise', 'division' and 'conference' sub-dicts.

    Returns:
        A new flat dict; `nested` itself is not modified.

    Raises:
        KeyError: if any of the required sub-dicts or their fields is missing.
    """
    # isinstance also filters out dict subclasses, unlike an exact type() test
    flat = {key: value for key, value in nested.items()
            if not isinstance(value, dict)}

    flat['venue_name'] = nested['venue']['name']
    flat['venue_city'] = nested['venue']['city']
    flat['franchise_id'] = nested['franchise']['franchiseId']
    flat['division_id'] = nested['division']['id']
    # NOTE(review): 'convference_id' looks like a typo for 'conference_id';
    # the key is kept as-is so the resulting column name does not change
    flat['convference_id'] = nested['conference']['id']

    return flat

df_teams = DataFrame([flatten_team(x) for x in teams_json['teams']])

df_teams.head()

###################
# player stats data
###################
player_id = 8471675 # sidney crosby
stats_url = f'https://api-web.nhle.com/v1/player/{player_id}/game-log/20232024/2'

crosby_resp = requests.get(stats_url)
crosby_json = crosby_resp.json()

# with open('./data/json/crosby.json') as f:
# crosby_json = json.load(f)

crosby_stats_0 = crosby_json['gameLog'][0]
crosby_stats_0

def flatten_game_log(nested):
    """Return a copy of one game log entry with dict-valued fields removed.

    Args:
        nested: dict describing a single game from a 'gameLog' list.

    Returns:
        A new dict holding every key of `nested` whose value is not a dict
        (or dict subclass). `nested` itself is not modified.
    """
    # isinstance also filters out dict subclasses, unlike an exact type() test
    return {key: value for key, value in nested.items()
            if not isinstance(value, dict)}

flatten_game_log(crosby_stats_0)

crosby_stats_all = DataFrame([flatten_game_log(x) for x in
crosby_json['gameLog']])

crosby_stats_all[['gameDate', 'opponentAbbrev', 'goals', 'assists', 'plusMinus']].head(10)


def games_by_player(player_id, season='20232024', game_type=2):
    """Download a player's game log and return it as a DataFrame.

    Args:
        player_id: NHL player id that is placed in the game-log url
            (e.g. 8478402).
        season: season segment of the url. Defaults to '20232024', the value
            that used to be hard-coded.
        game_type: final segment of the url. Defaults to 2, the value that
            used to be hard-coded (presumably regular season; confirm against
            the API).

    Returns:
        DataFrame with one row per entry in the response's 'gameLog' list,
        each flattened with flatten_game_log.

    Raises:
        requests.HTTPError: if the API answers with an error status code.
        KeyError: if the response has no 'gameLog' field.
    """
    stats_url = (f'https://api-web.nhle.com/v1/player/{player_id}'
                 f'/game-log/{season}/{game_type}')
    stats_resp = requests.get(stats_url)

    # fail here with a clear HTTP error rather than later with a confusing
    # JSON decoding or KeyError problem
    stats_resp.raise_for_status()

    stats_json = stats_resp.json()
    return DataFrame([flatten_game_log(x) for x in stats_json['gameLog']])

mcdavid_stats = games_by_player(8478402)
mcdavid_stats[['gameDate', 'opponentAbbrev', 'goals', 'assists', 'plusMinus']].head()

################################################################################
################################################################################

## note: this part isn't meant to be run
## i (nate) am running this Wed 1/31/24 to save data we'll load above
##
## including here to make it clearer this saved data above just comes from APIs

# standings_url = 'https://api-web.nhle.com/v1/standings/2024-01-21'
# tor_roster_url = 'https://api-web.nhle.com/v1/roster/TOR/20232024'
# stats_url = f'https://api-web.nhle.com/v1/player/{player_id}/game-log/20232024/2'

# standings_resp = requests.get(standings_url)
# tor_roster_resp = requests.get(tor_roster_url)
# crosby_resp = requests.get(stats_url)

# standings_json = standings_resp.json()
# tor_roster_json = tor_roster_resp.json()
# crosby_json = crosby_resp.json()

# with open('./data/json/standings.json', 'w') as f:
# json.dump(standings_json, f)

# with open('./data/json/tor_roster.json', 'w') as f:
# json.dump(tor_roster_json, f)

# with open('./data/json/crosby.json', 'w') as f:
# json.dump(crosby_json, f)

Loading

0 comments on commit 472188e

Please sign in to comment.