Skip to content

Commit

Permalink
Merge pull request #561 from alan-turing-institute/update-team-model
Browse files Browse the repository at this point in the history
Updating team model
  • Loading branch information
jack89roberts authored Nov 10, 2023
2 parents 1453b6d + 00c8b17 commit 8d404d4
Show file tree
Hide file tree
Showing 11 changed files with 900 additions and 615 deletions.
195 changes: 159 additions & 36 deletions airsenal/framework/bpl_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
Interface to the NumPyro team model in bpl-next:
https://github.com/anguswilliams91/bpl-next
"""
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
from bpl import ExtendedDixonColesMatchPredictor
from bpl import ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
from sqlalchemy.orm.session import Session

from airsenal.framework.schema import FifaTeamRating, Result, session
from airsenal.framework.schema import FifaTeamRating, Fixture, Result, session
from airsenal.framework.season import CURRENT_SEASON, get_teams_for_season
from airsenal.framework.utils import (
get_fixture_teams,
Expand All @@ -17,9 +20,11 @@
np.random.seed(42)


def get_result_dict(season, gameweek, dbsession):
def get_result_dict(
season: str, gameweek: int, dbsession: Session
) -> Dict[str, np.array]:
"""
query the match table and put results into pandas dataframe,
Query the match table and put results into a dict of numpy arrays,
to train the team-level model.
"""
results = [
Expand All @@ -33,17 +38,34 @@ def get_result_dict(season, gameweek, dbsession):
next_gameweek=gameweek,
)
]
# compute the time difference for each fixture in results
# to the first fixture of the next gameweek
result_dates = np.array(
[pd.Timestamp(r.fixture.date).replace(tzinfo=None) for r in results]
)
end_date = pd.to_datetime(
[f.date for f in get_fixtures_for_gameweek(gameweek, season, dbsession)]
).min()
end_date = end_date.replace(tzinfo=None)
time_diff = (end_date - result_dates) / pd.Timedelta(days=365)
return {
"home_team": np.array([r.fixture.home_team for r in results]),
"away_team": np.array([r.fixture.away_team for r in results]),
"home_goals": np.array([r.home_score for r in results]),
"away_goals": np.array([r.away_score for r in results]),
"time_diff": time_diff,
"neutral_venue": np.zeros(len(results)),
"time_diff": time_diff,
"game_weights": np.ones(len(results)),
}


def get_ratings_dict(season, teams, dbsession):
"""Create a dataframe containing the fifa team ratings."""

def get_ratings_dict(
season: str, teams: List[str], dbsession: Session
) -> Dict[str, np.array]:
"""
Create a dict of FIFA team ratings, keyed by team name.
"""
ratings = dbsession.query(FifaTeamRating).filter_by(season=season).all()
if len(ratings) == 0:
raise ValueError(f"No FIFA ratings found for season {season}")
Expand All @@ -63,70 +85,163 @@ def get_ratings_dict(season, teams, dbsession):
return ratings_dict


def get_training_data(season, gameweek, dbsession, ratings=True):
def get_training_data(
season: str,
gameweek: int,
dbsession: Session,
ratings: bool = True,
):
"""Get training data for team model, optionally including FIFA ratings
as covariates if ratings is True. Data returned is for all matches up
to specified gameweek and season.
as covariates if ratings is True. The data also includes each match's
time_diff (in years before the gameweek), e.g. for a model with time decay.
Data returned is for all matches up to specified gameweek and season.
"""
training_data = get_result_dict(season, gameweek, dbsession)
if ratings:
teams = set(training_data["home_team"]) | set(training_data["away_team"])
training_data["team_covariates"] = get_ratings_dict(season, teams, dbsession)
training_data["team_covariates"] = get_ratings_dict(
season=season, teams=teams, dbsession=dbsession
)
return training_data


def create_and_fit_team_model(
training_data, model_class=ExtendedDixonColesMatchPredictor
):
training_data: dict,
model: Union[
ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
] = ExtendedDixonColesMatchPredictor(),
**fit_args,
) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
"""
Fit the team-level NumPyro model, which can give probabilities of
each potential scoreline in a given fixture.
"""
return model_class().fit(training_data)
if not fit_args:
fit_args = {}
if "epsilon" in fit_args:
print(f"Fitting {type(model)} model with epsilon = {fit_args['epsilon']}")
else:
print(
f"Fitting {type(model)} model but no epsilon passed, "
"so using the default epsilon = 0"
)

return model.fit(training_data=training_data, **fit_args)


def add_new_teams_to_model(team_model, season, dbsession):
def add_new_teams_to_model(
team_model: Union[
ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
],
season: str,
dbsession: Session,
ratings: bool = True,
) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
"""
Add teams that we don't have previous results for (e.g. promoted teams) to the model
using their FIFA ratings as covariates if ratings is True, or without any
covariates otherwise.
"""
teams = get_teams_for_season(season, dbsession=dbsession)
teams = get_teams_for_season(season=season, dbsession=dbsession)
for t in teams:
if t not in team_model.teams:
print(f"Adding {t} to team model with covariates")
ratings = get_ratings_dict(season, [t], dbsession)
team_model.add_new_team(t, team_covariates=ratings[t])
if ratings:
print(f"Adding {t} to team model with covariates")
ratings = get_ratings_dict(season, [t], dbsession)
team_model.add_new_team(t, team_covariates=ratings[t])
else:
print(f"Adding {t} to team model without covariates")
team_model.add_new_team(t)
return team_model


def get_fitted_team_model(
season, gameweek, dbsession, team_model_class=ExtendedDixonColesMatchPredictor
):
season: str,
gameweek: int,
dbsession: Session,
ratings: bool = True,
model: Union[
ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
] = ExtendedDixonColesMatchPredictor(),
**fit_args,
) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
"""
get the fitted team model using the past results and the FIFA rankings
Get the fitted team model using the past results and, if ratings is True,
the FIFA ratings.
"""
print(f"Fitting team model ({type(team_model_class())})...")
training_data = get_training_data(season, gameweek, dbsession)
team_model = create_and_fit_team_model(training_data, team_model_class)
return add_new_teams_to_model(team_model, season, dbsession)
print(f"Fitting team model ({type(model)})...")
training_data = get_training_data(
season=season,
gameweek=gameweek,
dbsession=dbsession,
ratings=ratings,
)
team_model = create_and_fit_team_model(
training_data=training_data, model=model, **fit_args
)
return add_new_teams_to_model(
team_model=team_model, season=season, dbsession=dbsession, ratings=ratings
)


def fixture_probabilities(
gameweek, season=CURRENT_SEASON, team_model=None, dbsession=session
):
gameweek: int,
season: str = CURRENT_SEASON,
model: Optional[
Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]
] = None,
dbsession: Session = session,
ratings: bool = True,
**fit_args,
) -> pd.DataFrame:
"""
Returns probabilities for all fixtures in a given gameweek and season, as a data
frame with a row for each fixture and columns being home_team,
away_team, home_win_probability, draw_probability, away_win_probability.
If no model is passed, it will fit an ExtendedDixonColesMatchPredictor model
by default. If an unfitted model is passed (model.teams is None), it is
fitted first.
"""
if team_model is None:
team_model = get_fitted_team_model(season, gameweek, dbsession)

# fit team model if none is passed or if it is not fitted yet
# (model.teams will be None if so)
if model is None:
# fit extended model by default
model = get_fitted_team_model(
season=season,
gameweek=gameweek,
dbsession=dbsession,
ratings=ratings,
model=ExtendedDixonColesMatchPredictor(),
**fit_args,
)
elif model.teams is None:
# model is not fit yet, so will need to fit
model = get_fitted_team_model(
season=season,
gameweek=gameweek,
dbsession=dbsession,
ratings=ratings,
model=model,
**fit_args,
)

# obtain fixtures
fixtures = get_fixture_teams(
get_fixtures_for_gameweek(gameweek, season=season, dbsession=dbsession)
get_fixtures_for_gameweek(gameweek=gameweek, season=season, dbsession=dbsession)
)
home_teams, away_teams = zip(*fixtures)
probabilities = team_model.predict_outcome_proba(home_teams, away_teams)

# obtain match probabilities
if isinstance(model, ExtendedDixonColesMatchPredictor):
probabilities = model.predict_outcome_proba(home_teams, away_teams)
elif isinstance(model, NeutralDixonColesMatchPredictor):
probabilities = model.predict_outcome_proba(
home_teams, away_teams, neutral_venue=np.zeros(len(home_teams))
)
else:
raise NotImplementedError(
"model must be either of type "
"'ExtendedDixonColesMatchPredictor' or "
"'NeutralDixonColesMatchPredictor'"
)
return pd.DataFrame(
{
"home_team": home_teams,
Expand All @@ -138,9 +253,17 @@ def fixture_probabilities(
)


def get_goal_probabilities_for_fixtures(fixtures, team_model, max_goals=10):
"""Get the probability that each team in a fixture scores any number of goals up
to max_goals."""
def get_goal_probabilities_for_fixtures(
fixtures: List[Fixture],
team_model: Union[
ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
],
max_goals: int = 10,
) -> Dict[int, Dict[str, Dict[int, float]]]:
"""
Get the probability that each team in a fixture scores any number of goals up
to max_goals.
"""
goals = np.arange(0, max_goals + 1)
probs = {}
for f in fixtures:
Expand Down
2 changes: 1 addition & 1 deletion airsenal/framework/player.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"""

from airsenal.framework.schema import Player
from airsenal.framework.season import CURRENT_SEASON
from airsenal.framework.utils import (
CURRENT_SEASON,
NEXT_GAMEWEEK,
get_player,
get_predicted_points_for_player,
Expand Down
Loading

0 comments on commit 8d404d4

Please sign in to comment.