alan-turing-institute · nbarlowATI · Mar 22, 2024 · Dec 30, 2022 · Dec 31, 2022 · Dec 31, 2022
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
 
     steps:
       - uses: actions/checkout@v3

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,21 +2,21 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
     -  id: check-yaml
     -  id: check-toml
     -  id: end-of-file-fixer
     -  id: trailing-whitespace
 -   repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
     - id: isort
 -   repo: https://github.com/ambv/black
-    rev: 23.7.0
+    rev: 24.2.0
     hooks:
     - id: black
 -   repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.0.0
     hooks:
     - id: flake8
diff --git a/airsenal/framework/api_utils.py b/airsenal/framework/api_utils.py
@@ -1,6 +1,7 @@
 """
 Functions used by the AIrsenal API
 """
+
 from flask import jsonify
 from sqlalchemy.orm import scoped_session
 

diff --git a/airsenal/framework/aws_utils.py b/airsenal/framework/aws_utils.py
@@ -45,7 +45,7 @@ def get_league_standings_string():
         output_string += f"Standings for league {league_name} :"
         for i, entry in enumerate(standings):
             output_string += (
-                f"{i + 1,}: "
+                f"{i + 1}: "
                 f"{entry['name']}, "
                 f"managed by {entry['manager']}, "
                 f"with {entry['points']} points, "

diff --git a/airsenal/framework/bpl_interface.py b/airsenal/framework/bpl_interface.py
@@ -2,11 +2,15 @@
 Interface to the NumPyro team model in bpl-next:
 https://github.com/anguswilliams91/bpl-next
 """
+
+from typing import Dict, List, Optional, Union
+
 import numpy as np
 import pandas as pd
-from bpl import ExtendedDixonColesMatchPredictor
+from bpl import ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
+from sqlalchemy.orm.session import Session
 
-from airsenal.framework.schema import FifaTeamRating, Result, session
+from airsenal.framework.schema import FifaTeamRating, Fixture, Result, session
 from airsenal.framework.season import CURRENT_SEASON, get_teams_for_season
 from airsenal.framework.utils import (
     get_fixture_teams,
@@ -17,9 +21,11 @@
 np.random.seed(42)
 
 
-def get_result_dict(season, gameweek, dbsession):
+def get_result_dict(
+    season: str, gameweek: int, dbsession: Session
+) -> Dict[str, np.array]:
     """
-    query the match table and put results into pandas dataframe,
+    Query the match table and put results into pandas dataframe,
     to train the team-level model.
     """
     results = [
@@ -33,17 +39,34 @@ def get_result_dict(season, gameweek, dbsession):
             next_gameweek=gameweek,
         )
     ]
+    # compute the time difference for each fixture in results
+    # to the first fixture of the next gameweek
+    result_dates = np.array(
+        [pd.Timestamp(r.fixture.date).replace(tzinfo=None) for r in results]
+    )
+    end_date = pd.to_datetime(
+        [f.date for f in get_fixtures_for_gameweek(gameweek, season, dbsession)]
+    ).min()
+    end_date = end_date.replace(tzinfo=None)
+    time_diff = (end_date - result_dates) / pd.Timedelta(days=365)
     return {
         "home_team": np.array([r.fixture.home_team for r in results]),
         "away_team": np.array([r.fixture.away_team for r in results]),
         "home_goals": np.array([r.home_score for r in results]),
         "away_goals": np.array([r.away_score for r in results]),
+        "time_diff": time_diff,
+        "neutral_venue": np.zeros(len(results)),
+        "time_diff": time_diff,
+        "game_weights": np.ones(len(results)),
     }
 
 
-def get_ratings_dict(season, teams, dbsession):
-    """Create a dataframe containing the fifa team ratings."""
-
+def get_ratings_dict(
+    season: str, teams: List[str], dbsession: Session
+) -> Dict[str, np.array]:
+    """
+    Create a dataframe containing the fifa team ratings.
+    """
     ratings = dbsession.query(FifaTeamRating).filter_by(season=season).all()
     if len(ratings) == 0:
         raise ValueError(f"No FIFA ratings found for season {season}")
@@ -63,70 +86,163 @@ def get_ratings_dict(season, teams, dbsession):
     return ratings_dict
 
 
-def get_training_data(season, gameweek, dbsession, ratings=True):
+def get_training_data(
+    season: str,
+    gameweek: int,
+    dbsession: Session,
+    ratings: bool = True,
+) -> dict:
     """Get training data for team model, optionally including FIFA ratings
-    as covariates if ratings is True. Data returned is for all matches up
-    to specified gameweek and season.
+    as covariates if ratings is True. The result dictionary comes from
+    get_result_dict, with a "team_covariates" entry added when ratings is True.
+    Data returned is for all matches up to specified gameweek and season.
     """
     training_data = get_result_dict(season, gameweek, dbsession)
     if ratings:
         teams = set(training_data["home_team"]) | set(training_data["away_team"])
-        training_data["team_covariates"] = get_ratings_dict(season, teams, dbsession)
+        training_data["team_covariates"] = get_ratings_dict(
+            season=season, teams=teams, dbsession=dbsession
+        )
     return training_data
 
 
 def create_and_fit_team_model(
-    training_data, model_class=ExtendedDixonColesMatchPredictor
-):
+    training_data: dict,
+    model: Optional[
+        Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]
+    ] = None,
+    **fit_args,
+) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
     """
     Get the team-level stan model, which can give probabilities of
     each potential scoreline in a given fixture.
     """
-    return model_class().fit(training_data)
+    if model is None:  # build per call; a signature default would be shared
+        model = ExtendedDixonColesMatchPredictor()
+    if "epsilon" in fit_args:
+        print(f"Fitting {type(model)} model with epsilon = {fit_args['epsilon']}")
+    else:
+        print(
+            f"Fitting {type(model)} model but no epsilon passed, "
+            "so using the default epsilon = 0"
+        )
 
+    return model.fit(training_data=training_data, **fit_args)
 
-def add_new_teams_to_model(team_model, season, dbsession):
+
+def add_new_teams_to_model(
+    team_model: Union[
+        ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
+    ],
+    season: str,
+    dbsession: Session,
+    ratings: bool = True,
+) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
     """
     Add teams that we don't have previous results for (e.g. promoted teams) to the model
     using their FIFA ratings as covariates.
     """
-    teams = get_teams_for_season(season, dbsession=dbsession)
+    teams = get_teams_for_season(season=season, dbsession=dbsession)
     for t in teams:
         if t not in team_model.teams:
-            print(f"Adding {t} to team model with covariates")
-            ratings = get_ratings_dict(season, [t], dbsession)
-            team_model.add_new_team(t, team_covariates=ratings[t])
+            if ratings:  # flag stays a bool; per-team data lives in team_ratings
+                print(f"Adding {t} to team model with covariates")
+                team_ratings = get_ratings_dict(season, [t], dbsession)
+                team_model.add_new_team(t, team_covariates=team_ratings[t])
+            else:
+                print(f"Adding {t} to team model without covariates")
+                team_model.add_new_team(t)
     return team_model
 
 
 def get_fitted_team_model(
-    season, gameweek, dbsession, team_model_class=ExtendedDixonColesMatchPredictor
-):
+    season: str,
+    gameweek: int,
+    dbsession: Session,
+    ratings: bool = True,
+    model: Optional[
+        Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]
+    ] = None,
+    **fit_args,
+) -> Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]:
     """
-    get the fitted team model using the past results and the FIFA rankings
+    Get the fitted team model using past results and, if ratings, FIFA ratings.
     """
-    print(f"Fitting team model ({type(team_model_class())})...")
-    training_data = get_training_data(season, gameweek, dbsession)
-    team_model = create_and_fit_team_model(training_data, team_model_class)
-    return add_new_teams_to_model(team_model, season, dbsession)
+    # Build per call; a default instance in the signature is shared across calls.
+    if model is None:
+        model = ExtendedDixonColesMatchPredictor()
+    print(f"Fitting team model ({type(model)})...")
+    training_data = get_training_data(
+        season=season, gameweek=gameweek, dbsession=dbsession, ratings=ratings
+    )
+    team_model = create_and_fit_team_model(
+        training_data=training_data, model=model, **fit_args
+    )
+    return add_new_teams_to_model(
+        team_model=team_model, season=season, dbsession=dbsession, ratings=ratings
+    )
 
 
 def fixture_probabilities(
-    gameweek, season=CURRENT_SEASON, team_model=None, dbsession=session
-):
+    gameweek: int,
+    season: str = CURRENT_SEASON,
+    model: Optional[
+        Union[ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor]
+    ] = None,
+    dbsession: Session = session,
+    ratings: bool = True,
+    **fit_args,
+) -> pd.DataFrame:
     """
     Returns probabilities for all fixtures in a given gameweek and season, as a data
     frame with a row for each fixture and columns being home_team,
     away_team, home_win_probability, draw_probability, away_win_probability.
+
+    If no model is passed, it will fit a ExtendedDixonColesMatchPredictor model
+    by default.
     """
-    if team_model is None:
-        team_model = get_fitted_team_model(season, gameweek, dbsession)
+
+    # fit team model if none is passed or if it is not fitted yet
+    # (model.teams will be None if so)
+    if model is None:
+        # fit extended model by default
+        model = get_fitted_team_model(
+            season=season,
+            gameweek=gameweek,
+            dbsession=dbsession,
+            ratings=ratings,
+            model=ExtendedDixonColesMatchPredictor(),
+            **fit_args,
+        )
+    elif model.teams is None:
+        # model is not fit yet, so will need to fit
+        model = get_fitted_team_model(
+            season=season,
+            gameweek=gameweek,
+            dbsession=dbsession,
+            ratings=ratings,
+            model=model,
+            **fit_args,
+        )
+
+    # obtain fixtures
     fixtures = get_fixture_teams(
-        get_fixtures_for_gameweek(gameweek, season=season, dbsession=dbsession)
+        get_fixtures_for_gameweek(gameweek=gameweek, season=season, dbsession=dbsession)
     )
     home_teams, away_teams = zip(*fixtures)
-    probabilities = team_model.predict_outcome_proba(home_teams, away_teams)
-
+    # obtain match probabilities
+    if isinstance(model, ExtendedDixonColesMatchPredictor):
+        probabilities = model.predict_outcome_proba(home_teams, away_teams)
+    elif isinstance(model, NeutralDixonColesMatchPredictor):
+        probabilities = model.predict_outcome_proba(
+            home_teams, away_teams, neutral_venue=np.zeros(len(home_teams))
+        )
+    else:
+        raise NotImplementedError(
+            "model must be either of type "
+            "'ExtendedDixonColesMatchPredictor' or "
+            "'NeutralDixonColesMatchPredictor'"
+        )
     return pd.DataFrame(
         {
             "home_team": home_teams,
@@ -138,9 +254,17 @@ def fixture_probabilities(
     )
 
 
-def get_goal_probabilities_for_fixtures(fixtures, team_model, max_goals=10):
-    """Get the probability that each team in a fixture scores any number of goals up
-    to max_goals."""
+def get_goal_probabilities_for_fixtures(
+    fixtures: List[Fixture],
+    team_model: Union[
+        ExtendedDixonColesMatchPredictor, NeutralDixonColesMatchPredictor
+    ],
+    max_goals: int = 10,
+) -> Dict[int, Dict[str, Dict[int, float]]]:
+    """
+    Get the probability that each team in a fixture scores any number of goals up
+    to max_goals.
+    """
     goals = np.arange(0, max_goals + 1)
     probs = {}
     for f in fixtures:

diff --git a/airsenal/framework/data_fetcher.py b/airsenal/framework/data_fetcher.py
@@ -2,6 +2,7 @@
 Classes to query the FPL API to retrieve current FPL data,
 and to query football-data.org to retrieve match and fixture data.
 """
+
 import getpass
 import json
 import time

diff --git a/airsenal/framework/env.py b/airsenal/framework/env.py
@@ -1,6 +1,7 @@
 """
 Database can be either an sqlite file or a postgress server
 """
+
 import os
 from pathlib import Path
 

diff --git a/airsenal/framework/mappings.py b/airsenal/framework/mappings.py
@@ -2,7 +2,6 @@
 map between different conventions used in different data sources.
 """
 
-
 positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}
 
 alternative_team_names = {

diff --git a/airsenal/framework/optimization_transfers.py b/airsenal/framework/optimization_transfers.py
@@ -2,6 +2,7 @@
 Functions for optimising transfers across multiple gameweeks, including the possibility
 of using chips.
 """
+
 import random
 from operator import itemgetter
 

diff --git a/airsenal/framework/optimization_utils.py b/airsenal/framework/optimization_utils.py
@@ -1,6 +1,7 @@
 """
 functions to optimize the transfers for N weeks ahead
 """
+
 import warnings
 from copy import deepcopy
 from datetime import datetime

diff --git a/airsenal/framework/player.py b/airsenal/framework/player.py
@@ -3,8 +3,8 @@
 """
 
 from airsenal.framework.schema import Player
+from airsenal.framework.season import CURRENT_SEASON
 from airsenal.framework.utils import (
-    CURRENT_SEASON,
     NEXT_GAMEWEEK,
     get_player,
     get_predicted_points_for_player,
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,7 +2,6 @@ @@
     map between different conventions used in different data sources.
     """
     positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}
     alternative_team_names = {
@@ Expand Down @@