From c7f4f415a46f258f4bf16eb6b94a4589e609a82c Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Sat, 17 Jul 2021 23:04:39 +0100 Subject: [PATCH 1/4] add predict_score_n_proba and predict_concede_n_proba --- bpl/base.py | 60 +++++++++++++++++++++++++++++++++++++++- tests/test_all_models.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/bpl/base.py b/bpl/base.py index a405667..0a028c1 100644 --- a/bpl/base.py +++ b/bpl/base.py @@ -2,7 +2,7 @@ from __future__ import annotations from abc import abstractmethod -from typing import Dict, Iterable, Union +from typing import Dict, Iterable, Optional, Union import jax.numpy as jnp import numpy as np @@ -79,3 +79,61 @@ def predict_outcome_proba( prob_draw = probs[:, x == y].sum(axis=-1) return {"home_win": prob_home_win, "away_win": prob_away_win, "draw": prob_draw} + + def predict_score_n_proba( + self, + n: int, + team: Union[str, Iterable[str]], + opponent: Union[str, Iterable[str]], + home: Optional[bool] = True, + ) -> float: + """ + Compute the probability that a team will score n goals. + Given a team and an opponent, calculate the probability that the team will + score n goals against this opponent. + + Args: + n (int): number of goals scored. + team (Union[str, Iterable[str]]): name of the team scoring the goals. + opponent (Union[str, Iterable[str]]): name of the opponent. + home (Optional[bool]): whether team is at home. + + Returns: + float: Probability that team scores n goals against opponent. + """ + score_fn = ( + (lambda x: self.predict_score_proba(team, opponent, n, x)) + if home + else (lambda x: self.predict_score_proba(opponent, team, x, n)) + ) + # sum probability all scorelines where team scored n goals + return sum(score_fn(np.arange(MAX_GOALS + 1))) + + def predict_concede_n_proba( + self, + n: int, + team: Union[str, Iterable[str]], + opponent: Union[str, Iterable[str]], + home: Optional[bool] = True, + ) -> float: + """ + Compute the probability that a team will concede n goals. + Given a team and an opponent, calculate the probability that the team will + concede n goals against this opponent. + + Args: + n (int): number of goals conceded. + team (Union[str, Iterable[str]]): name of the team conceding the goals. + opponent (Union[str, Iterable[str]]): name of the opponent. + home (Optional[bool]): whether team is at home. + + Returns: + float: Probability that team concedes n goals against opponent. + """ + score_fn = ( + (lambda x: self.predict_score_proba(team, opponent, x, n)) + if home + else (lambda x: self.predict_score_proba(opponent, team, n, x)) + ) + # sum probability all scorelines where team conceded n goals + return sum(score_fn(np.arange(MAX_GOALS + 1))) diff --git a/tests/test_all_models.py b/tests/test_all_models.py index 0cf686b..ff076f7 100644 --- a/tests/test_all_models.py +++ b/tests/test_all_models.py @@ -3,6 +3,7 @@ import pytest from bpl import DixonColesMatchPredictor, ExtendedDixonColesMatchPredictor +from bpl.base import MAX_GOALS MODELS = [DixonColesMatchPredictor, ExtendedDixonColesMatchPredictor] @@ -42,3 +43,48 @@ def test_predict_outcome_proba(dummy_data, model_cls): "draw" ] == pytest.approx(1.0, abs=1e-5) + +@pytest.mark.parametrize("model_cls", MODELS) +def test_predict_score_n_proba(dummy_data, model_cls): + model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100) + + proba_home = jnp.array( + [ + model.predict_score_n_proba(n, "0", "1") + for n in range(MAX_GOALS + 1) + ] + ) + assert jnp.all((proba_home >= 0) & (proba_home <= 1)) + assert sum(proba_home) == pytest.approx(1.0, abs=1e-5) + + proba_away = jnp.array( + [ + model.predict_score_n_proba(n, "0", "1", home=False) + for n in range(MAX_GOALS + 1) + ] + ) + assert jnp.all((proba_away >= 0) & (proba_away <= 1)) + assert sum(proba_away) == pytest.approx(1.0, abs=1e-5) + + +@pytest.mark.parametrize("model_cls", MODELS) +def test_predict_concede_n_proba(dummy_data, model_cls): + model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100) + + proba_home = jnp.array( + [ + model.predict_concede_n_proba(n, "0", "1") + for n in range(MAX_GOALS + 1) + ] + ) + assert jnp.all((proba_home >= 0) & (proba_home <= 1)) + assert sum(proba_home) == pytest.approx(1.0, abs=1e-5) + + proba_away = jnp.array( + [ + model.predict_concede_n_proba(n, "0", "1", home=False) + for n in range(MAX_GOALS + 1) + ] + ) + assert jnp.all((proba_away >= 0) & (proba_away <= 1)) + assert sum(proba_away) == pytest.approx(1.0, abs=1e-5) From 41ec8aeffb64179949d924a3b8db7bda70357be5 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Sun, 18 Jul 2021 13:06:29 +0100 Subject: [PATCH 2/4] score/concede n probability can now compute for multiple values of n at once --- bpl/base.py | 57 ++++++++++++++++++++++++++-------------- tests/test_all_models.py | 49 +++++++++++++++++----------------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/bpl/base.py b/bpl/base.py index 0a028c1..f19b03f 100644 --- a/bpl/base.py +++ b/bpl/base.py @@ -82,58 +82,77 @@ def predict_outcome_proba( def predict_score_n_proba( self, - n: int, + n: Union[int, Iterable[int]], team: Union[str, Iterable[str]], opponent: Union[str, Iterable[str]], home: Optional[bool] = True, - ) -> float: + ) -> jnp.array: """ Compute the probability that a team will score n goals. Given a team and an opponent, calculate the probability that the team will score n goals against this opponent. Args: - n (int): number of goals scored. + n (Union[int, Iterable[int]]): number of goals scored. team (Union[str, Iterable[str]]): name of the team scoring the goals. opponent (Union[str, Iterable[str]]): name of the opponent. home (Optional[bool]): whether team is at home. Returns: - float: Probability that team scores n goals against opponent. + jnp.array: Probability that team scores n goals against opponent. """ - score_fn = ( - (lambda x: self.predict_score_proba(team, opponent, n, x)) + n = [n] if isinstance(n, int) else n + + # flat lists of all possible scorelines with team scoring n goals + team_rep = np.repeat(team, (MAX_GOALS + 1) * len(n)) + opponent_rep = np.repeat(opponent, (MAX_GOALS + 1) * len(n)) + n_rep = np.resize(n, (MAX_GOALS + 1) * len(n)) + x_rep = np.repeat(np.arange(MAX_GOALS + 1), len(n)) + + probs = ( + self.predict_score_proba(team_rep, opponent_rep, n_rep, x_rep) if home - else (lambda x: self.predict_score_proba(opponent, team, x, n)) - ) - # sum probability all scorelines where team scored n goals - return sum(score_fn(np.arange(MAX_GOALS + 1))) + else self.predict_score_proba(opponent_rep, team_rep, x_rep, n_rep) + ).reshape(MAX_GOALS + 1, len(n)) + + # sum probability of all scorelines where team scored n goals + return probs.sum(axis=0) def predict_concede_n_proba( self, - n: int, + n: Union[int, Iterable[int]], team: Union[str, Iterable[str]], opponent: Union[str, Iterable[str]], home: Optional[bool] = True, - ) -> float: + ) -> jnp.array: """ Compute the probability that a team will concede n goals. Given a team and an opponent, calculate the probability that the team will concede n goals against this opponent. Args: - n (int): number of goals conceded. + n (Union[int, Iterable[int]]): number of goals conceded. team (Union[str, Iterable[str]]): name of the team conceding the goals. opponent (Union[str, Iterable[str]]): name of the opponent. home (Optional[bool]): whether team is at home. Returns: - float: Probability that team concedes n goals against opponent. + jnp.array: Probability that team concedes n goals against opponent. """ - score_fn = ( - (lambda x: self.predict_score_proba(team, opponent, x, n)) + n = [n] if isinstance(n, int) else n + + # flat lists of all possible scorelines with team conceding n goals + team_rep = np.repeat(team, (MAX_GOALS + 1) * len(n)) + opponent_rep = np.repeat(opponent, (MAX_GOALS + 1) * len(n)) + n_rep = np.resize(n, (MAX_GOALS + 1) * len(n)) + x_rep = np.repeat(np.arange(MAX_GOALS + 1), len(n)) + + probs = ( + self.predict_score_proba(team_rep, opponent_rep, x_rep, n_rep) if home - else (lambda x: self.predict_score_proba(opponent, team, n, x)) - ) + else self.predict_score_proba(opponent_rep, team_rep, n_rep, x_rep) + ).reshape(MAX_GOALS + 1, len(n)) + # sum probability all scorelines where team conceded n goals - return sum(score_fn(np.arange(MAX_GOALS + 1))) + return probs.sum(axis=0) + diff --git a/tests/test_all_models.py b/tests/test_all_models.py index ff076f7..32ce256 100644 --- a/tests/test_all_models.py +++ b/tests/test_all_models.py @@ -48,43 +48,44 @@ def test_predict_outcome_proba(dummy_data, model_cls): def test_predict_score_n_proba(dummy_data, model_cls): model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100) - proba_home = jnp.array( - [ - model.predict_score_n_proba(n, "0", "1") - for n in range(MAX_GOALS + 1) - ] - ) + n = jnp.arange(MAX_GOALS + 1) + proba_home = model.predict_score_n_proba(n, "0", "1") + assert len(proba_home) == len(n) assert jnp.all((proba_home >= 0) & (proba_home <= 1)) assert sum(proba_home) == pytest.approx(1.0, abs=1e-5) - proba_away = jnp.array( - [ - model.predict_score_n_proba(n, "0", "1", home=False) - for n in range(MAX_GOALS + 1) - ] - ) + proba_away = model.predict_score_n_proba(n, "0", "1", home=False) + assert len(proba_home) == len(n) assert jnp.all((proba_away >= 0) & (proba_away <= 1)) assert sum(proba_away) == pytest.approx(1.0, abs=1e-5) + + assert sum(proba_home * n) > sum(proba_away * n) # score more at home + + proba_single = model.predict_score_n_proba(1, "0", "1") + assert len(proba_single) == 1 + assert (proba_single[0] >= 0) and (proba_single[0] <= 1) @pytest.mark.parametrize("model_cls", MODELS) def test_predict_concede_n_proba(dummy_data, model_cls): model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100) - proba_home = jnp.array( - [ - model.predict_concede_n_proba(n, "0", "1") - for n in range(MAX_GOALS + 1) - ] - ) + n = jnp.arange(MAX_GOALS + 1) + proba_home = model.predict_concede_n_proba(n, "0", "1") + assert len(proba_home) == len(n) assert jnp.all((proba_home >= 0) & (proba_home <= 1)) assert sum(proba_home) == pytest.approx(1.0, abs=1e-5) - proba_away = jnp.array( - [ - model.predict_concede_n_proba(n, "0", "1", home=False) - for n in range(MAX_GOALS + 1) - ] - ) + proba_away = model.predict_concede_n_proba(n, "0", "1", home=False) + assert len(proba_home) == len(n) assert jnp.all((proba_away >= 0) & (proba_away <= 1)) assert sum(proba_away) == pytest.approx(1.0, abs=1e-5) + + assert sum(proba_home * n) < sum(proba_away * n) # concede more away + + proba_team_concede = model.predict_concede_n_proba(1, "0", "1") + assert len(proba_team_concede) == 1 + assert (proba_team_concede[0] >= 0) and (proba_team_concede[0] <= 1) + + proba_opponent_score = model.predict_score_n_proba(1, "1", "0", home=False) + assert proba_team_concede == pytest.approx(proba_opponent_score, abs=1e-5) From 44513e93fd123dfcf125b2c1162b4fa43d53a06b Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Fri, 6 Aug 2021 21:06:40 +0100 Subject: [PATCH 3/4] black --- bpl/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bpl/base.py b/bpl/base.py index f19b03f..515b7b6 100644 --- a/bpl/base.py +++ b/bpl/base.py @@ -155,4 +155,3 @@ def predict_concede_n_proba( # sum probability all scorelines where team conceded n goals return probs.sum(axis=0) - From c526dab1ee3bed8001249a9bd8030d8edac2b738 Mon Sep 17 00:00:00 2001 From: jack89roberts Date: Fri, 6 Aug 2021 21:08:50 +0100 Subject: [PATCH 4/4] isort --- bpl/dixon_coles.py | 2 +- bpl/extended_dixon_coles.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bpl/dixon_coles.py b/bpl/dixon_coles.py index e42718c..64d7c9e 100644 --- a/bpl/dixon_coles.py +++ b/bpl/dixon_coles.py @@ -12,8 +12,8 @@ from numpyro.infer import MCMC, NUTS from numpyro.infer.reparam import LocScaleReparam -from bpl.base import BaseMatchPredictor from bpl._util import dixon_coles_correlation_term +from bpl.base import BaseMatchPredictor __all__ = ["DixonColesMatchPredictor"] diff --git a/bpl/extended_dixon_coles.py b/bpl/extended_dixon_coles.py index c9ce025..e0b5cd7 100644 --- a/bpl/extended_dixon_coles.py +++ b/bpl/extended_dixon_coles.py @@ -12,8 +12,8 @@ from numpyro.infer import MCMC, NUTS from numpyro.infer.reparam import LocScaleReparam -from bpl.base import BaseMatchPredictor from bpl._util import dixon_coles_correlation_term +from bpl.base import BaseMatchPredictor __all__ = ["ExtendedDixonColesMatchPredictor"]