Merge pull request #5 from anguswilliams91/feature/score-concede-n

Add predict_score_n_proba and predict_concede_n_proba to BaseMatchPredictor
anguswilliams91 · Feb 4, 2022 · 760bb2e · 760bb2e
2 parents c5e9a8f + c526dab
commit 760bb2e
Show file tree

Hide file tree

Showing 4 changed files with 126 additions and 3 deletions.
diff --git a/bpl/base.py b/bpl/base.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from abc import abstractmethod
-from typing import Dict, Iterable, Union
+from typing import Dict, Iterable, Optional, Union
 
 import jax.numpy as jnp
 import numpy as np
@@ -79,3 +79,79 @@ def predict_outcome_proba(
         prob_draw = probs[:, x == y].sum(axis=-1)
 
         return {"home_win": prob_home_win, "away_win": prob_away_win, "draw": prob_draw}
+
+    def predict_score_n_proba(
+        self,
+        n: Union[int, Iterable[int]],
+        team: Union[str, Iterable[str]],
+        opponent: Union[str, Iterable[str]],
+        home: Optional[bool] = True,
+    ) -> jnp.array:
+        """
+        Compute the probability that a team will score n goals.
+
+        Given a team and an opponent, sum the scoreline probabilities returned
+        by ``predict_score_proba`` over every opponent goal count from 0 to
+        ``MAX_GOALS`` (inclusive), holding the team's own goal count fixed at n.
+
+        Args:
+            n (Union[int, Iterable[int]]): number of goals scored. A scalar
+                (Python int, NumPy integer or 0-d array) is treated as a
+                one-element sequence.
+            team (Union[str, Iterable[str]]): name of the team scoring the goals.
+            opponent (Union[str, Iterable[str]]): name of the opponent.
+            home (Optional[bool]): whether team is at home. Any falsy value
+                (including None) is treated as team playing away.
+
+        Returns:
+            jnp.array: Probability that team scores n goals against opponent,
+                with one entry per value of n (length 1 for a scalar n).
+
+        NOTE(review): team and opponent are annotated as possibly iterable, but
+        the repeated arrays built below only have matching lengths when a single
+        name is passed for each -- confirm the intended contract.
+        """
+        # np.atleast_1d promotes any scalar flavour (int, np.integer, 0-d array)
+        # to a 1-d array; a bare isinstance(n, int) check misses NumPy scalars
+        # and would then fail on len(n).
+        n = np.atleast_1d(n)
+        n_scorelines = (MAX_GOALS + 1) * len(n)
+
+        # flat lists of all possible scorelines with team scoring n goals:
+        # n_rep cycles through n while x_rep holds each opponent goal count for
+        # len(n) consecutive entries, so together they enumerate every (x, n).
+        team_rep = np.repeat(team, n_scorelines)
+        opponent_rep = np.repeat(opponent, n_scorelines)
+        n_rep = np.resize(n, n_scorelines)
+        x_rep = np.repeat(np.arange(MAX_GOALS + 1), len(n))
+
+        # predict_score_proba is called as (home team, away team, home goals,
+        # away goals), so the arguments swap when team is the away side.
+        probs = (
+            self.predict_score_proba(team_rep, opponent_rep, n_rep, x_rep)
+            if home
+            else self.predict_score_proba(opponent_rep, team_rep, x_rep, n_rep)
+        ).reshape(MAX_GOALS + 1, len(n))
+
+        # rows index the opponent's goals, columns index n: summing over rows
+        # adds up all scorelines where team scored n goals
+        return probs.sum(axis=0)
+
+    def predict_concede_n_proba(
+        self,
+        n: Union[int, Iterable[int]],
+        team: Union[str, Iterable[str]],
+        opponent: Union[str, Iterable[str]],
+        home: Optional[bool] = True,
+    ) -> jnp.array:
+        """
+        Compute the probability that a team will concede n goals.
+
+        Given a team and an opponent, sum the scoreline probabilities returned
+        by ``predict_score_proba`` over every goal count of the team itself from
+        0 to ``MAX_GOALS`` (inclusive), holding the opponent's goals fixed at n.
+
+        Args:
+            n (Union[int, Iterable[int]]): number of goals conceded. A scalar
+                (Python int, NumPy integer or 0-d array) is treated as a
+                one-element sequence.
+            team (Union[str, Iterable[str]]): name of the team conceding the goals.
+            opponent (Union[str, Iterable[str]]): name of the opponent.
+            home (Optional[bool]): whether team is at home. Any falsy value
+                (including None) is treated as team playing away.
+
+        Returns:
+            jnp.array: Probability that team concedes n goals against opponent,
+                with one entry per value of n (length 1 for a scalar n).
+
+        NOTE(review): team and opponent are annotated as possibly iterable, but
+        the repeated arrays built below only have matching lengths when a single
+        name is passed for each -- confirm the intended contract.
+        """
+        # np.atleast_1d promotes any scalar flavour (int, np.integer, 0-d array)
+        # to a 1-d array; a bare isinstance(n, int) check misses NumPy scalars
+        # and would then fail on len(n).
+        n = np.atleast_1d(n)
+        n_scorelines = (MAX_GOALS + 1) * len(n)
+
+        # flat lists of all possible scorelines with team conceding n goals:
+        # n_rep cycles through n while x_rep holds each of the team's own goal
+        # counts for len(n) consecutive entries, enumerating every (x, n).
+        team_rep = np.repeat(team, n_scorelines)
+        opponent_rep = np.repeat(opponent, n_scorelines)
+        n_rep = np.resize(n, n_scorelines)
+        x_rep = np.repeat(np.arange(MAX_GOALS + 1), len(n))
+
+        # predict_score_proba is called as (home team, away team, home goals,
+        # away goals); the conceded goals n belong to whichever side opponent is.
+        probs = (
+            self.predict_score_proba(team_rep, opponent_rep, x_rep, n_rep)
+            if home
+            else self.predict_score_proba(opponent_rep, team_rep, n_rep, x_rep)
+        ).reshape(MAX_GOALS + 1, len(n))
+
+        # rows index the team's own goals, columns index n: summing over rows
+        # adds up all scorelines where team conceded n goals
+        return probs.sum(axis=0)
diff --git a/bpl/dixon_coles.py b/bpl/dixon_coles.py
@@ -12,8 +12,8 @@
 from numpyro.infer import MCMC, NUTS
 from numpyro.infer.reparam import LocScaleReparam
 
-from bpl.base import BaseMatchPredictor
 from bpl._util import dixon_coles_correlation_term
+from bpl.base import BaseMatchPredictor
 
 __all__ = ["DixonColesMatchPredictor"]
 

diff --git a/bpl/extended_dixon_coles.py b/bpl/extended_dixon_coles.py
@@ -12,8 +12,8 @@
 from numpyro.infer import MCMC, NUTS
 from numpyro.infer.reparam import LocScaleReparam
 
-from bpl.base import BaseMatchPredictor
 from bpl._util import dixon_coles_correlation_term
+from bpl.base import BaseMatchPredictor
 
 __all__ = ["ExtendedDixonColesMatchPredictor"]
 

diff --git a/tests/test_all_models.py b/tests/test_all_models.py
@@ -3,6 +3,7 @@
 import pytest
 
 from bpl import DixonColesMatchPredictor, ExtendedDixonColesMatchPredictor
+from bpl.base import MAX_GOALS
 
 MODELS = [DixonColesMatchPredictor, ExtendedDixonColesMatchPredictor]
 
@@ -42,3 +43,49 @@ def test_predict_outcome_proba(dummy_data, model_cls):
         "draw"
     ] == pytest.approx(1.0, abs=1e-5)
 
+
+@pytest.mark.parametrize("model_cls", MODELS)
+def test_predict_score_n_proba(dummy_data, model_cls):
+    """Check predict_score_n_proba at home, away and for a scalar n."""
+    model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100)
+
+    # cover every goal count the model enumerates so the probabilities sum to 1
+    n = jnp.arange(MAX_GOALS + 1)
+    proba_home = model.predict_score_n_proba(n, "0", "1")
+    assert len(proba_home) == len(n)
+    assert jnp.all((proba_home >= 0) & (proba_home <= 1))
+    assert sum(proba_home) == pytest.approx(1.0, abs=1e-5)
+
+    proba_away = model.predict_score_n_proba(n, "0", "1", home=False)
+    # check the away result itself (previously re-checked proba_home)
+    assert len(proba_away) == len(n)
+    assert jnp.all((proba_away >= 0) & (proba_away <= 1))
+    assert sum(proba_away) == pytest.approx(1.0, abs=1e-5)
+
+    assert sum(proba_home * n) > sum(proba_away * n)  # score more at home
+
+    # a scalar n still yields an array, of length one
+    proba_single = model.predict_score_n_proba(1, "0", "1")
+    assert len(proba_single) == 1
+    assert (proba_single[0] >= 0) and (proba_single[0] <= 1)
+
+
+@pytest.mark.parametrize("model_cls", MODELS)
+def test_predict_concede_n_proba(dummy_data, model_cls):
+    """Check predict_concede_n_proba at home, away and against score_n."""
+    model = model_cls().fit(dummy_data, num_samples=100, num_warmup=100)
+
+    # cover every goal count the model enumerates so the probabilities sum to 1
+    n = jnp.arange(MAX_GOALS + 1)
+    proba_home = model.predict_concede_n_proba(n, "0", "1")
+    assert len(proba_home) == len(n)
+    assert jnp.all((proba_home >= 0) & (proba_home <= 1))
+    assert sum(proba_home) == pytest.approx(1.0, abs=1e-5)
+
+    proba_away = model.predict_concede_n_proba(n, "0", "1", home=False)
+    # check the away result itself (previously re-checked proba_home)
+    assert len(proba_away) == len(n)
+    assert jnp.all((proba_away >= 0) & (proba_away <= 1))
+    assert sum(proba_away) == pytest.approx(1.0, abs=1e-5)
+
+    assert sum(proba_home * n) < sum(proba_away * n)  # concede more away
+
+    # a scalar n still yields an array, of length one
+    proba_team_concede = model.predict_concede_n_proba(1, "0", "1")
+    assert len(proba_team_concede) == 1
+    assert (proba_team_concede[0] >= 0) and (proba_team_concede[0] <= 1)
+
+    # team "0" conceding one goal at home is the same event as team "1"
+    # scoring one goal away, so both methods must agree
+    proba_opponent_score = model.predict_score_n_proba(1, "1", "0", home=False)
+    assert proba_team_concede == pytest.approx(proba_opponent_score, abs=1e-5)