Skip to content

Commit

Permalink
The first usable API
Browse files Browse the repository at this point in the history
  • Loading branch information
dustalov committed Jul 6, 2024
1 parent 3f5e33a commit 3e95c03
Show file tree
Hide file tree
Showing 5 changed files with 359 additions and 109 deletions.
85 changes: 55 additions & 30 deletions Tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
"import numpy.typing as npt\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"from evalica import Winner\n",
"from plotly.graph_objects import Figure"
]
},
Expand All @@ -34,33 +33,44 @@
"outputs": [],
"source": [
"df_food = pd.read_csv(\"food.csv\", dtype=str)\n",
"\n",
"df_food['winner'] = df_food[\"winner\"].map({\n",
" \"left\": evalica.Winner.X,\n",
" \"right\": evalica.Winner.Y,\n",
" \"tie\": evalica.Winner.Draw,\n",
"})\n",
"\n",
"df_food.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "024e6a61-0ddb-4235-aa7b-6fb5280b5d9e",
"id": "27ebe4b8-d159-4689-be31-ba5e25848e69",
"metadata": {},
"outputs": [],
"source": [
"index: dict[str, int] = {}\n",
"\n",
"for xy in zip(df_food[\"left\"], df_food[\"right\"], strict=False):\n",
" for e in xy:\n",
" index[e] = index.get(e, len(index))\n",
"\n",
"xs = [index[x] for x in df_food[\"left\"]]\n",
"ys = [index[y] for y in df_food[\"right\"]]\n",
"ws = df_food[\"winner\"].map({\n",
" \"left\": Winner.X,\n",
" \"right\": Winner.Y,\n",
" \"tie\": Winner.Draw,\n",
" }).tolist()\n",
"\n",
"wins, ties = evalica.matrices(xs, ys, ws)\n",
"\n",
"wins, ties"
"matrices = evalica.matrices(df_food['left'], df_food['right'], df_food['winner'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42da86e0-c2c6-42c4-9c5e-84d56587fdea",
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(matrices.win_matrix, index=matrices.index, columns=matrices.index) # win matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5dc51f6-bca7-4153-8265-18c019e8a639",
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(matrices.tie_matrix, index=matrices.index, columns=matrices.index) # tie matrix"
]
},
{
Expand All @@ -70,40 +80,55 @@
"metadata": {},
"outputs": [],
"source": [
"scores, _iterations = evalica.bradley_terry(wins + ties / 2, tolerance=1e-4, limit=100)\n",
"scores"
"bt_result = evalica.bradley_terry(df_food['left'], df_food['right'], df_food['winner'])\n",
"bt_result.scores.sort_values(ascending=False).to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64bf8f2b-a08f-46b9-b4df-e5d58d8c8a50",
"metadata": {},
"outputs": [],
"source": [
"n_result = evalica.newman(df_food['left'], df_food['right'], df_food['winner'])\n",
"n_result.scores.sort_values(ascending=False).to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d331724-d4aa-4270-b0d7-d07ecab17ac7",
"id": "35cdcc91-d182-4d1c-842e-251052defcd4",
"metadata": {},
"outputs": [],
"source": [
"scores, _v, _iterations = evalica.newman(wins.astype(np.float64), ties / 2, .5, tolerance=1e-4, limit=100)\n",
"scores"
"eigen_result = evalica.eigen(df_food['left'], df_food['right'], df_food['winner'])\n",
"eigen_result.scores.sort_values(ascending=False).to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62c6a626-0d64-4b8a-979b-5bf33dac67ba",
"id": "39c5d898-944c-472d-abc2-553c54503adf",
"metadata": {},
"outputs": [],
"source": [
"def to_pairwise(scores: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:\n",
" return scores[:, np.newaxis] / (scores + scores[:, np.newaxis])"
"elo_result = evalica.elo(df_food['left'], df_food['right'], df_food['winner'])\n",
"elo_result.scores.sort_values(ascending=False).to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1bff4e2-441d-48ad-93e4-e075b0ecb007",
"id": "66e1a1c5-b32d-4988-92b3-7549fbad3845",
"metadata": {},
"outputs": [],
"source": [
"to_pairwise(scores)"
"bt_pairwise = evalica.pairwise(bt_result.scores)\n",
"\n",
"df_bt_pairwise = pd.DataFrame(bt_pairwise, index=bt_result.scores.index, columns=bt_result.scores.index)\n",
"\n",
"df_bt_pairwise"
]
},
{
Expand All @@ -127,7 +152,7 @@
"metadata": {},
"outputs": [],
"source": [
"visualize(to_pairwise(scores))"
"visualize(df_bt_pairwise)"
]
},
{
Expand Down
223 changes: 214 additions & 9 deletions python/evalica/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from .evalica import Winner, __version__
from .evalica import py_bradley_terry as bradley_terry
from .evalica import py_counting as counting
from .evalica import py_eigen as eigen
from .evalica import py_elo as elo
from .evalica import py_matrices as matrices
from .evalica import py_newman as newman
from collections import OrderedDict
from collections.abc import Hashable, Iterable
from dataclasses import dataclass
from typing import TypeVar

import numpy as np
import numpy.typing as npt
import pandas as pd

from .evalica import Winner, __version__, py_bradley_terry, py_counting, py_eigen, py_elo, py_matrices, py_newman
from .naive import bradley_terry as bradley_terry_naive
from .naive import newman as newman_naive

Expand All @@ -15,5 +18,207 @@
Winner.Ignore,
]

__all__ = ["__version__", "Winner", "matrices", "counting", "bradley_terry", "newman", "elo", "eigen", "WINNERS",
"bradley_terry_naive", "newman_naive"]
# Type of the elements being compared; they must be hashable because they
# are used as dictionary keys when positions are assigned.
T = TypeVar("T", bound=Hashable)


def index(xs: Iterable[T], *yss: Iterable[T]) -> dict[T, int]:
    """Map every distinct element of the inputs to a consecutive integer.

    The iterables are scanned in the order given (``xs`` first, then each
    of ``yss``); an element receives the next free position the first time
    it is seen and keeps it afterwards.

    Args:
        xs: First iterable of hashable elements.
        *yss: Any number of further iterables of hashable elements.

    Returns:
        An ``OrderedDict`` from element to its zero-based position, with
        keys in first-seen order.
    """
    # Named ``positions`` rather than ``index`` so that the local does not
    # shadow this function's own name.
    positions: dict[T, int] = OrderedDict()

    for ys in (xs, *yss):
        for y in ys:
            # setdefault only inserts when the element is new, so an
            # already-seen element keeps its original position.
            positions.setdefault(y, len(positions))

    return positions

def _index_elements(xs: Iterable[T], ys: Iterable[T]) -> tuple["pd.Index[T]", list[int], list[int]]:
    """Translate two element iterables into integer positions.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.

    Returns:
        A tuple ``(xy_index, xs_indexed, ys_indexed)``: a pandas ``Index``
        built from the element-to-position mapping produced by ``index``,
        followed by the positions of the elements of ``xs`` and of ``ys``.
    """
    # Each input is traversed twice (once to build the mapping, once to
    # translate it), so materialise it first; otherwise a one-shot iterator
    # such as a generator would be exhausted by the first pass and silently
    # produce an empty position list.
    xs_list, ys_list = list(xs), list(ys)

    xy_index = index(xs_list, ys_list)

    xs_indexed = [xy_index[x] for x in xs_list]
    ys_indexed = [xy_index[y] for y in ys_list]

    return pd.Index(xy_index), xs_indexed, ys_indexed


@dataclass(frozen=True)
class MatricesResult:
    """Immutable result of ``matrices``: the two count matrices and their labels."""

    # First matrix returned by py_matrices.
    # NOTE(review): presumably entry [i, j] counts wins of element i over
    # element j - confirm against the py_matrices implementation.
    win_matrix: npt.NDArray[np.int64]
    # Second matrix returned by py_matrices (presumably pairwise tie counts).
    tie_matrix: npt.NDArray[np.int64]
    # Labels of the compared elements; position k labels row/column k.
    index: "pd.Index[Hashable]"

def matrices(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
) -> MatricesResult:
    """Build the win and tie matrices for a set of pairwise comparisons.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.

    Returns:
        A ``MatricesResult`` holding both matrices and the index that labels
        their rows and columns.
    """
    xy_index, _xs, _ys = _index_elements(xs, ys)

    # Descriptive local names: the previous ``T`` shadowed the module-level
    # TypeVar of the same name inside this function.
    win_matrix, tie_matrix = py_matrices(_xs, _ys, ws)

    return MatricesResult(
        win_matrix=win_matrix,
        tie_matrix=tie_matrix,
        index=xy_index,
    )


@dataclass(frozen=True)
class CountingResult:
    """Immutable result of ``counting``."""

    # Output of py_counting, labelled by the compared elements.
    scores: "pd.Series[T]"
    # Win matrix (first output of py_matrices) the scores were derived from.
    win_matrix: npt.NDArray[np.int64]


def counting(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
) -> CountingResult:
    """Score elements by applying ``py_counting`` to the win matrix.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.

    Returns:
        A ``CountingResult`` with a ``pd.Series`` of scores (named
        ``"counting"`` and indexed by element) and the win matrix used.
    """
    xy_index, _xs, _ys = _index_elements(xs, ys)

    # Only the win matrix is needed here; the tie matrix is discarded.
    win_matrix, _ = py_matrices(_xs, _ys, ws)

    counts = py_counting(win_matrix)

    return CountingResult(
        scores=pd.Series(counts, index=xy_index, name=counting.__name__),
        win_matrix=win_matrix,
    )

@dataclass(frozen=True)
class BradleyTerryResult:
    """Immutable result of ``bradley_terry``."""

    # Scores returned by py_bradley_terry, labelled by the compared elements.
    scores: "pd.Series[T]"
    # Matrix that was fitted: wins plus tie_weight times ties.
    matrix: npt.NDArray[np.float64]
    # Weight that was applied to the tie matrix.
    tie_weight: float
    # Iteration count reported by py_bradley_terry.
    iterations: int

def bradley_terry(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
    tie_weight: float = .5,
    tolerance: float = 1e-4,
    limit: int = 100,
) -> BradleyTerryResult:
    """Score elements with ``py_bradley_terry`` on a tie-weighted win matrix.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.
        tie_weight: Factor applied to the tie matrix before it is added to
            the win matrix; must be finite.
        tolerance: Passed through to ``py_bradley_terry`` (presumably its
            convergence threshold).
        limit: Passed through to ``py_bradley_terry`` (presumably its
            iteration cap).

    Returns:
        A ``BradleyTerryResult`` with the scores as a ``pd.Series`` named
        ``"bradley_terry"``, the fitted matrix, the tie weight and the
        reported iteration count.

    Raises:
        AssertionError: If ``tie_weight`` is not finite.
    """
    # NOTE(review): ``assert`` is removed under ``python -O``, so this check
    # is skipped in optimised runs. Kept as-is to preserve the exception type.
    assert np.isfinite(tie_weight), "tie_weight must be finite"

    xy_index, _xs, _ys = _index_elements(xs, ys)

    # Descriptive local names: the previous ``T`` shadowed the module-level
    # TypeVar of the same name inside this function.
    win_matrix, tie_matrix = py_matrices(_xs, _ys, ws)

    # Fold ties into the wins as fractional wins before fitting.
    matrix = win_matrix.astype(float) + tie_weight * tie_matrix.astype(float)

    scores, iterations = py_bradley_terry(matrix, tolerance, limit)

    return BradleyTerryResult(
        scores=pd.Series(scores, index=xy_index, name=bradley_terry.__name__),
        matrix=matrix,
        tie_weight=tie_weight,
        iterations=iterations,
    )

@dataclass(frozen=True)
class NewmanResult:
    """Immutable result of ``newman``."""

    # Scores returned by py_newman, labelled by the compared elements.
    scores: "pd.Series[T]"
    # Win matrix converted to float, as passed to py_newman.
    win_matrix: npt.NDArray[np.float64]
    # Tie matrix converted to float, as passed to py_newman.
    tie_matrix: npt.NDArray[np.float64]
    # Second value returned by py_newman.
    # NOTE(review): presumably the fitted tie parameter - confirm.
    v: float
    # Value of v the computation was started from.
    v_init: float
    # Iteration count reported by py_newman.
    iterations: int

def newman(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
    v_init: float = .5,
    tolerance: float = 1e-4,
    limit: int = 100,
) -> NewmanResult:
    """Score elements with ``py_newman`` on separate win and tie matrices.

    Unlike ``bradley_terry``, ties are not folded into the win matrix; both
    matrices are handed to ``py_newman`` as floats.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.
        v_init: Starting value of ``v`` passed to ``py_newman``; must be
            finite.
        tolerance: Passed through to ``py_newman`` (presumably its
            convergence threshold).
        limit: Passed through to ``py_newman`` (presumably its iteration
            cap).

    Returns:
        A ``NewmanResult`` with the scores as a ``pd.Series`` named
        ``"newman"``, both float matrices, the returned ``v``, the supplied
        ``v_init`` and the reported iteration count.

    Raises:
        AssertionError: If ``v_init`` is not finite.
    """
    # NOTE(review): ``assert`` is removed under ``python -O``, so this check
    # is skipped in optimised runs. Kept as-is to preserve the exception type.
    assert np.isfinite(v_init), "v_init must be finite"

    xy_index, _xs, _ys = _index_elements(xs, ys)

    # Descriptive local names: the previous ``T`` shadowed the module-level
    # TypeVar of the same name inside this function.
    win_counts, tie_counts = py_matrices(_xs, _ys, ws)
    win_matrix, tie_matrix = win_counts.astype(float), tie_counts.astype(float)

    scores, v, iterations = py_newman(win_matrix, tie_matrix, v_init, tolerance, limit)

    return NewmanResult(
        scores=pd.Series(scores, index=xy_index, name=newman.__name__),
        win_matrix=win_matrix,
        tie_matrix=tie_matrix,
        v=v,
        v_init=v_init,
        iterations=iterations,
    )

@dataclass(frozen=True)
class EloResult:
    """Immutable result of ``elo``: the scores plus the parameters used."""

    # Scores returned by py_elo, labelled by the compared elements.
    scores: "pd.Series[T]"
    # The r, k and s arguments forwarded to py_elo.
    # NOTE(review): presumably initial rating, K-factor and scale - confirm
    # against the py_elo implementation.
    r: float
    k: int
    s: float

def elo(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
    r: float = 1500,
    k: int = 30,
    s: float = 400,
) -> EloResult:
    """Score elements with ``py_elo`` over the comparisons.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.
        r: Forwarded to ``py_elo`` (presumably the initial rating).
        k: Forwarded to ``py_elo`` (presumably the K-factor).
        s: Forwarded to ``py_elo`` (presumably the rating scale).

    Returns:
        An ``EloResult`` with the scores as a ``pd.Series`` named ``"elo"``
        and the three parameters that were used.
    """
    xy_index, _xs, _ys = _index_elements(xs, ys)

    # py_elo receives the indexed comparisons themselves, not count matrices.
    scores = py_elo(_xs, _ys, ws, r, k, s)

    return EloResult(
        scores=pd.Series(scores, index=xy_index, name=elo.__name__),
        r=r,
        k=k,
        s=s,
    )

@dataclass(frozen=True)
class EigenResult:
    """Immutable result of ``eigen``."""

    # Scores returned by py_eigen, labelled by the compared elements.
    scores: "pd.Series[T]"
    # Matrix that was passed to py_eigen: wins plus tie_weight times ties.
    matrix: npt.NDArray[np.float64]
    # Weight that was applied to the tie matrix.
    tie_weight: float

def eigen(
    xs: Iterable[T],
    ys: Iterable[T],
    ws: Iterable[Winner],
    tie_weight: float = .5,
) -> EigenResult:
    """Score elements with ``py_eigen`` on a tie-weighted win matrix.

    Args:
        xs: Elements on the first side of each comparison.
        ys: Elements on the second side of each comparison.
        ws: Outcome of each comparison, as ``Winner`` values.
        tie_weight: Factor applied to the tie matrix before it is added to
            the win matrix; must be finite.

    Returns:
        An ``EigenResult`` with the scores as a ``pd.Series`` named
        ``"eigen"``, the matrix that was scored and the tie weight.

    Raises:
        AssertionError: If ``tie_weight`` is not finite.
    """
    # Same guard as bradley_terry applies to this parameter: a NaN or
    # infinite weight would poison every entry of the matrix built below.
    assert np.isfinite(tie_weight), "tie_weight must be finite"

    xy_index, _xs, _ys = _index_elements(xs, ys)

    # Descriptive local names: the previous ``T`` shadowed the module-level
    # TypeVar of the same name inside this function.
    win_matrix, tie_matrix = py_matrices(_xs, _ys, ws)

    # Fold ties into the wins as fractional wins before scoring.
    matrix = win_matrix.astype(float) + tie_weight * tie_matrix.astype(float)

    scores = py_eigen(matrix)

    return EigenResult(
        scores=pd.Series(scores, index=xy_index, name=eigen.__name__),
        matrix=matrix,
        tie_weight=tie_weight,
    )

def pairwise(scores: "pd.Series[T] | npt.NDArray[np.float64]") -> npt.NDArray[np.float64]:
    """Turn a vector of scores into a square matrix of pairwise ratios.

    Entry ``[i, j]`` of the result is ``scores[i] / (scores[i] + scores[j])``.

    Args:
        scores: One-dimensional scores, either a NumPy array or a
            ``pd.Series``.

    Returns:
        A square NumPy array with one row and one column per score.

        NOTE: a ``pd.Series`` is first sorted by descending score, so the
        rows and columns follow that sorted order rather than the order of
        the Series' own index. Label the result with
        ``scores.sort_values(ascending=False).index``, not ``scores.index``.
        A NumPy array is used in the order given.
    """
    if isinstance(scores, pd.Series):
        values = scores.sort_values(ascending=False).to_numpy()
    else:
        values = scores

    # A column vector against the row vector broadcasts to an n-by-n matrix.
    column = values[:, np.newaxis]

    return column / (values + column)

# Public API of the package. "matrices" is the wrapper defined in this
# module and was missing from the list; "py_matrices" (the low-level routine
# it calls) stays listed so existing star-imports keep working.
__all__ = [
    "Winner",
    "__version__",
    "bradley_terry",
    "counting",
    "eigen",
    "elo",
    "matrices",
    "py_matrices",
    "newman",
    "bradley_terry_naive",
    "newman_naive",
    "WINNERS",
    "index",
    "pairwise",
]
Loading

0 comments on commit 3e95c03

Please sign in to comment.