Skip to content

Commit

Permalink
Add llmfao
Browse files: browse the repository at this point in the history
  • Loading branch information
dustalov committed Jul 6, 2024
1 parent 83e6d1b commit cbac611
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
21 changes: 18 additions & 3 deletions conftest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,11 +44,26 @@ def food() -> tuple[list[str], list[str], list[evalica.Winner]]:
return xs.tolist(), ys.tolist(), ws.tolist()


@pytest.fixture()
def llmfao() -> tuple[list[str], list[str], list[evalica.Winner]]:
    """Load the LLMFAO crowd comparisons as three parallel lists.

    The CSV is fetched over HTTP with every column read as a string.

    Returns:
        A tuple ``(xs, ys, ws)`` built from the ``left``, ``right`` and
        ``winner`` columns, where each winner label is translated to the
        corresponding ``evalica.Winner`` member.
    """
    # Translation table from the dataset's winner labels to evalica outcomes.
    winner_by_label = {
        "left": evalica.Winner.X,
        "right": evalica.Winner.Y,
        "tie": evalica.Winner.Draw,
    }

    comparisons = pd.read_csv(
        "https://github.com/dustalov/llmfao/raw/master/crowd-comparisons.csv",
        dtype=str,
    )

    left_items = comparisons["left"].tolist()
    right_items = comparisons["right"].tolist()
    outcomes = comparisons["winner"].map(winner_by_label).tolist()

    return left_items, right_items, outcomes


class Example(NamedTuple):
    """A tuple holding example data.

    The three fields describe a batch of pairwise comparisons: the two
    compared sides and the outcome of each comparison.
    """

    # First compared items: a plain list of ints or an int64 NumPy array.
    xs: list[int] | npt.NDArray[np.int64]
    # Second compared items: same accepted types as ``xs``.
    ys: list[int] | npt.NDArray[np.int64]
    # Outcome of each comparison.
    ws: list[evalica.Winner]


Expand All @@ -60,6 +75,6 @@ def xs_ys_ws(draw: Callable[[SearchStrategy[Any]], Any]) -> Example:

return Example(
xs=draw(elements),
ys=np.array(draw(elements)),
ys=np.array(draw(elements), dtype=np.int64),
ws=draw(winners),
)
32 changes: 32 additions & 0 deletions python/evalica/test_evalica.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -142,3 +142,35 @@ def test_elo_food(food: tuple[list[str], list[str], list[evalica.Winner]]) -> No

assert len(result.scores) == len(set(xs) | set(ys))
assert np.isfinite(result.scores).all()



def test_bradley_terry_llmfao(llmfao: tuple[list[str], list[str], list[evalica.Winner]]) -> None:
    """Check Bradley-Terry on the LLMFAO fixture.

    Expects one finite score per distinct item appearing on either side of
    the comparisons, and at least one iteration to have been performed.
    """
    left_items, right_items, outcomes = llmfao

    fitted = evalica.bradley_terry(left_items, right_items, outcomes)

    distinct_items = set(left_items).union(right_items)

    assert len(fitted.scores) == len(distinct_items)
    assert np.isfinite(fitted.scores).all()
    assert fitted.iterations > 0


def test_newman_llmfao(llmfao: tuple[list[str], list[str], list[evalica.Winner]]) -> None:
    """Check Newman's method on the LLMFAO fixture.

    Expects one finite score per distinct item appearing on either side of
    the comparisons, finite ``v`` and ``v_init`` values on the result, and
    at least one iteration to have been performed.
    """
    left_items, right_items, outcomes = llmfao

    fitted = evalica.newman(left_items, right_items, outcomes)

    distinct_items = set(left_items).union(right_items)

    assert len(fitted.scores) == len(distinct_items)
    assert np.isfinite(fitted.scores).all()
    assert np.isfinite(fitted.v)
    assert np.isfinite(fitted.v_init)
    assert fitted.iterations > 0


def test_elo_llmfao(llmfao: tuple[list[str], list[str], list[evalica.Winner]]) -> None:
    """Check Elo on the LLMFAO fixture.

    Expects one finite score per distinct item appearing on either side of
    the comparisons.
    """
    left_items, right_items, outcomes = llmfao

    fitted = evalica.elo(left_items, right_items, outcomes)

    distinct_items = set(left_items).union(right_items)

    assert len(fitted.scores) == len(distinct_items)
    assert np.isfinite(fitted.scores).all()

0 comments on commit cbac611

Please sign in to comment.