Skip to content

Commit

Permalink
Add function evaluator (#295)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hinthornw authored Nov 16, 2023
1 parent 31ca38b commit 968d589
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 24 deletions.
2 changes: 1 addition & 1 deletion python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ lint:
poetry run black . --check

format:
poetry run ruff format
poetry run ruff format .
poetry run ruff --fix .
poetry run black .

Expand Down
75 changes: 74 additions & 1 deletion python/langsmith/evaluation/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import asyncio
import uuid
from abc import abstractmethod
from typing import Dict, List, Optional, TypedDict, Union
from typing import Callable, Dict, List, Optional, TypedDict, Union

try:
from pydantic.v1 import BaseModel, Field # type: ignore[import]
except ImportError:
from pydantic import BaseModel, Field

from functools import wraps

from langsmith.schemas import SCORE_TYPE, VALUE_TYPE, Example, Run


Expand Down Expand Up @@ -64,3 +66,74 @@ async def aevaluate_run(
return await asyncio.get_running_loop().run_in_executor(
None, self.evaluate_run, run, example
)


class DynamicRunEvaluator(RunEvaluator):
    """Adapter that exposes a plain function through the `RunEvaluator` interface.

    Instances are normally produced by the `@run_evaluator` decorator. The
    wrapped callable receives a `Run` and an optional `Example` and returns an
    `EvaluationResult` or `EvaluationResults`.

    Attributes:
        func (Callable): The user-supplied function this evaluator delegates to.
    """

    def __init__(
        self,
        func: Callable[
            [Run, Optional[Example]], Union[EvaluationResult, EvaluationResults]
        ],
    ):
        """Store `func` and mirror its metadata onto this instance.

        Args:
            func (Callable): Function accepting a `Run` and an optional
                `Example`, returning an `EvaluationResult` or
                `EvaluationResults`.
        """
        # Copy the function's metadata (name, docstring, ...) onto this object.
        # This must happen before `self.func` is assigned: `wraps` also merges
        # `func.__dict__` into the instance, and the assignment below has to
        # have the final say on the `func` attribute.
        copy_metadata = wraps(func)
        copy_metadata(self)
        self.func = func

    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> Union[EvaluationResult, EvaluationResults]:
        """Score `run` by delegating to the wrapped function.

        Args:
            run (Run): The run to evaluate.
            example (Optional[Example]): Optional example passed through to the
                wrapped function unchanged.

        Returns:
            Union[EvaluationResult, EvaluationResults]: Whatever the wrapped
            function returns.
        """
        outcome = self.func(run, example)
        return outcome

    def __call__(
        self, run: Run, example: Optional[Example] = None
    ) -> Union[EvaluationResult, EvaluationResults]:
        """Allow the evaluator to be invoked like the function it wraps.

        The call is forwarded to `evaluate_run` with the same arguments.

        Args:
            run (Run): The run to evaluate.
            example (Optional[Example]): Optional example passed through to
                `evaluate_run`.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The value produced by
            `evaluate_run`.
        """
        outcome = self.evaluate_run(run, example)
        return outcome


def run_evaluator(
    func: Callable[
        [Run, Optional[Example]], Union[EvaluationResult, EvaluationResults]
    ],
):
    """Turn an evaluation function into a run evaluator.

    Intended for use as a decorator: the decorated function is wrapped in a
    `DynamicRunEvaluator`, which is returned in its place.

    Args:
        func (Callable): Function accepting a `Run` and an optional `Example`,
            returning an `EvaluationResult` or `EvaluationResults`.

    Returns:
        DynamicRunEvaluator: The evaluator wrapping `func`.
    """
    evaluator = DynamicRunEvaluator(func)
    return evaluator
6 changes: 5 additions & 1 deletion python/langsmith/run_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,11 @@ def _setup_run(
metadata_.update(metadata or {})
metadata_["ls_method"] = "traceable"
extra_inner["metadata"] = metadata_
inputs = _get_inputs(signature, *args, **kwargs)
try:
inputs = _get_inputs(signature, *args, **kwargs)
except TypeError as e:
logger.debug(f"Failed to infer inputs for {name_}: {e}")
inputs = {"args": args, "kwargs": kwargs}
outer_tags = _TAGS.get()
tags_ = (langsmith_extra.get("tags") or []) + (outer_tags or [])
_TAGS.set(tags_)
Expand Down
40 changes: 20 additions & 20 deletions python/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ requests = "^2"
pytest = "^7.3.1"
black = "^23.3.0"
mypy = "^1.3.0"
ruff = "^0.0.270"
ruff = "^0.1.5"
pydantic = ">=1,<2"
types-requests = "^2.31.0.1"
pandas-stubs = "^2.0.1.230501"
Expand Down

0 comments on commit 968d589

Please sign in to comment.