From 968d589e85a213e5aee16e0c3d77a15139be59f8 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Thu, 16 Nov 2023 15:27:18 -0800 Subject: [PATCH] Add function evaluator (#295) --- python/Makefile | 2 +- python/langsmith/evaluation/evaluator.py | 75 +++++++++++++++++++++++- python/langsmith/run_helpers.py | 6 +- python/poetry.lock | 40 ++++++------- python/pyproject.toml | 2 +- 5 files changed, 101 insertions(+), 24 deletions(-) diff --git a/python/Makefile b/python/Makefile index 9233c2cb6..aca72eea1 100644 --- a/python/Makefile +++ b/python/Makefile @@ -12,7 +12,7 @@ lint: poetry run black . --check format: - poetry run ruff format + poetry run ruff format . poetry run ruff --fix . poetry run black . diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py index b2b697f3a..3e1455bf9 100644 --- a/python/langsmith/evaluation/evaluator.py +++ b/python/langsmith/evaluation/evaluator.py @@ -1,13 +1,15 @@ import asyncio import uuid from abc import abstractmethod -from typing import Dict, List, Optional, TypedDict, Union +from typing import Callable, Dict, List, Optional, TypedDict, Union try: from pydantic.v1 import BaseModel, Field # type: ignore[import] except ImportError: from pydantic import BaseModel, Field +from functools import wraps + from langsmith.schemas import SCORE_TYPE, VALUE_TYPE, Example, Run @@ -64,3 +66,74 @@ async def aevaluate_run( return await asyncio.get_running_loop().run_in_executor( None, self.evaluate_run, run, example ) + + +class DynamicRunEvaluator(RunEvaluator): + """ + A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`. + + This class is designed to be used with the `@run_evaluator` decorator, allowing + functions that take a `Run` and an optional `Example` as arguments, and return + an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`. + + Attributes: + func (Callable): The function that is wrapped by this evaluator. + """ # noqa: E501 + + def __init__( + self, + func: Callable[ + [Run, Optional[Example]], Union[EvaluationResult, EvaluationResults] + ], + ): + """ + Initialize the DynamicRunEvaluator with a given function. + + Args: + func (Callable): A function that takes a `Run` and an optional `Example` as + arguments, and returns an `EvaluationResult` or `EvaluationResults`. + """ + wraps(func)(self) + self.func = func + + def evaluate_run( + self, run: Run, example: Optional[Example] = None + ) -> Union[EvaluationResult, EvaluationResults]: + """ + Evaluate a run using the wrapped function. + + This method directly invokes the wrapped function with the provided arguments. + + Args: + run (Run): The run to be evaluated. + example (Optional[Example]): An optional example to be used in the evaluation. + + Returns: + Union[EvaluationResult, EvaluationResults]: The result of the evaluation. + """ # noqa: E501 + return self.func(run, example) + + def __call__( + self, run: Run, example: Optional[Example] = None + ) -> Union[EvaluationResult, EvaluationResults]: + """ + Make the evaluator callable, allowing it to be used like a function. + + This method enables the evaluator instance to be called directly, forwarding the + call to `evaluate_run`. + + Args: + run (Run): The run to be evaluated. + example (Optional[Example]): An optional example to be used in the evaluation. + + Returns: + Union[EvaluationResult, EvaluationResults]: The result of the evaluation. + """ # noqa: E501 + return self.evaluate_run(run, example) + + +def run_evaluator( + func: Callable[[Run, Optional[Example]], Union[EvaluationResult, EvaluationResults]] +): + """Decorator to create a run evaluator from a function.""" + return DynamicRunEvaluator(func) diff --git a/python/langsmith/run_helpers.py b/python/langsmith/run_helpers.py index b609b5f60..abbba0f7b 100644 --- a/python/langsmith/run_helpers.py +++ b/python/langsmith/run_helpers.py @@ -158,7 +158,11 @@ def _setup_run( metadata_.update(metadata or {}) metadata_["ls_method"] = "traceable" extra_inner["metadata"] = metadata_ - inputs = _get_inputs(signature, *args, **kwargs) + try: + inputs = _get_inputs(signature, *args, **kwargs) + except TypeError as e: + logger.debug(f"Failed to infer inputs for {name_}: {e}") + inputs = {"args": args, "kwargs": kwargs} outer_tags = _TAGS.get() tags_ = (langsmith_extra.get("tags") or []) + (outer_tags or []) _TAGS.set(tags_) diff --git a/python/poetry.lock b/python/poetry.lock index 5da0c09ae..c000732f4 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1103,28 +1103,28 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.0.270" -description = "An extremely fast Python linter, written in Rust." +version = "0.1.5" +description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.0.270-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:f74c4d550f7b8e808455ac77bbce38daafc458434815ba0bc21ae4bdb276509b"}, - {file = "ruff-0.0.270-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:643de865fd35cb76c4f0739aea5afe7b8e4d40d623df7e9e6ea99054e5cead0a"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eca02e709b3308eb7255b5f74e779be23b5980fca3862eae28bb23069cd61ae4"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3ed3b198768d2b3a2300fb18f730cd39948a5cc36ba29ae9d4639a11040880be"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:739495d2dbde87cf4e3110c8d27bc20febf93112539a968a4e02c26f0deccd1d"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:08188f8351f4c0b6216e8463df0a76eb57894ca59a3da65e4ed205db980fd3ae"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0827b074635d37984fc98d99316bfab5c8b1231bb83e60dacc83bd92883eedb4"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d61ae4841313f6eeb8292dc349bef27b4ce426e62c36e80ceedc3824e408734"}, - {file = "ruff-0.0.270-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eb412f20e77529a01fb94d578b19dcb8331b56f93632aa0cce4a2ea27b7aeba"}, - {file = "ruff-0.0.270-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b775e2c5fc869359daf8c8b8aa0fd67240201ab2e8d536d14a0edf279af18786"}, - {file = "ruff-0.0.270-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:21f00e47ab2308617c44435c8dfd9e2e03897461c9e647ec942deb2a235b4cfd"}, - {file = "ruff-0.0.270-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0bbfbf6fd2436165566ca85f6e57be03ed2f0a994faf40180cfbb3604c9232ef"}, - {file = "ruff-0.0.270-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8af391ef81f7be960be10886a3c1aac0b298bde7cb9a86ec2b05faeb2081ce6b"}, - {file = "ruff-0.0.270-py3-none-win32.whl", hash = "sha256:b4c037fe2f75bcd9aed0c89c7c507cb7fa59abae2bd4c8b6fc331a28178655a4"}, - {file = "ruff-0.0.270-py3-none-win_amd64.whl", hash = "sha256:0012f9b7dc137ab7f1f0355e3c4ca49b562baf6c9fa1180948deeb6648c52957"}, - {file = "ruff-0.0.270-py3-none-win_arm64.whl", hash = "sha256:9613456b0b375766244c25045e353bc8890c856431cd97893c97b10cc93bd28d"}, - {file = "ruff-0.0.270.tar.gz", hash = "sha256:95db07b7850b30ebf32b27fe98bc39e0ab99db3985edbbf0754d399eb2f0e690"}, + {file = "ruff-0.1.5-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:32d47fc69261c21a4c48916f16ca272bf2f273eb635d91c65d5cd548bf1f3d96"}, + {file = "ruff-0.1.5-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:171276c1df6c07fa0597fb946139ced1c2978f4f0b8254f201281729981f3c17"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ef33cd0bb7316ca65649fc748acc1406dfa4da96a3d0cde6d52f2e866c7b39"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b2c205827b3f8c13b4a432e9585750b93fd907986fe1aec62b2a02cf4401eee6"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb408e3a2ad8f6881d0f2e7ad70cddb3ed9f200eb3517a91a245bbe27101d379"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f20dc5e5905ddb407060ca27267c7174f532375c08076d1a953cf7bb016f5a24"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aafb9d2b671ed934998e881e2c0f5845a4295e84e719359c71c39a5363cccc91"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4894dddb476597a0ba4473d72a23151b8b3b0b5f958f2cf4d3f1c572cdb7af7"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a00a7ec893f665ed60008c70fe9eeb58d210e6b4d83ec6654a9904871f982a2a"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a8c11206b47f283cbda399a654fd0178d7a389e631f19f51da15cbe631480c5b"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fa29e67b3284b9a79b1a85ee66e293a94ac6b7bb068b307a8a373c3d343aa8ec"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9b97fd6da44d6cceb188147b68db69a5741fbc736465b5cea3928fdac0bc1aeb"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:721f4b9d3b4161df8dc9f09aa8562e39d14e55a4dbaa451a8e55bdc9590e20f4"}, + {file = "ruff-0.1.5-py3-none-win32.whl", hash = "sha256:f80c73bba6bc69e4fdc73b3991db0b546ce641bdcd5b07210b8ad6f64c79f1ab"}, + {file = "ruff-0.1.5-py3-none-win_amd64.whl", hash = "sha256:c21fe20ee7d76206d290a76271c1af7a5096bc4c73ab9383ed2ad35f852a0087"}, + {file = "ruff-0.1.5-py3-none-win_arm64.whl", hash = "sha256:82bfcb9927e88c1ed50f49ac6c9728dab3ea451212693fe40d08d314663e412f"}, + {file = "ruff-0.1.5.tar.gz", hash = "sha256:5cbec0ef2ae1748fb194f420fb03fb2c25c3258c86129af7172ff8f198f125ab"}, ] [[package]] @@ -1441,4 +1441,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0d7872739d26b459fdccf999d62527a8e747bde3badfc3b449ba10c87aeab060" +content-hash = "b1cab1759372f15c1913c2765f9029ac6c924193dec81e8df2792b4ae01aa0b6" diff --git a/python/pyproject.toml b/python/pyproject.toml index 404f7f1eb..507d5db49 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -34,7 +34,7 @@ requests = "^2" pytest = "^7.3.1" black = "^23.3.0" mypy = "^1.3.0" -ruff = "^0.0.270" +ruff = "^0.1.5" pydantic = ">=1,<2" types-requests = "^2.31.0.1" pandas-stubs = "^2.0.1.230501"