diff --git a/js/package.json b/js/package.json index 4d4272981..5b98b0ee3 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "langsmith", - "version": "0.1.16", + "version": "0.1.17", "description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.", "packageManager": "yarn@1.22.19", "files": [ @@ -173,4 +173,4 @@ }, "./package.json": "./package.json" } -} \ No newline at end of file +} diff --git a/js/src/evaluation/evaluator.ts b/js/src/evaluation/evaluator.ts index f258da389..f105c1e1e 100644 --- a/js/src/evaluation/evaluator.ts +++ b/js/src/evaluation/evaluator.ts @@ -1,5 +1,48 @@ import { Example, Run, ScoreType, ValueType } from "../schemas.js"; +/** + * Represents a categorical class. + */ +export type Category = { + /** + * The value of the category. + */ + value?: number; + /** + * The label of the category. + */ + label: string; +}; + +/** + * Configuration for feedback. + */ +export type FeedbackConfig = { + /** + * The type of feedback. + * - "continuous": Feedback with a continuous numeric value. + * - "categorical": Feedback with a categorical value (classes). + * - "freeform": Feedback with a freeform text value (notes). + */ + type: "continuous" | "categorical" | "freeform"; + + /** + * The minimum value for continuous feedback. + */ + min?: number; + + /** + * The maximum value for continuous feedback. + */ + max?: number; + + /** + * The categories for categorical feedback. + * Each category can be a string or an object with additional properties. + */ + categories?: (Category | Record<string, unknown>)[]; +}; + /** * Represents the result of an evaluation. */ @@ -39,6 +82,13 @@ export type EvaluationResult = { * the root of the trace. */ targetRunId?: string; + + /** + * The feedback config associated with the evaluation result. + * If set, this will be used to define how a feedback key + * should be interpreted. 
+ */ + feedbackConfig?: FeedbackConfig; }; export interface RunEvaluator { diff --git a/js/src/index.ts b/js/src/index.ts index 4a61115ee..3bb7c706d 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -11,4 +11,4 @@ export type { export { RunTree, type RunTreeConfig } from "./run_trees.js"; // Update using yarn bump-version -export const __version__ = "0.1.16"; +export const __version__ = "0.1.17"; diff --git a/python/langsmith/client.py b/python/langsmith/client.py index b3072641e..f0b3dd20d 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3304,6 +3304,9 @@ def _log_evaluation_feedback( correction=res.correction, source_info=source_info_, source_run_id=res.source_run_id, + feedback_config=cast( + Optional[ls_schemas.FeedbackConfig], res.feedback_config + ), feedback_source_type=ls_schemas.FeedbackSourceType.MODEL, project_id=project_id, ) diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py index 361274928..f9f13cb2b 100644 --- a/python/langsmith/evaluation/evaluator.py +++ b/python/langsmith/evaluation/evaluator.py @@ -3,7 +3,9 @@ import asyncio import uuid from abc import abstractmethod -from typing import Any, Callable, Dict, List, Optional, TypedDict, Union, cast +from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast + +from typing_extensions import TypedDict try: from pydantic.v1 import BaseModel, Field, ValidationError # type: ignore[import] @@ -15,6 +17,30 @@ from langsmith.schemas import SCORE_TYPE, VALUE_TYPE, Example, Run +class Category(TypedDict): + """A category for categorical feedback.""" + + value: Optional[Union[float, int]] + """The numeric score/ordinal corresponding to this category.""" + label: str + """The label for this category.""" + + +class FeedbackConfig(TypedDict, total=False): + """Configuration to define a type of feedback. + + Applied on the first creation of a feedback_key. 
+ """ + + type: Literal["continuous", "categorical", "freeform"] + """The type of feedback.""" + min: Optional[Union[float, int]] + """The minimum permitted value (if continuous type).""" + max: Optional[Union[float, int]] + """The maximum permitted value (if continuous type).""" + categories: Optional[List[Union[Category, dict]]] + + class EvaluationResult(BaseModel): """Evaluation result.""" @@ -30,6 +56,8 @@ class EvaluationResult(BaseModel): """What the correct value should be, if applicable.""" evaluator_info: Dict = Field(default_factory=dict) """Additional information about the evaluator.""" + feedback_config: Optional[Union[FeedbackConfig, dict]] = None + """The configuration used to generate this feedback.""" source_run_id: Optional[Union[uuid.UUID, str]] = None """The ID of the trace of the evaluator itself.""" target_run_id: Optional[Union[uuid.UUID, str]] = None diff --git a/python/pyproject.toml b/python/pyproject.toml index 9ebcfd02c..4175beaf5 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langsmith" -version = "0.1.42" +version = "0.1.44" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." authors = ["LangChain "] license = "MIT"