From 5a22fc231d581bde2ffa1b618350b175facea247 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Thu, 19 Dec 2024 13:21:46 -0800 Subject: [PATCH] updated ref --- python/langsmith/client.py | 2558 ++++++++++++++++++++---------------- 1 file changed, 1430 insertions(+), 1128 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 03a8f5e37..3c9b15df0 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -172,15 +172,11 @@ def _parse_token_or_url( def _is_langchain_hosted(url: str) -> bool: """Check if the URL is langchain hosted. - Parameters - ---------- - url : str - The URL to check. + Args: + url (str): The URL to check. Returns: - ------- - bool - True if the URL is langchain hosted, False otherwise. + bool: True if the URL is langchain hosted, False otherwise. """ try: netloc = urllib_parse.urlsplit(url).netloc.split(":")[0] @@ -201,9 +197,7 @@ def _default_retry_config() -> Retry: If urllib3 version is 1.26 or greater, retry on all methods. Returns: - ------- - Retry - The default retry configuration. + Retry: The default retry configuration. """ retry_params = dict( total=3, @@ -231,10 +225,8 @@ def _default_retry_config() -> Retry: def close_session(session: requests.Session) -> None: """Close the session. - Parameters - ---------- - session : Session - The session to close. + Args: + session (requests.Session): The session to close. """ logger.debug("Closing Client.session") session.close() @@ -243,17 +235,15 @@ def close_session(session: requests.Session) -> None: def _validate_api_key_if_hosted(api_url: str, api_key: Optional[str]) -> None: """Verify API key is provided if url not localhost. - Parameters - ---------- - api_url : str - The API URL. - api_key : str or None - The API key. + Args: + api_url (str): The API URL. + api_key (Optional[str]): The API key. + + Returns: + None Raises: - ------ - LangSmithUserError - If the API key is not provided when using the hosted service. + LangSmithUserError: If the API key is not provided when using the hosted service. """ # If the domain is langchain.com, raise error if no api_key if not api_key: @@ -268,9 +258,7 @@ def _get_tracing_sampling_rate() -> float | None: """Get the tracing sampling rate. Returns: - ------- - float - The tracing sampling rate. + Optional[float]: The tracing sampling rate. """ sampling_rate_str = ls_utils.get_env_var("TRACING_SAMPLING_RATE") if sampling_rate_str is None: @@ -410,49 +398,38 @@ def __init__( ) -> None: """Initialize a Client instance. - Parameters - ---------- - api_url : str or None, default=None - URL for the LangSmith API. Defaults to the LANGCHAIN_ENDPOINT - environment variable or https://api.smith.langchain.com if not set. - api_key : str or None, default=None - API key for the LangSmith API. Defaults to the LANGCHAIN_API_KEY - environment variable. - retry_config : Retry or None, default=None - Retry configuration for the HTTPAdapter. - timeout_ms : int, tuple[int, int], or None, default=None - Timeout for the HTTPAdapter. Can also be a 2-tuple of - (connect timeout, read timeout) to set them separately. - web_url : str or None, default=None - URL for the LangSmith web app. Default is auto-inferred from - the ENDPOINT. - session: requests.Session or None, default=None - The session to use for requests. If None, a new session will be - created. - anonymizer : Optional[Callable[[dict], dict]] - A function applied for masking serialized run inputs and outputs, - before sending to the API. 
- hide_inputs: Whether to hide run inputs when tracing with this client. - If True, hides the entire inputs. If a function, applied to - all run inputs when creating runs. - hide_outputs: Whether to hide run outputs when tracing with this client. - If True, hides the entire outputs. If a function, applied to - all run outputs when creating runs. - info: Optional[ls_schemas.LangSmithInfo] - The information about the LangSmith API. If not provided, it will - be fetched from the API. - api_urls: Optional[Dict[str, str]] - A dictionary of write API URLs and their corresponding API keys. - Useful for multi-tenant setups. Data is only read from the first - URL in the dictionary. However, ONLY Runs are written (POST and PATCH) - to all URLs in the dictionary. Feedback, sessions, datasets, examples, - annotation queues and evaluation results are only written to the first. + Args: + api_url (Optional[str]): URL for the LangSmith API. Defaults to the LANGCHAIN_ENDPOINT + environment variable or https://api.smith.langchain.com if not set. + api_key (Optional[str]): API key for the LangSmith API. Defaults to the LANGCHAIN_API_KEY + environment variable. + retry_config (Optional[Retry]): Retry configuration for the HTTPAdapter. + timeout_ms (Optional[Union[int, Tuple[int, int]]]): Timeout for the HTTPAdapter. Can also be a 2-tuple of + (connect timeout, read timeout) to set them separately. + web_url (Optional[str]): URL for the LangSmith web app. Default is auto-inferred from + the ENDPOINT. + session (Optional[requests.Session]): The session to use for requests. If None, a new session will be + created. + auto_batch_tracing (bool, default=True): Whether to automatically batch tracing. + anonymizer (Optional[Callable[[dict], dict]]): A function applied for masking serialized run inputs and outputs, + before sending to the API. + hide_inputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run inputs when tracing with this client. + If True, hides the entire inputs. If a function, applied to + all run inputs when creating runs. + hide_outputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run outputs when tracing with this client. + If True, hides the entire outputs. If a function, applied to + all run outputs when creating runs. + info (Optional[ls_schemas.LangSmithInfo]): The information about the LangSmith API. + If not provided, it will be fetched from the API. + api_urls (Optional[Dict[str, str]]): A dictionary of write API URLs and their corresponding API keys. + Useful for multi-tenant setups. Data is only read from the first + URL in the dictionary. However, ONLY Runs are written (POST and PATCH) + to all URLs in the dictionary. Feedback, sessions, datasets, examples, + annotation queues and evaluation results are only written to the first. Raises: - ------ - LangSmithUserError - If the API key is not provided when using the hosted service. - If both api_url and api_urls are provided. + LangSmithUserError: If the API key is not provided when using the hosted service. + If both api_url and api_urls are provided. """ if api_url and api_urls: raise ls_utils.LangSmithUserError( @@ -581,9 +558,7 @@ def _repr_html_(self) -> str: """Return an HTML representation of the instance with a link to the URL. Returns: - ------- - str - The HTML representation of the instance. + str: The HTML representation of the instance. 
""" link = self._host_url return f'LangSmith Client' @@ -592,9 +567,7 @@ def __repr__(self) -> str: """Return a string representation of the instance with a link to the URL. Returns: - ------- - str - The string representation of the instance. + str: The string representation of the instance. """ return f"Client (API URL: {self.api_url})" @@ -612,9 +585,7 @@ def _headers(self) -> Dict[str, str]: """Get the headers for the API request. Returns: - ------- - Dict[str, str] - The headers for the API request. + Dict[str, str]: The headers for the API request. """ headers = { "User-Agent": f"langsmith-py/{langsmith.__version__}", @@ -629,9 +600,7 @@ def info(self) -> ls_schemas.LangSmithInfo: """Get the information about the LangSmith API. Returns: - ------- - Optional[ls_schemas.LangSmithInfo] - The information about the LangSmith API, or None if the API is + ls_schemas.LangSmithInfo: The information about the LangSmith API, or None if the API is not available. """ if self._info is None: @@ -697,42 +666,26 @@ def request_with_retries( ) -> requests.Response: """Send a request with retries. - Parameters - ---------- - request_method : str - The HTTP request method. - pathname : str - The pathname of the request URL. Will be appended to the API URL. - request_kwargs : Mapping - Additional request parameters. - stop_after_attempt : int, default=1 - The number of attempts to make. - retry_on : Sequence[Type[BaseException]] or None, default=None - The exceptions to retry on. In addition to: - [LangSmithConnectionError, LangSmithAPIError]. - to_ignore : Sequence[Type[BaseException]] or None, default=None - The exceptions to ignore / pass on. - handle_response : Callable[[requests.Response, int], Any] or None, default=None - A function to handle the response and return whether to continue - retrying. - **kwargs : Any - Additional keyword arguments to pass to the request. + Args: + method (str): The HTTP request method. + pathname (str): The pathname of the request URL. Will be appended to the API URL. + request_kwargs (Mapping): Additional request parameters. + stop_after_attempt (int, default=1): The number of attempts to make. + retry_on (Optional[Sequence[Type[BaseException]]]): The exceptions to retry on. In addition to: + [LangSmithConnectionError, LangSmithAPIError]. + to_ignore (Optional[Sequence[Type[BaseException]]]): The exceptions to ignore / pass on. + handle_response (Optional[Callable[[requests.Response, int], Any]]): A function to handle the response and return whether to continue retrying. + _context (str, default=""): The context of the request. + **kwargs (Any): Additional keyword arguments to pass to the request. Returns: - ------- - Response - The response object. + requests.Response: The response object. Raises: - ------ - LangSmithAPIError - If a server error occurs. - LangSmithUserError - If the request fails. - LangSmithConnectionError - If a connection error occurs. - LangSmithError - If the request fails. + LangSmithAPIError: If a server error occurs. + LangSmithUserError: If the request fails. + LangSmithConnectionError: If a connection error occurs. + LangSmithError: If the request fails. """ request_kwargs = request_kwargs or {} request_kwargs = { @@ -925,16 +878,11 @@ def _get_paginated_list( ) -> Iterator[dict]: """Get a paginated list of items. - Parameters - ---------- - path : str - The path of the request URL. - params : dict or None, default=None - The query parameters. + Args: + path (str): The path of the request URL. 
+ params (Optional[dict]): The query parameters. Yields: - ------ - dict The items in the paginated list. """ params_ = params.copy() if params else {} @@ -967,19 +915,13 @@ def _get_cursor_paginated_list( ) -> Iterator[dict]: """Get a cursor paginated list of items. - Parameters - ---------- - path : str - The path of the request URL. - body : dict or None, default=None - The query body. - request_method : str, default="post" - The HTTP request method. - data_key : str, default="runs" + Args: + path (str): The path of the request URL. + body (Optional[dict]): The query body. + request_method (Literal["GET", "POST"], default="POST"): The HTTP request method. + data_key (str, default="runs"): The key in the response body that contains the items. Yields: - ------ - dict The items in the paginated list. """ params_ = body.copy() if body else {} @@ -1016,30 +958,40 @@ def upload_dataframe( ) -> ls_schemas.Dataset: """Upload a dataframe as individual examples to the LangSmith API. - Parameters - ---------- - df : pd.DataFrame - The dataframe to upload. - name : str - The name of the dataset. - input_keys : Sequence[str] - The input keys. - output_keys : Sequence[str] - The output keys. - description : str or None, default=None - The description of the dataset. - data_type : DataType or None, default=DataType.kv - The data type of the dataset. + Args: + df (pd.DataFrame): The dataframe to upload. + name (str): The name of the dataset. + input_keys (Sequence[str]): The input keys. + output_keys (Sequence[str]): The output keys. + description (Optional[str]): The description of the dataset. + data_type (Optional[DataType]): The data type of the dataset. Returns: - ------- - Dataset - The uploaded dataset. + Dataset: The uploaded dataset. Raises: - ------ - ValueError - If the csv_file is not a string or tuple. + ValueError: If the csv_file is not a string or tuple. + + Examples: + .. code-block:: python + from langsmith import Client + import os + import pandas as pd + + client = Client() + + df = pd.read_parquet('path/to/your/myfile.parquet') + input_keys = ['column1', 'column2'] # replace with your input column names + output_keys = ['output1', 'output2'] # replace with your output column names + + dataset = client.upload_dataframe( + df=df, + input_keys=input_keys, + output_keys=output_keys, + name="My Parquet Dataset", + description="Dataset created from a parquet file", + data_type="kv" # The default + ) """ csv_file = io.BytesIO() df.to_csv(csv_file, index=False) @@ -1065,32 +1017,41 @@ def upload_csv( ) -> ls_schemas.Dataset: """Upload a CSV file to the LangSmith API. - Parameters - ---------- - csv_file : str or Tuple[str, BytesIO] - The CSV file to upload. If a string, it should be the path - If a tuple, it should be a tuple containing the filename - and a BytesIO object. - input_keys : Sequence[str] - The input keys. - output_keys : Sequence[str] - The output keys. - name : str or None, default=None - The name of the dataset. - description : str or None, default=None - The description of the dataset. - data_type : DataType or None, default=DataType.kv - The data type of the dataset. + Args: + csv_file (Union[str, Tuple[str, io.BytesIO]]): The CSV file to upload. If a string, it should be the path + If a tuple, it should be a tuple containing the filename + and a BytesIO object. + input_keys (Sequence[str]): The input keys. + output_keys (Sequence[str]): The output keys. + name (Optional[str]): The name of the dataset. + description (Optional[str]): The description of the dataset. 
+ data_type (Optional[ls_schemas.DataType]): The data type of the dataset. Returns: - ------- - Dataset - The uploaded dataset. + Dataset: The uploaded dataset. Raises: - ------ - ValueError - If the csv_file is not a string or tuple. + ValueError: If the csv_file is not a string or tuple. + + Examples: + .. code-block:: python + from langsmith import Client + import os + + client = Client() + + csv_file = 'path/to/your/myfile.csv' + input_keys = ['column1', 'column2'] # replace with your input column names + output_keys = ['output1', 'output2'] # replace with your output column names + + dataset = client.upload_csv( + csv_file=csv_file, + input_keys=input_keys, + output_keys=output_keys, + name="My CSV Dataset", + description="Dataset created from a CSV file", + data_type="kv" # The default + ) """ data = { "input_keys": input_keys, @@ -1144,8 +1105,8 @@ def _run_transform( Args: run (Union[ls_schemas.Run, dict]): The run object to transform. - update (bool, optional): Whether the payload is for an "update" event. - copy (bool, optional): Whether to deepcopy run inputs/outputs. + update (Optional[bool]): Whether the payload is for an "update" event. + copy (Optional[bool]): Whether to deepcopy run inputs/outputs. Returns: dict: The transformed run object as a dictionary. @@ -1242,24 +1203,42 @@ def create_run( ) -> None: """Persist a run to the LangSmith API. - Parameters - ---------- - name : str - The name of the run. - inputs : Dict[str, Any] - The input values for the run. - run_type : str - The type of the run, such as tool, chain, llm, retriever, - embedding, prompt, or parser. - revision_id : ID_TYPE or None, default=None - The revision ID of the run. - **kwargs : Any - Additional keyword arguments. + Args: + name (str): The name of the run. + inputs (Dict[str, Any]): The input values for the run. + run_type (str): The type of the run, such as tool, chain, llm, retriever, + embedding, prompt, or parser. + project_name (Optional[str]): The project name of the run. + revision_id (Optional[Union[UUID, str]]): The revision ID of the run. + **kwargs (Any): Additional keyword arguments. + + Returns: + None Raises: - ------ - LangSmithUserError - If the API key is not provided when using the hosted service. + LangSmithUserError: If the API key is not provided when using the hosted service. + + Examples: + .. code-block:: python + from langsmith import Client + import datetime + from uuid import uuid4 + + client = Client() + + run_id = uuid4() + client.create_run( + id=run_id, + project_name=project_name, + name="test_run", + run_type="llm", + inputs={"prompt": "hello world"}, + outputs={"generation": "hi there"}, + start_time=datetime.datetime.now(datetime.timezone.utc), + end_time=datetime.datetime.now(datetime.timezone.utc), + hide_inputs=True, + hide_outputs=True, + ) """ project_name = project_name or kwargs.pop( "session_name", @@ -1418,25 +1397,89 @@ def batch_ingest_runs( """Batch ingest/upsert multiple runs in the Langsmith system. Args: - create (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]): + create (Optional[Sequence[Union[Run, RunLikeDict]]]): A sequence of `Run` objects or equivalent dictionaries representing runs to be created / posted. - update (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]): + update (Optional[Sequence[Union[Run, RunLikeDict]]]): A sequence of `Run` objects or equivalent dictionaries representing runs that have already been created and should be updated / patched. 
- pre_sampled (bool, optional): Whether the runs have already been subject + pre_sampled (bool, default=False): Whether the runs have already been subject to sampling, and therefore should not be sampled again. Defaults to False. - Returns: - None - Raises: LangsmithAPIError: If there is an error in the API request. + Returns: + None + Note: - The run objects MUST contain the dotted_order and trace_id fields to be accepted by the API. + + Examples: + .. code-block:: python + from langsmith import Client + import datetime + from uuid import uuid4 + + client = Client() + _session = "__test_batch_ingest_runs" + trace_id = uuid4() + trace_id_2 = uuid4() + run_id_2 = uuid4() + current_time = datetime.datetime.now(datetime.timezone.utc).strftime( + "%Y%m%dT%H%M%S%fZ" + ) + later_time = ( + datetime.datetime.now(datetime.timezone.utc) + timedelta(seconds=1) + ).strftime("%Y%m%dT%H%M%S%fZ") + + runs_to_create = [ + { + "id": str(trace_id), + "session_name": _session, + "name": "run 1", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id)}", + "trace_id": str(trace_id), + "inputs": {"input1": 1, "input2": 2}, + "outputs": {"output1": 3, "output2": 4}, + }, + { + "id": str(trace_id_2), + "session_name": _session, + "name": "run 3", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id_2)}", + "trace_id": str(trace_id_2), + "inputs": {"input1": 1, "input2": 2}, + "error": "error", + }, + { + "id": str(run_id_2), + "session_name": _session, + "name": "run 2", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id)}." + f"{later_time}{str(run_id_2)}", + "trace_id": str(trace_id), + "parent_run_id": str(trace_id), + "inputs": {"input1": 5, "input2": 6}, + }, + ] + runs_to_update = [ + { + "id": str(run_id_2), + "dotted_order": f"{current_time}{str(trace_id)}." + f"{later_time}{str(run_id_2)}", + "trace_id": str(trace_id), + "parent_run_id": str(trace_id), + "outputs": {"output1": 4, "output2": 5}, + }, + ] + + client.batch_ingest_runs(create=runs_to_create, update=runs_to_update) """ if not create and not update: return @@ -1547,19 +1590,83 @@ def multipart_ingest( update (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]): A sequence of `Run` objects or equivalent dictionaries representing runs that have already been created and should be updated / patched. - pre_sampled (bool, optional): Whether the runs have already been subject + pre_sampled (bool, default=False): Whether the runs have already been subject to sampling, and therefore should not be sampled again. Defaults to False. - Returns: - None - Raises: LangsmithAPIError: If there is an error in the API request. + + Returns: + None Note: - The run objects MUST contain the dotted_order and trace_id fields to be accepted by the API. + + Examples: + .. 
code-block:: python + from langsmith import Client + import datetime + from uuid import uuid4 + + client = Client() + _session = "__test_batch_ingest_runs" + trace_id = uuid4() + trace_id_2 = uuid4() + run_id_2 = uuid4() + current_time = datetime.datetime.now(datetime.timezone.utc).strftime( + "%Y%m%dT%H%M%S%fZ" + ) + later_time = ( + datetime.datetime.now(datetime.timezone.utc) + timedelta(seconds=1) + ).strftime("%Y%m%dT%H%M%S%fZ") + + runs_to_create = [ + { + "id": str(trace_id), + "session_name": _session, + "name": "run 1", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id)}", + "trace_id": str(trace_id), + "inputs": {"input1": 1, "input2": 2}, + "outputs": {"output1": 3, "output2": 4}, + }, + { + "id": str(trace_id_2), + "session_name": _session, + "name": "run 3", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id_2)}", + "trace_id": str(trace_id_2), + "inputs": {"input1": 1, "input2": 2}, + "error": "error", + }, + { + "id": str(run_id_2), + "session_name": _session, + "name": "run 2", + "run_type": "chain", + "dotted_order": f"{current_time}{str(trace_id)}." + f"{later_time}{str(run_id_2)}", + "trace_id": str(trace_id), + "parent_run_id": str(trace_id), + "inputs": {"input1": 5, "input2": 6}, + }, + ] + runs_to_update = [ + { + "id": str(run_id_2), + "dotted_order": f"{current_time}{str(trace_id)}." + f"{later_time}{str(run_id_2)}", + "trace_id": str(trace_id), + "parent_run_id": str(trace_id), + "outputs": {"output1": 4, "output2": 5}, + }, + ] + + client.multipart_ingest(create=runs_to_create, update=runs_to_update) """ if not (create or update): return @@ -1688,31 +1795,52 @@ def update_run( ) -> None: """Update a run in the LangSmith API. - Parameters - ---------- - run_id : str or UUID - The ID of the run to update. - name : str or None, default=None - The name of the run. - end_time : datetime or None - The end time of the run. - error : str or None, default=None - The error message of the run. - inputs : Dict or None, default=None - The input values for the run. - outputs : Dict or None, default=None - The output values for the run. - events : Sequence[dict] or None, default=None - The events for the run. - extra : Dict or None, default=None - The extra information for the run. - tags : List[str] or None, default=None - The tags for the run. - attachments: dict[str, ls_schemas.Attachment] or None, default=None - A dictionary of attachments to add to the run. The keys are the attachment names, - and the values are Attachment objects containing the data and mime type. - **kwargs : Any - Kwargs are ignored. + Args: + run_id (Union[UUID, str]): The ID of the run to update. + name (Optional[str]): The name of the run. + end_time (Optional[datetime.datetime]): The end time of the run. + error (Optional[str]): The error message of the run. + inputs (Optional[Dict]): The input values for the run. + outputs (Optional[Dict]): The output values for the run. + events (Optional[Sequence[dict]]): The events for the run. + extra (Optional[Dict]): The extra information for the run. + tags (Optional[List[str]]): The tags for the run. + attachments (Optional[Dict[str, Attachment]]): A dictionary of attachments to add to the run. The keys are the attachment names, + and the values are Attachment objects containing the data and mime type. + **kwargs (Any): Kwargs are ignored. + + Returns: + None + + Examples: + .. 
code-block:: python + from langsmith import Client + import datetime + from uuid import uuid4 + + client = Client() + project_name = "__test_update_run" + + start_time = datetime.datetime.now() + revision_id = uuid4() + run: dict = dict( + id=uuid4(), + name="test_run", + run_type="llm", + inputs={"text": "hello world"}, + project_name=project_name, + api_url=os.getenv("LANGCHAIN_ENDPOINT"), + start_time=start_time, + extra={"extra": "extra"}, + revision_id=revision_id, + ) + # Create the run + client.create_run(**run) + run["outputs"] = {"output": ["Hi"]} + run["extra"]["foo"] = "bar" + run["name"] = "test_run_updated" + # Update the run + client.update_run(run["id"], **run) """ data: Dict[str, Any] = { "id": _as_uuid(run_id, "run_id"), @@ -1782,20 +1910,14 @@ def _update_run(self, run_update: dict) -> None: def _load_child_runs(self, run: ls_schemas.Run) -> ls_schemas.Run: """Load child runs for a given run. - Parameters - ---------- - run : Run - The run to load child runs for. + Args: + run (Run): The run to load child runs for. Returns: - ------- - Run - The run with loaded child runs. + Run: The run with loaded child runs. Raises: - ------ - LangSmithError - If a child run has no parent. + LangSmithError: If a child run has no parent. """ child_runs = self.list_runs(id=run.child_run_ids) treemap: DefaultDict[uuid.UUID, List[ls_schemas.Run]] = collections.defaultdict( @@ -1820,17 +1942,24 @@ def read_run( ) -> ls_schemas.Run: """Read a run from the LangSmith API. - Parameters - ---------- - run_id : str or UUID - The ID of the run to read. - load_child_runs : bool, default=False - Whether to load nested child runs. + Args: + run_id (Union[UUID, str]): + The ID of the run to read. + load_child_runs (bool, default=False): + Whether to load nested child runs. Returns: - ------- - Run - The run. + Run: The run read from the LangSmith API. + + Examples: + .. code-block:: python + from langsmith import Client + + # Existing run + run_id = "your-run-id" + + client = Client() + stored_run = client.read_run(run_id) """ response = self.request_with_retries( "GET", f"/runs/{_as_uuid(run_id, 'run_id')}" @@ -1863,108 +1992,93 @@ def list_runs( ) -> Iterator[ls_schemas.Run]: """List runs from the LangSmith API. - Parameters - ---------- - project_id : UUID or None, default=None - The ID(s) of the project to filter by. - project_name : str or None, default=None - The name(s) of the project to filter by. - run_type : str or None, default=None - The type of the runs to filter by. - trace_id : UUID or None, default=None - The ID of the trace to filter by. - reference_example_id : UUID or None, default=None - The ID of the reference example to filter by. - query : str or None, default=None - The query string to filter by. - filter : str or None, default=None - The filter string to filter by. - trace_filter : str or None, default=None - Filter to apply to the ROOT run in the trace tree. This is meant to - be used in conjunction with the regular `filter` parameter to let you - filter runs by attributes of the root run within a trace. - tree_filter : str or None, default=None - Filter to apply to OTHER runs in the trace tree, including - sibling and child runs. This is meant to be used in conjunction with - the regular `filter` parameter to let you filter runs by attributes - of any run within a trace. - is_root : bool or None, default=None - Whether to filter by root runs. - parent_run_id : UUID or None, default=None - The ID of the parent run to filter by. 
- start_time : datetime or None, default=None - The start time to filter by. - error : bool or None, default=None - Whether to filter by error status. - run_ids : List[str or UUID] or None, default=None - The IDs of the runs to filter by. - limit : int or None, default=None - The maximum number of runs to return. - **kwargs : Any - Additional keyword arguments. + Args: + project_id (Optional[Union[UUID, str], Sequence[Union[UUID, str]]]): + The ID(s) of the project to filter by. + project_name (Optional[Union[str, Sequence[str]]]): The name(s) of the project to filter by. + run_type (Optional[str]): The type of the runs to filter by. + trace_id (Optional[Union[UUID, str]]): The ID of the trace to filter by. + reference_example_id (Optional[Union[UUID, str]]): The ID of the reference example to filter by. + query (Optional[str]): The query string to filter by. + filter (Optional[str]): The filter string to filter by. + trace_filter (Optional[str]): Filter to apply to the ROOT run in the trace tree. This is meant to + be used in conjunction with the regular `filter` parameter to let you + filter runs by attributes of the root run within a trace. + tree_filter (Optional[str]): Filter to apply to OTHER runs in the trace tree, including + sibling and child runs. This is meant to be used in conjunction with + the regular `filter` parameter to let you filter runs by attributes + of any run within a trace. + is_root (Optional[bool]): Whether to filter by root runs. + parent_run_id (Optional[Union[UUID, str]]): + The ID of the parent run to filter by. + start_time (Optional[datetime.datetime]): + The start time to filter by. + error (Optional[bool]): Whether to filter by error status. + run_ids (Optional[Sequence[Union[UUID, str]]]): + The IDs of the runs to filter by. + select (Optional[Sequence[str]]): The fields to select. + limit (Optional[int]): The maximum number of runs to return. + **kwargs (Any): Additional keyword arguments. Yields: - ------ - Run The runs. Examples: - -------- - .. code-block:: python + .. 
code-block:: python - # List all runs in a project - project_runs = client.list_runs(project_name="") + # List all runs in a project + project_runs = client.list_runs(project_name="") - # List LLM and Chat runs in the last 24 hours - todays_llm_runs = client.list_runs( - project_name="", - start_time=datetime.now() - timedelta(days=1), - run_type="llm", - ) + # List LLM and Chat runs in the last 24 hours + todays_llm_runs = client.list_runs( + project_name="", + start_time=datetime.now() - timedelta(days=1), + run_type="llm", + ) - # List root traces in a project - root_runs = client.list_runs(project_name="", is_root=1) + # List root traces in a project + root_runs = client.list_runs(project_name="", is_root=1) - # List runs without errors - correct_runs = client.list_runs(project_name="", error=False) + # List runs without errors + correct_runs = client.list_runs(project_name="", error=False) - # List runs and only return their inputs/outputs (to speed up the query) - input_output_runs = client.list_runs( - project_name="", select=["inputs", "outputs"] - ) + # List runs and only return their inputs/outputs (to speed up the query) + input_output_runs = client.list_runs( + project_name="", select=["inputs", "outputs"] + ) - # List runs by run ID - run_ids = [ - "a36092d2-4ad5-4fb4-9c0d-0dba9a2ed836", - "9398e6be-964f-4aa4-8ae9-ad78cd4b7074", - ] - selected_runs = client.list_runs(id=run_ids) + # List runs by run ID + run_ids = [ + "a36092d2-4ad5-4fb4-9c0d-0dba9a2ed836", + "9398e6be-964f-4aa4-8ae9-ad78cd4b7074", + ] + selected_runs = client.list_runs(id=run_ids) - # List all "chain" type runs that took more than 10 seconds and had - # `total_tokens` greater than 5000 - chain_runs = client.list_runs( - project_name="", - filter='and(eq(run_type, "chain"), gt(latency, 10), gt(total_tokens, 5000))', - ) + # List all "chain" type runs that took more than 10 seconds and had + # `total_tokens` greater than 5000 + chain_runs = client.list_runs( + project_name="", + filter='and(eq(run_type, "chain"), gt(latency, 10), gt(total_tokens, 5000))', + ) - # List all runs called "extractor" whose root of the trace was assigned feedback "user_score" score of 1 - good_extractor_runs = client.list_runs( - project_name="", - filter='eq(name, "extractor")', - trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))', - ) + # List all runs called "extractor" whose root of the trace was assigned feedback "user_score" score of 1 + good_extractor_runs = client.list_runs( + project_name="", + filter='eq(name, "extractor")', + trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))', + ) - # List all runs that started after a specific timestamp and either have "error" not equal to null or a "Correctness" feedback score equal to 0 - complex_runs = client.list_runs( - project_name="", - filter='and(gt(start_time, "2023-07-15T12:34:56Z"), or(neq(error, null), and(eq(feedback_key, "Correctness"), eq(feedback_score, 0.0))))', - ) + # List all runs that started after a specific timestamp and either have "error" not equal to null or a "Correctness" feedback score equal to 0 + complex_runs = client.list_runs( + project_name="", + filter='and(gt(start_time, "2023-07-15T12:34:56Z"), or(neq(error, null), and(eq(feedback_key, "Correctness"), eq(feedback_score, 0.0))))', + ) - # List all runs where `tags` include "experimental" or "beta" and `latency` is greater than 2 seconds - tagged_runs = client.list_runs( - project_name="", - filter='and(or(has(tags, "experimental"), has(tags, "beta")), 
gt(latency, 2))', - ) + # List all runs where `tags` include "experimental" or "beta" and `latency` is greater than 2 seconds + tagged_runs = client.list_runs( + project_name="", + filter='and(or(has(tags, "experimental"), has(tags, "beta")), gt(latency, 2))', + ) """ # noqa: E501 project_ids = [] if isinstance(project_id, (uuid.UUID, str)): @@ -2061,12 +2175,13 @@ def get_run_stats( based on the runs that match the query. Args: - id (Optional[List[ID_TYPE]]): List of run IDs to filter by. - trace (Optional[ID_TYPE]): Trace ID to filter by. - parent_run (Optional[ID_TYPE]): Parent run ID to filter by. + id (Optional[List[Union[UUID, str]]]): List of run IDs to filter by. + trace (Optional[Union[UUID, str]]): Trace ID to filter by. + parent_run (Optional[Union[UUID, str]]): Parent run ID to filter by. run_type (Optional[str]): Run type to filter by. - projects (Optional[List[ID_TYPE]]): List of session IDs to filter by. - reference_example (Optional[List[ID_TYPE]]): List of reference example IDs to filter by. + project_names (Optional[List[str]]): List of project names to filter by. + project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter by. + reference_example_ids (Optional[List[Union[UUID, str]]]): List of reference example IDs to filter by. start_time (Optional[str]): Start time to filter by. end_time (Optional[str]): End time to filter by. error (Optional[bool]): Filter by error status. @@ -2135,19 +2250,13 @@ def get_run_url( More for use interacting with runs after the fact for data analysis or ETL workloads. - Parameters - ---------- - run : Run - The run. - project_name : str or None, default=None - The name of the project. - project_id : UUID or None, default=None - The ID of the project. + Args: + run (RunBase): The run. + project_name (Optional[str]): The name of the project. + project_id (Optional[Union[UUID, str]]): The ID of the project. Returns: - ------- - str - The URL for the run. + str: The URL for the run. """ if session_id := getattr(run, "session_id", None): pass @@ -2167,7 +2276,16 @@ def get_run_url( ) def share_run(self, run_id: ID_TYPE, *, share_id: Optional[ID_TYPE] = None) -> str: - """Get a share link for a run.""" + """Get a share link for a run. + + Args: + run_id (Union[UUID, str]): The ID of the run to share. + share_id (Optional[Union[UUID, str]]): Custom share ID. + If not provided, a random UUID will be generated. + + Returns: + str: The URL of the shared run. + """ run_id_ = _as_uuid(run_id, "run_id") data = { "run_id": str(run_id_), @@ -2184,7 +2302,14 @@ def share_run(self, run_id: ID_TYPE, *, share_id: Optional[ID_TYPE] = None) -> s return f"{self._host_url}/public/{share_token}/r" def unshare_run(self, run_id: ID_TYPE) -> None: - """Delete share link for a run.""" + """Delete share link for a run. + + Args: + run_id (Union[UUID, str]): The ID of the run to unshare. + + Returns: + None + """ response = self.request_with_retries( "DELETE", f"/runs/{_as_uuid(run_id, 'run_id')}/share", @@ -2196,7 +2321,7 @@ def read_run_shared_link(self, run_id: ID_TYPE) -> Optional[str]: """Retrieve the shared link for a specific run. Args: - run_id (ID_TYPE): The ID of the run. + run_id (Union[UUID, str]): The ID of the run. 
Returns: Optional[str]: The shared link for the run, or None if the link is not @@ -2214,14 +2339,30 @@ def read_run_shared_link(self, run_id: ID_TYPE) -> Optional[str]: return f"{self._host_url}/public/{result['share_token']}/r" def run_is_shared(self, run_id: ID_TYPE) -> bool: - """Get share state for a run.""" + """Get share state for a run. + + Args: + run_id (Union[UUID, str]): The ID of the run. + + Returns: + bool: True if the run is shared, False otherwise. + """ link = self.read_run_shared_link(_as_uuid(run_id, "run_id")) return link is not None def read_shared_run( self, share_token: Union[ID_TYPE, str], run_id: Optional[ID_TYPE] = None ) -> ls_schemas.Run: - """Get shared runs.""" + """Get shared runs. + + Args: + share_token (Union[UUID, str]): The share token or URL of the shared run. + run_id (Optional[Union[UUID, str]]): The ID of the specific run to retrieve. + If not provided, the full shared run will be returned. + + Returns: + Run: The shared run. + """ _, token_uuid = _parse_token_or_url(share_token, "", kind="run") path = f"/public/{token_uuid}/run" if run_id is not None: @@ -2237,7 +2378,15 @@ def read_shared_run( def list_shared_runs( self, share_token: Union[ID_TYPE, str], run_ids: Optional[List[str]] = None ) -> Iterator[ls_schemas.Run]: - """Get shared runs.""" + """Get shared runs. + + Args: + share_token (Union[UUID, str]): The share token or URL of the shared run. + run_ids (Optional[List[str]]): A list of run IDs to filter the results by. + + Yields: + A shared run. + """ body = {"id": run_ids} if run_ids else {} _, token_uuid = _parse_token_or_url(share_token, "", kind="run") for run in self._get_cursor_paginated_list( @@ -2254,7 +2403,7 @@ def read_dataset_shared_schema( """Retrieve the shared schema of a dataset. Args: - dataset_id (Optional[ID_TYPE]): The ID of the dataset. + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset. Either `dataset_id` or `dataset_name` must be given. dataset_name (Optional[str]): The name of the dataset. Either `dataset_id` or `dataset_name` must be given. @@ -2291,7 +2440,20 @@ def share_dataset( *, dataset_name: Optional[str] = None, ) -> ls_schemas.DatasetShareSchema: - """Get a share link for a dataset.""" + """Get a share link for a dataset. + + Args: + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset. + Either `dataset_id` or `dataset_name` must be given. + dataset_name (Optional[str]): The name of the dataset. + Either `dataset_id` or `dataset_name` must be given. + + Returns: + ls_schemas.DatasetShareSchema: The shared schema of the dataset. + + Raises: + ValueError: If neither `dataset_id` nor `dataset_name` is given. + """ if dataset_id is None and dataset_name is None: raise ValueError("Either dataset_id or dataset_name must be given") if dataset_id is None: @@ -2313,7 +2475,14 @@ def share_dataset( ) def unshare_dataset(self, dataset_id: ID_TYPE) -> None: - """Delete share link for a dataset.""" + """Delete share link for a dataset. + + Args: + dataset_id (Union[UUID, str]): The ID of the dataset to unshare. + + Returns: + None + """ response = self.request_with_retries( "DELETE", f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/share", @@ -2325,7 +2494,14 @@ def read_shared_dataset( self, share_token: str, ) -> ls_schemas.Dataset: - """Get shared datasets.""" + """Get shared datasets. + + Args: + share_token (Union[UUID, str]): The share token or URL of the shared dataset. + + Returns: + Dataset: The shared dataset. 
+ """ _, token_uuid = _parse_token_or_url(share_token, self.api_url) response = self.request_with_retries( "GET", @@ -2342,7 +2518,15 @@ def read_shared_dataset( def list_shared_examples( self, share_token: str, *, example_ids: Optional[List[ID_TYPE]] = None ) -> List[ls_schemas.Example]: - """Get shared examples.""" + """Get shared examples. + + Args: + share_token (Union[UUID, str]): The share token or URL of the shared dataset. + example_ids (Optional[List[UUID, str]], optional): The IDs of the examples to filter by. Defaults to None. + + Returns: + List[ls_schemas.Example]: The list of shared examples. + """ params = {} if example_ids is not None: params["id"] = [str(id) for id in example_ids] @@ -2370,18 +2554,14 @@ def list_shared_projects( """List shared projects. Args: - dataset_share_token : str - The share token of the dataset. - project_ids : List[ID_TYPE], optional - List of project IDs to filter the results, by default None. - name : str, optional - Name of the project to filter the results, by default None. - name_contains : str, optional - Substring to search for in project names, by default None. - limit : int, optional + dataset_share_token (str): The share token of the dataset. + project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter the results, by default None. + name (Optional[str]): Name of the project to filter the results, by default None. + name_contains (Optional[str]): Substring to search for in project names, by default None. + limit (Optional[int]): Maximum number of projects to return, by default None. Yields: - TracerSessionResult: The shared projects. + The shared projects. """ params = {"id": project_ids, "name": name, "name_contains": name_contains} share_token = _as_uuid(dataset_share_token, "dataset_share_token") @@ -2407,25 +2587,16 @@ def create_project( ) -> ls_schemas.TracerSession: """Create a project on the LangSmith API. - Parameters - ---------- - project_name : str - The name of the project. - project_extra : dict or None, default=None - Additional project information. - metadata: dict or None, default=None - Additional metadata to associate with the project. - description : str or None, default=None - The description of the project. - upsert : bool, default=False - Whether to update the project if it already exists. - reference_dataset_id: UUID or None, default=None - The ID of the reference dataset to associate with the project. + Args: + project_name (str): The name of the project. + project_extra (Optional[dict]): Additional project information. + metadata (Optional[dict]): Additional metadata to associate with the project. + description (Optional[str]): The description of the project. + upsert (bool, default=False): Whether to update the project if it already exists. + reference_dataset_id (Optional[Union[UUID, str]): The ID of the reference dataset to associate with the project. Returns: - ------- - TracerSession - The created project. + TracerSession: The created project. """ endpoint = f"{self.api_url}/sessions" extra = project_extra @@ -2463,24 +2634,23 @@ def update_project( ) -> ls_schemas.TracerSession: """Update a LangSmith project. - Parameters - ---------- - project_id : UUID - The ID of the project to update. - name : str or None, default=None - The new name to give the project. This is only valid if the project - has been assigned an end_time, meaning it has been completed/closed. - description : str or None, default=None - The new description to give the project. 
- metadata: dict or None, default=None - - project_extra : dict or None, default=None - Additional project information. + Args: + project_id (Union[UUID, str]): + The ID of the project to update. + name (Optional[str]): + The new name to give the project. This is only valid if the project + has been assigned an end_time, meaning it has been completed/closed. + description (Optional[str]): + The new description to give the project. + metadata (Optional[dict]): + Additional metadata to associate with the project. + project_extra (Optional[dict]): + Additional project information. + end_time (Optional[datetime.datetime]): + The time the project was completed. Returns: - ------- - TracerSession - The updated project. + TracerSession: The updated project. """ endpoint = f"{self.api_url}/sessions/{_as_uuid(project_id, 'project_id')}" extra = project_extra @@ -2537,20 +2707,16 @@ def read_project( ) -> ls_schemas.TracerSessionResult: """Read a project from the LangSmith API. - Parameters - ---------- - project_id : str or None, default=None - The ID of the project to read. - project_name : str or None, default=None - The name of the project to read. - Note: Only one of project_id or project_name may be given. - include_stats : bool, default=False - Whether to include a project's aggregate statistics in the response. + Args: + project_id (Optional[str]): + The ID of the project to read. + project_name (Optional[str]): The name of the project to read. + Only one of project_id or project_name may be given. + include_stats (bool, default=False): + Whether to include a project's aggregate statistics in the response. Returns: - ------- - TracerSessionResult - The project. + TracerSessionResult: The project. """ path = "/sessions" params: Dict[str, Any] = {"limit": 1} @@ -2578,17 +2744,14 @@ def has_project( ) -> bool: """Check if a project exists. - Parameters - ---------- - project_name : str - The name of the project to check for. - project_id : str or None, default=None - The ID of the project to check for. + Args: + project_name (str): + The name of the project to check for. + project_id (Optional[str]): + The ID of the project to check for. Returns: - ------- - bool - Whether the project exists. + bool: Whether the project exists. """ try: self.read_project(project_name=project_name) @@ -2607,10 +2770,12 @@ def get_test_results( Note: this will fetch whatever data exists in the DB. Results are not immediately available in the DB upon evaluation run completion. + Args: + project_id (Optional[Union[UUID, str]]): The ID of the project. + project_name (Optional[str]): The name of the project. + Returns: - -------- - pd.DataFrame - A dataframe containing the test results. + pd.DataFrame: A dataframe containing the test results. """ warnings.warn( "Function get_test_results is in beta.", UserWarning, stacklevel=2 @@ -2709,29 +2874,29 @@ def list_projects( ) -> Iterator[ls_schemas.TracerSession]: """List projects from the LangSmith API. 
- Parameters - ---------- - project_ids : Optional[List[ID_TYPE]], optional - A list of project IDs to filter by, by default None - name : Optional[str], optional - The name of the project to filter by, by default None - name_contains : Optional[str], optional - A string to search for in the project name, by default None - reference_dataset_id : Optional[List[ID_TYPE]], optional - A dataset ID to filter by, by default None - reference_dataset_name : Optional[str], optional - The name of the reference dataset to filter by, by default None - reference_free : Optional[bool], optional - Whether to filter for only projects not associated with a dataset. - limit : Optional[int], optional - The maximum number of projects to return, by default None - metadata: Optional[Dict[str, Any]], optional - Metadata to filter by. + Args: + project_ids (Optional[List[Union[UUID, str]]]): + A list of project IDs to filter by, by default None + name (Optional[str]): + The name of the project to filter by, by default None + name_contains (Optional[str]): + A string to search for in the project name, by default None + reference_dataset_id (Optional[List[Union[UUID, str]]]): + A dataset ID to filter by, by default None + reference_dataset_name (Optional[str]): + The name of the reference dataset to filter by, by default None + reference_free (Optional[bool]): + Whether to filter for only projects not associated with a dataset. + limit (Optional[int]): + The maximum number of projects to return, by default None + metadata (Optional[Dict[str, Any]]): + Metadata to filter by. Yields: - ------ - TracerSession The projects. + + Raises: + ValueError: If both reference_dataset_id and reference_dataset_name are given. """ params: Dict[str, Any] = { "limit": min(limit, 100) if limit is not None else 100 @@ -2771,12 +2936,17 @@ def delete_project( ) -> None: """Delete a project from LangSmith. - Parameters - ---------- - project_name : str or None, default=None - The name of the project to delete. - project_id : str or None, default=None - The ID of the project to delete. + Args: + project_name (Optional[str]): + The name of the project to delete. + project_id (Optional[str]): + The ID of the project to delete. + + Returns: + None + + Raises: + ValueError: If neither project_name or project_id is provided. """ if project_name is not None: project_id = str(self.read_project(project_name=project_name).id) @@ -2802,32 +2972,27 @@ def create_dataset( ) -> ls_schemas.Dataset: """Create a dataset in the LangSmith API. - Parameters - ---------- - dataset_name : str - The name of the dataset. - description : Optional[str], default=None - The description of the dataset. - data_type : ls_schemas.DataType, default=ls_schemas.DataType.kv - The data type of the dataset. - inputs_schema : Optional[Dict[str, Any]], default=None - The schema definition for the inputs of the dataset. - outputs_schema : Optional[Dict[str, Any]], default=None - The schema definition for the outputs of the dataset. - transformations : Optional[List[ls_schemas.DatasetTransformation]], default=None - A list of transformations to apply to the dataset. - metadata : Optional[dict], default=None - Additional metadata to associate with the dataset. + Args: + dataset_name (str): + The name of the dataset. + description (Optional[str]): + The description of the dataset. + data_type (DataType, default=DataType.kv): + The data type of the dataset. + inputs_schema (Optional[Dict[str, Any]]): + The schema definition for the inputs of the dataset. 
+ outputs_schema (Optional[Dict[str, Any]]): + The schema definition for the outputs of the dataset. + transformations (Optional[List[DatasetTransformation]]): + A list of transformations to apply to the dataset. + metadata (Optional[dict]): + Additional metadata to associate with the dataset. Returns: - ------- - ls_schemas.Dataset - The created dataset. + Dataset: The created dataset. Raises: - ------ - requests.HTTPError - If the request to create the dataset fails. + requests.HTTPError: If the request to create the dataset fails. """ dataset: Dict[str, Any] = { "name": dataset_name, @@ -2860,21 +3025,18 @@ def create_dataset( ) def has_dataset( - self, *, dataset_name: Optional[str] = None, dataset_id: Optional[str] = None + self, *, dataset_name: Optional[str] = None, dataset_id: Optional[ID_TYPE] = None ) -> bool: """Check whether a dataset exists in your tenant. - Parameters - ---------- - dataset_name : str or None, default=None - The name of the dataset to check. - dataset_id : str or None, default=None - The ID of the dataset to check. + Args: + dataset_name (Optional[str]): + The name of the dataset to check. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to check. Returns: - ------- - bool - Whether the dataset exists. + bool: Whether the dataset exists. """ try: self.read_dataset(dataset_name=dataset_name, dataset_id=dataset_id) @@ -2891,17 +3053,14 @@ def read_dataset( ) -> ls_schemas.Dataset: """Read a dataset from the LangSmith API. - Parameters - ---------- - dataset_name : str or None, default=None - The name of the dataset to read. - dataset_id : UUID or None, default=None - The ID of the dataset to read. + Args: + dataset_name (Optional[str]): + The name of the dataset to read. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to read. Returns: - ------- - Dataset - The dataset. + Dataset: The dataset. """ path = "/datasets" params: Dict[str, Any] = {"limit": 1} @@ -2943,45 +3102,39 @@ def diff_dataset_versions( ) -> ls_schemas.DatasetDiffInfo: """Get the difference between two versions of a dataset. - Parameters - ---------- - dataset_id : str or None, default=None - The ID of the dataset. - dataset_name : str or None, default=None - The name of the dataset. - from_version : str or datetime.datetime - The starting version for the diff. - to_version : str or datetime.datetime - The ending version for the diff. + Args: + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset. + dataset_name (Optional[str]): + The name of the dataset. + from_version (Union[str, datetime.datetime]): + The starting version for the diff. + to_version (Union[str, datetime.datetime]): + The ending version for the diff. Returns: - ------- - DatasetDiffInfo - The difference between the two versions of the dataset. + DatasetDiffInfo: The difference between the two versions of the dataset. Examples: - -------- - .. code-block:: python - - # Get the difference between two tagged versions of a dataset - from_version = "prod" - to_version = "dev" - diff = client.diff_dataset_versions( - dataset_name="my-dataset", - from_version=from_version, - to_version=to_version, - ) - print(diff) - - # Get the difference between two timestamped versions of a dataset - from_version = datetime.datetime(2024, 1, 1) - to_version = datetime.datetime(2024, 2, 1) - diff = client.diff_dataset_versions( - dataset_name="my-dataset", - from_version=from_version, - to_version=to_version, - ) - print(diff) + .. 
code-block:: python + + # Get the difference between two tagged versions of a dataset + from_version = "prod" + to_version = "dev" + diff = client.diff_dataset_versions( + dataset_name="my-dataset", + from_version=from_version, + to_version=to_version, + ) + + # Get the difference between two timestamped versions of a dataset + from_version = datetime.datetime(2024, 1, 1) + to_version = datetime.datetime(2024, 2, 1) + diff = client.diff_dataset_versions( + dataset_name="my-dataset", + from_version=from_version, + to_version=to_version, + ) """ if dataset_id is None: if dataset_name is None: @@ -3009,21 +3162,21 @@ def diff_dataset_versions( return ls_schemas.DatasetDiffInfo(**response.json()) def read_dataset_openai_finetuning( - self, dataset_id: Optional[str] = None, *, dataset_name: Optional[str] = None + self, dataset_id: Optional[ID_TYPE] = None, *, dataset_name: Optional[str] = None ) -> list: """Download a dataset in OpenAI Jsonl format and load it as a list of dicts. - Parameters - ---------- - dataset_id : str - The ID of the dataset to download. - dataset_name : str - The name of the dataset to download. + Args: + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to download. + dataset_name (Optional[str]): + The name of the dataset to download. Returns: - ------- - list - The dataset loaded as a list of dicts. + list[dict]: The dataset loaded as a list of dicts. + + Raises: + ValueError: If neither dataset_id nor dataset_name is provided. """ path = "/datasets" if dataset_id is not None: @@ -3051,9 +3204,21 @@ def list_datasets( ) -> Iterator[ls_schemas.Dataset]: """List the datasets on the LangSmith API. + Args: + dataset_ids (Optional[List[Union[UUID, str]]]): + A list of dataset IDs to filter the results by. + data_type (Optional[str]): + The data type of the datasets to filter the results by. + dataset_name (Optional[str]): + The name of the dataset to filter the results by. + dataset_name_contains (Optional[str]): + A substring to search for in the dataset names. + metadata (Optional[Dict[str, Any]]): + A dictionary of metadata to filter the results by. + limit (Optional[int]): + The maximum number of datasets to return. + Yields: - ------- - Dataset The datasets. """ params: Dict[str, Any] = { @@ -3089,12 +3254,14 @@ def delete_dataset( ) -> None: """Delete a dataset from the LangSmith API. - Parameters - ---------- - dataset_id : UUID or None, default=None - The ID of the dataset to delete. - dataset_name : str or None, default=None - The name of the dataset to delete. + Args: + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to delete. + dataset_name (Optional[str]): + The name of the dataset to delete. + + Returns: + None """ if dataset_name is not None: dataset_id = self.read_dataset(dataset_name=dataset_name).id @@ -3124,30 +3291,33 @@ def update_dataset_tag( use the read_dataset_version method to find the exact version to apply the tags to. - Parameters - ---------- - dataset_id : UUID - The ID of the dataset to update. - as_of : datetime.datetime - The timestamp of the dataset to apply the new tags to. - tag : str - The new tag to apply to the dataset. + Args: + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to update. + dataset_name (Optional[str]): + The name of the dataset to update. + as_of (datetime.datetime): + The timestamp of the dataset to apply the new tags to. + tag (str): + The new tag to apply to the dataset. + + Returns: + None Examples: - -------- - .. code-block:: python + .. 
code-block:: python - dataset_name = "my-dataset" - # Get the version of a dataset <= a given timestamp - dataset_version = client.read_dataset_version( - dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1) - ) - # Assign that version a new tag - client.update_dataset_tags( - dataset_name="my-dataset", - as_of=dataset_version.as_of, - tag="prod", - ) + dataset_name = "my-dataset" + # Get the version of a dataset <= a given timestamp + dataset_version = client.read_dataset_version( + dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1) + ) + # Assign that version a new tag + client.update_dataset_tags( + dataset_name="my-dataset", + as_of=dataset_version.as_of, + tag="prod", + ) """ if dataset_name is not None: dataset_id = self.read_dataset(dataset_name=dataset_name).id @@ -3175,13 +3345,13 @@ def list_dataset_versions( """List dataset versions. Args: - dataset_id (Optional[ID_TYPE]): The ID of the dataset. + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset. dataset_name (Optional[str]): The name of the dataset. search (Optional[str]): The search query. limit (Optional[int]): The maximum number of versions to return. - Returns: - Iterator[ls_schemas.DatasetVersion]: An iterator of dataset versions. + Yields: + The dataset versions. """ if dataset_id is None: dataset_id = self.read_dataset(dataset_name=dataset_name).id @@ -3219,25 +3389,23 @@ def read_dataset_version( tag (Optional[str]): The tag of the dataset to retrieve. Returns: - ls_schemas.DatasetVersion: The dataset version. - + DatasetVersion: The dataset version. Examples: - --------- - .. code-block:: python + .. code-block:: python - # Get the latest version of a dataset - client.read_dataset_version(dataset_name="my-dataset", tag="latest") + # Get the latest version of a dataset + client.read_dataset_version(dataset_name="my-dataset", tag="latest") - # Get the version of a dataset <= a given timestamp - client.read_dataset_version( - dataset_name="my-dataset", - as_of=datetime.datetime(2024, 1, 1), - ) + # Get the version of a dataset <= a given timestamp + client.read_dataset_version( + dataset_name="my-dataset", + as_of=datetime.datetime(2024, 1, 1), + ) - # Get the version of a dataset with a specific tag - client.read_dataset_version(dataset_name="my-dataset", tag="prod") + # Get the version of a dataset with a specific tag + client.read_dataset_version(dataset_name="my-dataset", tag="prod") """ if dataset_id is None: dataset_id = self.read_dataset(dataset_name=dataset_name).id @@ -3264,10 +3432,13 @@ def clone_public_dataset( Args: token_or_url (str): The token of the public dataset to clone. - source_api_url: The URL of the langsmith server where the data is hosted. + source_api_url (Optional[str]): The URL of the langsmith server where the data is hosted. Defaults to the API URL of your current client. - dataset_name (str): The name of the dataset to create in your tenant. + dataset_name (Optional[str]): The name of the dataset to create in your tenant. Defaults to the name of the public dataset. + + Returns: + Dataset: The cloned dataset. """ source_api_url = source_api_url or self.api_url source_api_url, token_uuid = _parse_token_or_url(token_or_url, source_api_url) @@ -3334,7 +3505,23 @@ def create_llm_example( dataset_name: Optional[str] = None, created_at: Optional[datetime.datetime] = None, ) -> ls_schemas.Example: - """Add an example (row) to an LLM-type dataset.""" + """Add an example (row) to an LLM-type dataset. + + Args: + prompt (str): + The input prompt for the example. 
+ generation (Optional[str]): + The output generation for the example. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset. + dataset_name (Optional[str]): + The name of the dataset. + created_at (Optional[datetime.datetime]): + The creation timestamp of the example. + + Returns: + Example: The created example + """ return self.create_example( inputs={"input": prompt}, outputs={"output": generation}, @@ -3354,7 +3541,23 @@ def create_chat_example( dataset_name: Optional[str] = None, created_at: Optional[datetime.datetime] = None, ) -> ls_schemas.Example: - """Add an example (row) to a Chat-type dataset.""" + """Add an example (row) to a Chat-type dataset. + + Args: + messages (List[Union[Mapping[str, Any], BaseMessageLike]]): + The input messages for the example. + generations (Optional[Union[Mapping[str, Any], BaseMessageLike]]): + The output messages for the example. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset. + dataset_name (Optional[str]): + The name of the dataset. + created_at (Optional[datetime.datetime]): + The creation timestamp of the example. + + Returns: + Example: The created example + """ final_input = [] for message in messages: if ls_utils.is_base_message_like(message): @@ -3390,7 +3593,17 @@ def create_example_from_run( dataset_name: Optional[str] = None, created_at: Optional[datetime.datetime] = None, ) -> ls_schemas.Example: - """Add an example (row) to a dataset from a run.""" + """Add an example (row) to a dataset from a run. + + Args: + run (Run): The run to create an example from. + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset. + dataset_name (Optional[str]): The name of the dataset. + created_at (Optional[datetime.datetime]): The creation timestamp of the example. + + Returns: + Example: The created example + """ if dataset_id is None: dataset_id = self.read_dataset(dataset_name=dataset_name).id dataset_name = None # Nested call expects only 1 defined @@ -3625,7 +3838,15 @@ def update_examples_multipart( dataset_id: ID_TYPE, updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None, ) -> ls_schemas.UpsertExamplesResponse: - """Upload examples.""" + """Update examples using multipart. + + Args: + dataset_id (Union[UUID, str]): The ID of the dataset to update. + updates (Optional[List[ExampleUpdateWithAttachments]]): The updates to apply to the examples. + + Raises: + ValueError: If the multipart examples endpoint is not enabled. + """ if not (self.info.instance_flags or {}).get( "dataset_examples_multipart_enabled", False ): @@ -3657,7 +3878,18 @@ def upload_examples_multipart( dataset_id: ID_TYPE, uploads: Optional[List[ls_schemas.ExampleUploadWithAttachments]] = None, ) -> ls_schemas.UpsertExamplesResponse: - """Upload examples.""" + """Upload examples using multipart. + + Args: + dataset_id (Union[UUID, str]): The ID of the dataset to upload to. + uploads (Optional[List[ExampleUploadWithAttachments]]): The examples to upload. + + Returns: + ls_schemas.UpsertExamplesResponse: The count and ids of the successfully uploaded examples + + Raises: + ValueError: If the multipart examples endpoint is not enabled. + """ if not (self.info.instance_flags or {}).get( "dataset_examples_multipart_enabled", False ): @@ -3691,7 +3923,6 @@ def upsert_examples_multipart( .. deprecated:: 0.1.0 This method is deprecated. Use :func:`langsmith.upload_examples_multipart` instead. 
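+
+        Example:
+            A minimal sketch of the recommended replacement call (the
+            ``ExampleUploadWithAttachments`` field names shown are illustrative
+            assumptions, not a definitive schema):
+
+            .. code-block:: python
+
+                from langsmith import schemas as ls_schemas
+
+                # Upload one example to an existing dataset; the dataset ID is a
+                # placeholder and the inputs/outputs fields are assumed for illustration.
+                client.upload_examples_multipart(
+                    dataset_id="<your-dataset-id>",
+                    uploads=[
+                        ls_schemas.ExampleUploadWithAttachments(
+                            inputs={"question": "What is 2 + 2?"},
+                            outputs={"answer": "4"},
+                        )
+                    ],
+                )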
- """ # noqa: E501 if not (self.info.instance_flags or {}).get( "examples_multipart_enabled", False @@ -3733,25 +3964,31 @@ def create_examples( ) -> None: """Create examples in a dataset. - Parameters - ---------- - inputs : Sequence[Mapping[str, Any]] - The input values for the examples. - outputs : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None - The output values for the examples. - metadata : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None - The metadata for the examples. - splits : Optional[Sequence[Optional[str | List[str]]]], default=None - The splits for the examples, which are divisions - of your dataset such as 'train', 'test', or 'validation'. - source_run_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None - The IDs of the source runs associated with the examples. - ids : Optional[Sequence[ID_TYPE]], default=None - The IDs of the examples. - dataset_id : Optional[ID_TYPE], default=None - The ID of the dataset to create the examples in. - dataset_name : Optional[str], default=None - The name of the dataset to create the examples in. + Args: + inputs (Sequence[Mapping[str, Any]]): + The input values for the examples. + outputs (Optional[Sequence[Optional[Mapping[str, Any]]]]): + The output values for the examples. + metadata (Optional[Sequence[Optional[Mapping[str, Any]]]]): + The metadata for the examples. + splits (Optional[Sequence[Optional[str | List[str]]]]): + The splits for the examples, which are divisions + of your dataset such as 'train', 'test', or 'validation'. + source_run_ids (Optional[Sequence[Optional[Union[UUID, str]]]]): + The IDs of the source runs associated with the examples. + ids (Optional[Sequence[Union[UUID, str]]]): + The IDs of the examples. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to create the examples in. + dataset_name (Optional[str]): + The name of the dataset to create the examples in. + **kwargs: Any: Additional keyword arguments are ignored. + + Raises: + ValueError: If neither dataset_id nor dataset_name is provided. + + Returns: + None """ if dataset_id is None and dataset_name is None: raise ValueError("Either dataset_id or dataset_name must be provided.") @@ -3822,25 +4059,25 @@ def create_example( for a model or chain. Args: - inputs : Mapping[str, Any] + inputs (Mapping[str, Any]): The input values for the example. - dataset_id : UUID or None, default=None + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to create the example in. - dataset_name : str or None, default=None + dataset_name (Optional[str]): The name of the dataset to create the example in. - created_at : datetime or None, default=None + created_at (Optional[datetime.datetime]): The creation timestamp of the example. - outputs : Mapping[str, Any] or None, default=None + outputs (Optional[Mapping[str, Any]]): The output values for the example. - metadata : Mapping[str, Any] or None, default=None + metadata (Optional[Mapping[str, Any]]): The metadata for the example. - split : str or List[str] or None, default=None + split (Optional[str | List[str]]): The splits for the example, which are divisions of your dataset such as 'train', 'test', or 'validation'. - example_id : UUID or None, default=None + example_id (Optional[Union[UUID, str]]): The ID of the example to create. If not provided, a new example will be created. - source_run_id : UUID or None, default=None + source_run_id (Optional[Union[UUID, str]]): The ID of the source run associated with this example. 
Returns:
@@ -3880,7 +4117,11 @@ def read_example(
         """Read an example from the LangSmith API.
 
         Args:
-            example_id (UUID): The ID of the example to read.
+            example_id (Union[UUID, str]): The ID of the example to read.
+            as_of (Optional[datetime.datetime]): The dataset version tag OR
+                timestamp to retrieve the example as of.
+                Response examples will only be those that were present at the time
+                of the tagged (or timestamped) version.
 
         Returns:
             Example: The example.
@@ -3931,34 +4172,39 @@ def list_examples(
         """Retrieve the example rows of the specified dataset.
 
         Args:
-            dataset_id (UUID, optional): The ID of the dataset to filter by.
+            dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to filter by.
                 Defaults to None.
-            dataset_name (str, optional): The name of the dataset to filter by.
+            dataset_name (Optional[str]): The name of the dataset to filter by.
                 Defaults to None.
-            example_ids (List[UUID], optional): The IDs of the examples to filter by.
+            example_ids (Optional[Sequence[Union[UUID, str]]]): The IDs of the examples to filter by.
                 Defaults to None.
-            as_of (datetime, str, or optional): The dataset version tag OR
+            as_of (Optional[Union[datetime.datetime, str]]): The dataset version tag OR
                 timestamp to retrieve the examples as of.
                 Response examples will only be those that were present at the time
                 of the tagged (or timestamped) version.
-            splits (List[str], optional): A list of dataset splits, which are
+            splits (Optional[Sequence[str]]): A list of dataset splits, which are
                 divisions of your dataset such as 'train', 'test', or 'validation'.
                 Returns examples only from the specified splits.
-            inline_s3_urls (bool, optional): Whether to inline S3 URLs.
+            inline_s3_urls (bool, default=True): Whether to inline S3 URLs.
                 Defaults to True.
-            offset (int): The offset to start from. Defaults to 0.
-            limit (int, optional): The maximum number of examples to return.
-            filter (str, optional): A structured fileter string to apply to
+            offset (int, default=0): The offset to start from. Defaults to 0.
+            limit (Optional[int]): The maximum number of examples to return.
+            metadata (Optional[dict]): A dictionary of metadata to filter by.
+            filter (Optional[str]): A structured filter string to apply to
                 the examples.
+            include_attachments (bool, default=False): Whether to include the
+                attachments in the response. Defaults to False.
+            **kwargs (Any): Additional keyword arguments are ignored.
 
         Yields:
-            Example: The examples.
+            The examples.
 
         Examples:
             List all examples for a dataset:
 
             .. code-block:: python
+
                 from langsmith import Client
 
                 client = Client()
@@ -3970,6 +4216,7 @@ def list_examples(
             List examples by id
 
             .. code-block:: python
+
                 example_ids = [
                     '734fc6a0-c187-4266-9721-90b7a025751a',
                     'd6b4c1b9-6160-4d63-9b61-b034c585074f',
@@ -3980,11 +4227,13 @@ def list_examples(
             List examples by metadata
 
             .. code-block:: python
+
                 examples = client.list_examples(dataset_name=dataset_name, metadata={"foo": "bar"})
 
             List examples by structured filter
 
             .. code-block:: python
+
                 examples = client.list_examples(
                     dataset_name=dataset_name,
                     filter='and(not(has(metadata, \'{"foo": "bar"}\')), exists(metadata, "tenant_id"))'
@@ -4050,16 +4299,14 @@ def index_dataset(
             ``client.similar_examples()``.
 
         Args:
-            dataset_id (UUID): The ID of the dataset to index.
-            tag (str, optional): The version of the dataset to index. If 'latest'
+            dataset_id (Union[UUID, str]): The ID of the dataset to index.
+            tag (Optional[str]): The version of the dataset to index. 
If 'latest' then any updates to the dataset (additions, updates, deletions of examples) will be reflected in the index. + **kwargs (Any): Additional keyword arguments to pass as part of request body. Returns: None - - Raises: - requests.HTTPError """ # noqa: E501 dataset_id = _as_uuid(dataset_id, "dataset_id") resp = self.request_with_retries( @@ -4091,14 +4338,17 @@ def similar_examples( inputs (dict): The inputs to use as a search query. Must match the dataset input schema. Must be JSON serializable. limit (int): The maximum number of examples to return. - dataset_id (str or UUID): The ID of the dataset to search over. - filter (str, optional): A filter string to apply to the search results. Uses + dataset_id (Union[UUID, str]): The ID of the dataset to search over. + filter (Optional[str]): A filter string to apply to the search results. Uses the same syntax as the `filter` parameter in `list_runs()`. Only a subset of operations are supported. Defaults to None. For example, you can use ``and(eq(metadata.some_tag, 'some_value'), neq(metadata.env, 'dev'))`` to filter only examples where some_tag has some_value, and the environment is not dev. - kwargs (Any): Additional keyword args to pass as part of request body. + **kwargs: Additional keyword arguments to pass as part of request body. + + Returns: + list[ExampleSearch]: List of ExampleSearch objects. Examples: .. code-block:: python @@ -4112,7 +4362,7 @@ def similar_examples( dataset_id="...", ) - .. code-block:: pycon + .. code-block:: python [ ExampleSearch( @@ -4173,26 +4423,25 @@ def update_example( ) -> Dict[str, Any]: """Update a specific example. - Parameters - ---------- - example_id : str or UUID - The ID of the example to update. - inputs : Dict[str, Any] or None, default=None - The input values to update. - outputs : Mapping[str, Any] or None, default=None - The output values to update. - metadata : Dict or None, default=None - The metadata to update. - split : str or List[str] or None, default=None - The dataset split to update, such as - 'train', 'test', or 'validation'. - dataset_id : UUID or None, default=None - The ID of the dataset to update. + Args: + example_id (Union[UUID, str]): + The ID of the example to update. + inputs (Optional[Dict[str, Any]]): + The input values to update. + outputs (Optional[Mapping[str, Any]]): + The output values to update. + metadata (Optional[Dict]): + The metadata to update. + split (Optional[str | List[str]]): + The dataset split to update, such as + 'train', 'test', or 'validation'. + dataset_id (Optional[Union[UUID, str]]): + The ID of the dataset to update. + attachments_operations (Optional[AttachmentsOperations]): + The attachments operations to perform. Returns: - ------- - Dict[str, Any] - The updated example. + Dict[str, Any]: The updated example. """ if attachments_operations is not None: if not (self.info.instance_flags or {}).get( @@ -4233,26 +4482,25 @@ def update_examples( ) -> Dict[str, Any]: """Update multiple examples. - Parameters - ---------- - example_ids : Sequence[ID_TYPE] - The IDs of the examples to update. - inputs : Optional[Sequence[Optional[Dict[str, Any]]], default=None - The input values for the examples. - outputs : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None - The output values for the examples. - metadata : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None - The metadata for the examples. 
- split : Optional[Sequence[Optional[str | List[str]]]], default=None - The splits for the examples, which are divisions - of your dataset such as 'train', 'test', or 'validation'. - dataset_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None - The IDs of the datasets to move the examples to. + Args: + example_ids (Sequence[Union[UUID, str]]): + The IDs of the examples to update. + inputs (Optional[Sequence[Optional[Dict[str, Any]]]): + The input values for the examples. + outputs (Optional[Sequence[Optional[Mapping[str, Any]]]]): + The output values for the examples. + metadata (Optional[Sequence[Optional[Mapping[str, Any]]]]): + The metadata for the examples. + splits (Optional[Sequence[Optional[str | List[str]]]]): + The splits for the examples, which are divisions + of your dataset such as 'train', 'test', or 'validation'. + dataset_ids (Optional[Sequence[Optional[Union[UUID, str]]]]): + The IDs of the datasets to move the examples to. + attachments_operations (Optional[Sequence[Optional[ls_schemas.AttachmentsOperations]]): + The operations to perform on the attachments. Returns: - ------- - Dict[str, Any] - The response from the server (specifies the number of examples updated). + Dict[str, Any]: The response from the server (specifies the number of examples updated). """ if attachments_operations is not None: if not (self.info.instance_flags or {}).get( @@ -4316,10 +4564,12 @@ def update_examples( def delete_example(self, example_id: ID_TYPE) -> None: """Delete an example by ID. - Parameters - ---------- - example_id : str or UUID - The ID of the example to delete. + Args: + example_id (Union[UUID, str]): + The ID of the example to delete. + + Returns: + None """ response = self.request_with_retries( "DELETE", @@ -4338,13 +4588,14 @@ def list_dataset_splits( """Get the splits for a dataset. Args: - dataset_id (ID_TYPE): The ID of the dataset. - as_of (Optional[Union[str, datetime.datetime]], optional): The version + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset. + dataset_name (Optional[str]): The name of the dataset. + as_of (Optional[Union[str, datetime.datetime]]): The version of the dataset to retrieve splits for. Can be a timestamp or a string tag. Defaults to "latest". Returns: - List[str]: The names of this dataset's. + List[str]: The names of this dataset's splits. """ if dataset_id is None: if dataset_name is None: @@ -4376,11 +4627,12 @@ def update_dataset_splits( """Update the splits for a dataset. Args: - dataset_id (ID_TYPE): The ID of the dataset to update. + dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to update. + dataset_name (Optional[str]): The name of the dataset to update. split_name (str): The name of the split to update. - example_ids (List[ID_TYPE]): The IDs of the examples to add to or + example_ids (List[Union[UUID, str]]): The IDs of the examples to add to or remove from the split. - remove (bool, optional): If True, remove the examples from the split. + remove (Optional[bool]): If True, remove the examples from the split. If False, add the examples to the split. Defaults to False. Returns: @@ -4411,22 +4663,17 @@ def _resolve_run_id( ) -> ls_schemas.Run: """Resolve the run ID. - Parameters - ---------- - run : Run or RunBase or str or UUID - The run to resolve. - load_child_runs : bool - Whether to load child runs. + Args: + run (Union[Run, RunBase, str, UUID]): + The run to resolve. + load_child_runs (bool): + Whether to load child runs. Returns: - ------- - Run - The resolved run. + Run: The resolved run. 
Raises: - ------ - TypeError - If the run type is invalid. + TypeError: If the run type is invalid. """ if isinstance(run, (str, uuid.UUID)): run_ = self.read_run(run, load_child_runs=load_child_runs) @@ -4441,17 +4688,14 @@ def _resolve_example_id( ) -> Optional[ls_schemas.Example]: """Resolve the example ID. - Parameters - ---------- - example : Example or str or UUID or dict or None - The example to resolve. - run : Run - The run associated with the example. + Args: + example (Optional[Union[Example, str, UUID, dict]]): + The example to resolve. + run (Run): + The run associated with the example. Returns: - ------- - Example or None - The resolved example. + Optional[Example]: The resolved example. """ if isinstance(example, (str, uuid.UUID)): reference_example_ = self.read_example(example) @@ -4521,25 +4765,22 @@ def evaluate_run( ) -> ls_evaluator.EvaluationResult: """Evaluate a run. - Parameters - ---------- - run : Run or RunBase or str or UUID - The run to evaluate. - evaluator : RunEvaluator - The evaluator to use. - source_info : Dict[str, Any] or None, default=None - Additional information about the source of the evaluation to log - as feedback metadata. - reference_example : Example or str or dict or UUID or None, default=None - The example to use as a reference for the evaluation. - If not provided, the run's reference example will be used. - load_child_runs : bool, default=False - Whether to load child runs when resolving the run ID. + Args: + run (Union[Run, RunBase, str, UUID]): + The run to evaluate. + evaluator (RunEvaluator): + The evaluator to use. + source_info (Optional[Dict[str, Any]]): + Additional information about the source of the evaluation to log + as feedback metadata. + reference_example (Optional[Union[Example, str, dict, UUID]]): + The example to use as a reference for the evaluation. + If not provided, the run's reference example will be used. + load_child_runs (bool, default=False): + Whether to load child runs when resolving the run ID. Returns: - ------- - Feedback - The feedback object created by the evaluation. + Feedback: The feedback object created by the evaluation. """ run_ = self._resolve_run_id(run, load_child_runs=load_child_runs) reference_example_ = self._resolve_example_id(reference_example, run_) @@ -4616,25 +4857,22 @@ async def aevaluate_run( ) -> ls_evaluator.EvaluationResult: """Evaluate a run asynchronously. - Parameters - ---------- - run : Run or str or UUID - The run to evaluate. - evaluator : RunEvaluator - The evaluator to use. - source_info : Dict[str, Any] or None, default=None - Additional information about the source of the evaluation to log - as feedback metadata. - reference_example : Optional Example or UUID, default=None - The example to use as a reference for the evaluation. - If not provided, the run's reference example will be used. - load_child_runs : bool, default=False - Whether to load child runs when resolving the run ID. + Args: + run (Union[Run, str, UUID]): + The run to evaluate. + evaluator (RunEvaluator): + The evaluator to use. + source_info (Optional[Dict[str, Any]]): + Additional information about the source of the evaluation to log + as feedback metadata. + reference_example (Optional[Union[Example, str, dict, UUID]]): + The example to use as a reference for the evaluation. + If not provided, the run's reference example will be used. + load_child_runs (bool, default=False) + Whether to load child runs when resolving the run ID. 
Returns: - ------- - EvaluationResult - The evaluation result object created by the evaluation. + EvaluationResult: The evaluation result object created by the evaluation. """ run_ = self._resolve_run_id(run, load_child_runs=load_child_runs) reference_example_ = self._resolve_example_id(reference_example, run_) @@ -4676,52 +4914,55 @@ def create_feedback( ) -> ls_schemas.Feedback: """Create a feedback in the LangSmith API. - Parameters - ---------- - run_id : str or UUID - The ID of the run to provide feedback for. Either the run_id OR - the project_id must be provided. - trace_id : str or UUID - The trace ID of the run to provide feedback for. This is optional. - key : str - The name of the metric or 'aspect' this feedback is about. - score : float or int or bool or None, default=None - The score to rate this run on the metric or aspect. - value : float or int or bool or str or dict or None, default=None - The display value or non-numeric value for this feedback. - correction : dict or None, default=None - The proper ground truth for this run. - comment : str or None, default=None - A comment about this feedback, such as a justification for the score or - chain-of-thought trajectory for an LLM judge. - source_info : Dict[str, Any] or None, default=None - Information about the source of this feedback. - feedback_source_type : FeedbackSourceType or str, default=FeedbackSourceType.API - The type of feedback source, such as model (for model-generated feedback) - or API. - source_run_id : str or UUID or None, default=None, - The ID of the run that generated this feedback, if a "model" type. - feedback_id : str or UUID or None, default=None - The ID of the feedback to create. If not provided, a random UUID will be - generated. - feedback_config: langsmith.schemas.FeedbackConfig or None, default=None, - The configuration specifying how to interpret feedback with this key. - Examples include continuous (with min/max bounds), categorical, - or freeform. - stop_after_attempt : int, default=10 - The number of times to retry the request before giving up. - project_id : str or UUID - The ID of the project_id to provide feedback on. One - and only one - of - this and run_id must be provided. - comparative_experiment_id : str or UUID - If this feedback was logged as a part of a comparative experiment, this - associates the feedback with that experiment. - feedback_group_id : str or UUID - When logging preferences, ranking runs, or other comparative feedback, - this is used to group feedback together. - extra : dict - Metadata for the feedback. - trace_id: Optional[ID_TYPE] = The trace ID of the run to provide feedback for. Enables batch ingestion. + Args: + run_id (Optional[Union[UUID, str]]): + The ID of the run to provide feedback for. Either the run_id OR + the project_id must be provided. + key (str): + The name of the metric or 'aspect' this feedback is about. + score (Optional[Union[float, int, bool]]): + The score to rate this run on the metric or aspect. + value (Optional[Union[float, int, bool, str, dict]]): + The display value or non-numeric value for this feedback. + correction (Optional[dict]): + The proper ground truth for this run. + comment (Optional[str]): + A comment about this feedback, such as a justification for the score or + chain-of-thought trajectory for an LLM judge. + source_info (Optional[Dict[str, Any]]): + Information about the source of this feedback. 
+ feedback_source_type (Union[FeedbackSourceType, str]): + The type of feedback source, such as model (for model-generated feedback) + or API. + source_run_id (Optional[Union[UUID, str]]): + The ID of the run that generated this feedback, if a "model" type. + feedback_id (Optional[Union[UUID, str]]): + The ID of the feedback to create. If not provided, a random UUID will be + generated. + feedback_config (Optional[FeedbackConfig]): + The configuration specifying how to interpret feedback with this key. + Examples include continuous (with min/max bounds), categorical, + or freeform. + stop_after_attempt (int, default=10): + The number of times to retry the request before giving up. + project_id (Optional[Union[UUID, str]]): + The ID of the project_id to provide feedback on. One - and only one - of + this and run_id must be provided. + comparative_experiment_id (Optional[Union[UUID, str]]): + If this feedback was logged as a part of a comparative experiment, this + associates the feedback with that experiment. + feedback_group_id (Optional[Union[UUID, str]]): + When logging preferences, ranking runs, or other comparative feedback, + this is used to group feedback together. + extra (Optional[Dict]): + Metadata for the feedback. + trace_id (Optional[Union[UUID, str]]): + The trace ID of the run to provide feedback for. Enables batch ingestion. + **kwargs (Any): + Additional keyword arguments. + + Returns: + Feedback: The created feedback object. """ if run_id is None and project_id is None: raise ValueError("One of run_id and project_id must be provided") @@ -4830,18 +5071,20 @@ def update_feedback( ) -> None: """Update a feedback in the LangSmith API. - Parameters - ---------- - feedback_id : str or UUID - The ID of the feedback to update. - score : float or int or bool or None, default=None - The score to update the feedback with. - value : float or int or bool or str or dict or None, default=None - The value to update the feedback with. - correction : dict or None, default=None - The correction to update the feedback with. - comment : str or None, default=None - The comment to update the feedback with. + Args: + feedback_id (Union[UUID, str]): + The ID of the feedback to update. + score (Optional[Union[float, int, bool]]): + The score to update the feedback with. + value (Optional[Union[float, int, bool, str, dict]]): + The value to update the feedback with. + correction (Optional[dict]): + The correction to update the feedback with. + comment (Optional[str]): + The comment to update the feedback with. + + Returns: + None """ feedback_update: Dict[str, Any] = {} if score is not None: @@ -4863,15 +5106,12 @@ def update_feedback( def read_feedback(self, feedback_id: ID_TYPE) -> ls_schemas.Feedback: """Read a feedback from the LangSmith API. - Parameters - ---------- - feedback_id : str or UUID - The ID of the feedback to read. + Args: + feedback_id (Union[UUID, str]): + The ID of the feedback to read. Returns: - ------- - Feedback - The feedback. + Feedback: The feedback. """ response = self.request_with_retries( "GET", @@ -4890,23 +5130,20 @@ def list_feedback( ) -> Iterator[ls_schemas.Feedback]: """List the feedback objects on the LangSmith API. - Parameters - ---------- - run_ids : List[str or UUID] or None, default=None - The IDs of the runs to filter by. - feedback_key: List[str] or None, default=None - The feedback key(s) to filter by. Example: 'correctness' - The query performs a union of all feedback keys. 
- feedback_source_type: List[FeedbackSourceType] or None, default=None - The type of feedback source, such as model - (for model-generated feedback) or API. - limit : int or None, default=None - **kwargs : Any - Additional keyword arguments. + Args: + run_ids (Optional[Sequence[Union[UUID, str]]]): + The IDs of the runs to filter by. + feedback_key (Optional[Sequence[str]]): + The feedback key(s) to filter by. Examples: 'correctness' + The query performs a union of all feedback keys. + feedback_source_type (Optional[Sequence[FeedbackSourceType]]): + The type of feedback source, such as model or API. + limit (Optional[int]): + The maximum number of feedback to return. + **kwargs (Any): + Additional keyword arguments. Yields: - ------ - Feedback The feedback objects. """ params: dict = { @@ -4928,10 +5165,12 @@ def list_feedback( def delete_feedback(self, feedback_id: ID_TYPE) -> None: """Delete a feedback by ID. - Parameters - ---------- - feedback_id : str or UUID - The ID of the feedback to delete. + Args: + feedback_id (Union[UUID, str]): + The ID of the feedback to delete. + + Returns: + None """ response = self.request_with_retries( "DELETE", @@ -4955,22 +5194,22 @@ def create_feedback_from_token( Args: token_or_url (Union[str, uuid.UUID]): The token or URL from which to create feedback. - score (Union[float, int, bool, None], optional): The score of the feedback. + score (Optional[Union[float, int, bool]]): The score of the feedback. Defaults to None. - value (Union[float, int, bool, str, dict, None], optional): The value of the + value (Optional[Union[float, int, bool, str, dict]]): The value of the feedback. Defaults to None. - correction (Union[dict, None], optional): The correction of the feedback. + correction (Optional[dict]): The correction of the feedback. Defaults to None. - comment (Union[str, None], optional): The comment of the feedback. Defaults + comment (Optional[str]): The comment of the feedback. Defaults to None. - metadata (Optional[dict], optional): Additional metadata for the feedback. + metadata (Optional[dict]): Additional metadata for the feedback. Defaults to None. Raises: ValueError: If the source API URL is invalid. Returns: - None: This method does not return anything. + None """ source_api_url, token_uuid = _parse_token_or_url( token_or_url, self.api_url, num_parts=1 @@ -5010,21 +5249,23 @@ def create_presigned_feedback_token( API key. Args: - run_id: - feedback_key: - expiration: The expiration time of the pre-signed URL. + run_id (Union[UUID, str]): + The ID of the run. + feedback_key (str): + The key of the feedback to create. + expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL. Either a datetime or a timedelta offset from now. Default to 3 hours. - feedback_config: FeedbackConfig or None. + feedback_config (Optional[FeedbackConfig]): If creating a feedback_key for the first time, this defines how the metric should be interpreted, such as a continuous score (w/ optional bounds), or distribution over categorical values. - feedback_id: The ID of the feedback to create. If not provided, a new + feedback_id (Optional[Union[UUID, str]): The ID of the feedback to create. If not provided, a new feedback will be created. Returns: - The pre-signed URL for uploading feedback data. + FeedbackIngestToken: The pre-signed URL for uploading feedback data. """ body: Dict[str, Any] = { "run_id": run_id, @@ -5074,19 +5315,21 @@ def create_presigned_feedback_tokens( API key. 
Args: - run_id: - feedback_key: - expiration: The expiration time of the pre-signed URL. + run_id (Union[UUID, str]): + The ID of the run. + feedback_keys (Sequence[str]): + The key of the feedback to create. + expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL. Either a datetime or a timedelta offset from now. Default to 3 hours. - feedback_config: FeedbackConfig or None. + feedback_configs (Optional[Sequence[Optional[FeedbackConfig]]]): If creating a feedback_key for the first time, this defines how the metric should be interpreted, such as a continuous score (w/ optional bounds), or distribution over categorical values. Returns: - The pre-signed URL for uploading feedback data. + Sequence[FeedbackIngestToken]: The pre-signed URL for uploading feedback data. """ # validate if feedback_configs is not None and len(feedback_keys) != len(feedback_configs): @@ -5166,12 +5409,11 @@ def list_presigned_feedback_tokens( """List the feedback ingest tokens for a run. Args: - run_id: The ID of the run to filter by. - limit: The maximum number of tokens to return. + run_id (Union[UUID, str]): The ID of the run to filter by. + limit (Optional[int]): The maximum number of tokens to return. Yields: - FeedbackIngestToken - The feedback ingest tokens. + The feedback ingest tokens. """ params = { "run_id": _as_uuid(run_id, "run_id"), @@ -5197,17 +5439,17 @@ def list_annotation_queues( """List the annotation queues on the LangSmith API. Args: - queue_ids : List[str or UUID] or None, default=None + queue_ids (Optional[List[Union[UUID, str]]]): The IDs of the queues to filter by. - name : str or None, default=None + name (Optional[str]): The name of the queue to filter by. - name_contains : str or None, default=None + name_contains (Optional[str]): The substring that the queue name should contain. - limit : int or None, default=None + limit (Optional[int]): + The maximum number of queues to return. Yields: - AnnotationQueue - The annotation queues. + The annotation queues. """ params: dict = { "ids": ( @@ -5238,16 +5480,15 @@ def create_annotation_queue( """Create an annotation queue on the LangSmith API. Args: - name : str + name (str): The name of the annotation queue. - description : str, optional + description (Optional[str]): The description of the annotation queue. - queue_id : str or UUID, optional + queue_id (Optional[Union[UUID, str]]): The ID of the annotation queue. Returns: - AnnotationQueue - The created annotation queue object. + AnnotationQueue: The created annotation queue object. """ body = { "name": name, @@ -5268,10 +5509,10 @@ def read_annotation_queue(self, queue_id: ID_TYPE) -> ls_schemas.AnnotationQueue """Read an annotation queue with the specified queue ID. Args: - queue_id (ID_TYPE): The ID of the annotation queue to read. + queue_id (Union[UUID, str]): The ID of the annotation queue to read. Returns: - ls_schemas.AnnotationQueue: The annotation queue object. + AnnotationQueue: The annotation queue object. """ # TODO: Replace when actual endpoint is added return next(self.list_annotation_queues(queue_ids=[queue_id])) @@ -5282,10 +5523,13 @@ def update_annotation_queue( """Update an annotation queue with the specified queue_id. Args: - queue_id (ID_TYPE): The ID of the annotation queue to update. + queue_id (Union[UUID, str]): The ID of the annotation queue to update. name (str): The new name for the annotation queue. 
- description (Optional[str], optional): The new description for the + description (Optional[str]): The new description for the annotation queue. Defaults to None. + + Returns: + None """ response = self.request_with_retries( "PATCH", @@ -5301,7 +5545,10 @@ def delete_annotation_queue(self, queue_id: ID_TYPE) -> None: """Delete an annotation queue with the specified queue ID. Args: - queue_id (ID_TYPE): The ID of the annotation queue to delete. + queue_id (Union[UUID, str]): The ID of the annotation queue to delete. + + Returns: + None """ response = self.request_with_retries( "DELETE", @@ -5316,9 +5563,12 @@ def add_runs_to_annotation_queue( """Add runs to an annotation queue with the specified queue ID. Args: - queue_id (ID_TYPE): The ID of the annotation queue. - run_ids (List[ID_TYPE]): The IDs of the runs to be added to the annotation + queue_id (Union[UUID, str]): The ID of the annotation queue. + run_ids (List[Union[UUID, str]]): The IDs of the runs to be added to the annotation queue. + + Returns: + None """ response = self.request_with_retries( "POST", @@ -5333,9 +5583,12 @@ def delete_run_from_annotation_queue( """Delete a run from an annotation queue with the specified queue ID and run ID. Args: - queue_id (ID_TYPE): The ID of the annotation queue. - run_id (ID_TYPE): The ID of the run to be added to the annotation + queue_id (Union[UUID, str]): The ID of the annotation queue. + run_id (Union[UUID, str]): The ID of the run to be added to the annotation queue. + + Returns: + None """ response = self.request_with_retries( "DELETE", @@ -5349,15 +5602,15 @@ def get_run_from_annotation_queue( """Get a run from an annotation queue at the specified index. Args: - queue_id (ID_TYPE): The ID of the annotation queue. + queue_id (Union[UUID, str]): The ID of the annotation queue. index (int): The index of the run to retrieve. Returns: - ls_schemas.RunWithAnnotationQueueInfo: The run at the specified index. + RunWithAnnotationQueueInfo: The run at the specified index. Raises: - ls_utils.LangSmithNotFoundError: If the run is not found at the given index. - ls_utils.LangSmithError: For other API-related errors. + LangSmithNotFoundError: If the run is not found at the given index. + LangSmithError: For other API-related errors. """ base_url = f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}/run" response = self.request_with_retries( @@ -5384,15 +5637,16 @@ def create_comparative_experiment( These experiments compare 2 or more experiment results over a shared dataset. Args: - name: The name of the comparative experiment. - experiments: The IDs of the experiments to compare. - reference_dataset: The ID of the dataset these experiments are compared on. - description: The description of the comparative experiment. - created_at: The creation time of the comparative experiment. - metadata: Additional metadata for the comparative experiment. + name (str): The name of the comparative experiment. + experiments (Sequence[Union[UUID, str]]): The IDs of the experiments to compare. + reference_dataset (Optional[Union[UUID, str]]): The ID of the dataset these experiments are compared on. + description (Optional[str]): The description of the comparative experiment. + created_at (Optional[datetime.datetime]): The creation time of the comparative experiment. + metadata (Optional[Dict[str, Any]]): Additional metadata for the comparative experiment. + id (Optional[Union[UUID, str]]): The ID of the comparative experiment. Returns: - The created comparative experiment object. 
+ ComparativeExperiment: The created comparative experiment object. """ if not experiments: raise ValueError("At least one experiment is required.") @@ -5445,7 +5699,6 @@ async def arun_on_dataset( .. deprecated:: 0.1.0 This method is deprecated. Use :func:`langsmith.aevaluate` instead. - """ # noqa: E501 warnings.warn( "The `arun_on_dataset` method is deprecated and" @@ -5494,7 +5747,6 @@ def run_on_dataset( .. deprecated:: 0.1.0 This method is deprecated. Use :func:`langsmith.aevaluate` instead. - """ # noqa: E501 # noqa: E501 warnings.warn( "The `run_on_dataset` method is deprecated and" @@ -5554,8 +5806,8 @@ def _get_latest_commit_hash( Args: prompt_owner_and_name (str): The owner and name of the prompt. - limit (int): The maximum number of commits to fetch. Defaults to 1. - offset (int): The number of commits to skip. Defaults to 0. + limit (int, default=1): The maximum number of commits to fetch. Defaults to 1. + offset (int, default=0): The number of commits to skip. Defaults to 0. Returns: Optional[str]: The latest commit hash, or None if no commits are found. @@ -5633,7 +5885,7 @@ def like_prompt(self, prompt_identifier: str) -> Dict[str, int]: prompt_identifier (str): The identifier of the prompt. Returns: - A dictionary with the key 'likes' and the count of likes as the value. + Dict[str, int]: A dictionary with the key 'likes' and the count of likes as the value. """ return self._like_or_unlike_prompt(prompt_identifier, like=True) @@ -5645,7 +5897,7 @@ def unlike_prompt(self, prompt_identifier: str) -> Dict[str, int]: prompt_identifier (str): The identifier of the prompt. Returns: - A dictionary with the key 'likes' and the count of likes as the value. + Dict[str, int]: A dictionary with the key 'likes' and the count of likes as the value. """ return self._like_or_unlike_prompt(prompt_identifier, like=False) @@ -5664,18 +5916,18 @@ def list_prompts( """List prompts with pagination. Args: - limit (int): The maximum number of prompts to return. Defaults to 100. - offset (int): The number of prompts to skip. Defaults to 0. + limit (int, default=100): The maximum number of prompts to return. Defaults to 100. + offset (int, default=0): The number of prompts to skip. Defaults to 0. is_public (Optional[bool]): Filter prompts by if they are public. is_archived (Optional[bool]): Filter prompts by if they are archived. - sort_field (ls_schemas.PromptsSortField): The field to sort by. + sort_field (PromptSortField): The field to sort by. Defaults to "updated_at". - sort_direction (Literal["desc", "asc"]): The order to sort by. + sort_direction (Literal["desc", "asc"], default="desc"): The order to sort by. Defaults to "desc". query (Optional[str]): Filter prompts by a search query. Returns: - ls_schemas.ListPromptsResponse: A response object containing + ListPromptsResponse: A response object containing the list of prompts. """ params = { @@ -5699,14 +5951,14 @@ def get_prompt(self, prompt_identifier: str) -> Optional[ls_schemas.Prompt]: Args: prompt_identifier (str): The identifier of the prompt. - The identifier should be in the format "prompt_name" or "owner/prompt_name". + The identifier should be in the format "prompt_name" or "owner/prompt_name". Returns: - Optional[ls_schemas.Prompt]: The prompt object. + Optional[Prompt]: The prompt object. Raises: requests.exceptions.HTTPError: If the prompt is not found or - another error occurs. + another error occurs. 
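+
+        Example:
+            A minimal usage sketch (the prompt name below is a placeholder):
+
+            .. code-block:: python
+
+                # The identifier may be a bare name or qualified as "owner/prompt_name";
+                # ``get_prompt`` returns ``Optional[Prompt]``.
+                prompt = client.get_prompt("my-prompt")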
""" owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier) try: @@ -5729,14 +5981,15 @@ def create_prompt( Does not attach prompt object, just creates an empty prompt. Args: - prompt_name (str): The name of the prompt. + prompt_identifier (str): The identifier of the prompt. + The identifier should be in the formatof owner/name:hash, name:hash, owner/name, or name description (Optional[str]): A description of the prompt. readme (Optional[str]): A readme for the prompt. tags (Optional[Sequence[str]]): A list of tags for the prompt. is_public (bool): Whether the prompt should be public. Defaults to False. Returns: - ls_schemas.Prompt: The created prompt object. + Prompt: The created prompt object. Raises: ValueError: If the current tenant is not the owner. @@ -5909,7 +6162,7 @@ def pull_prompt_commit( prompt_identifier (str): The identifier of the prompt. Returns: - ls_schemas.PromptObject: The prompt object. + PromptCommit: The prompt object. Raises: ValueError: If no commits are found for the prompt. @@ -5940,15 +6193,12 @@ def list_prompt_commits( Args: prompt_identifier (str): The identifier of the prompt in the format 'owner/repo_name'. - limit (Optional[int], optional): The maximum number of commits to return. If None, returns all commits. Defaults to None. - offset (int, optional): The number of commits to skip before starting to return results. Defaults to 0. - include_model (bool, optional): Whether to include the model information in the commit data. Defaults to False. - - Returns: - Iterator[ls_schemas.ListedPromptCommit]: An iterator of ListedPromptCommit objects representing the commits. + limit (Optional[int]): The maximum number of commits to return. If None, returns all commits. Defaults to None. + offset (int, default=0): The number of commits to skip before starting to return results. Defaults to 0. + include_model (bool, default=False): Whether to include the model information in the commit data. Defaults to False. Yields: - ls_schemas.ListedPromptCommit: A ListedPromptCommit object for each commit. + A ListedPromptCommit object for each commit. Note: This method uses pagination to retrieve commits. It will make multiple API calls if necessary to retrieve all commits @@ -5996,6 +6246,7 @@ def pull_prompt( Args: prompt_identifier (str): The identifier of the prompt. + include_model (Optional[bool], default=False): Whether to include the model information in the prompt data. Returns: Any: The prompt object in the specified format. @@ -6121,7 +6372,6 @@ def push_prompt( Returns: str: The URL of the prompt. - """ # Create or update prompt metadata if self._prompt_exists(prompt_identifier): @@ -6221,41 +6471,38 @@ def evaluate( r"""Evaluate a target system on a given dataset. Args: - target (TARGET_T | Runnable | EXPERIMENT_T | Tuple[EXPERIMENT_T, EXPERIMENT_T]): + target (Union[TARGET_T, Runnable, EXPERIMENT_T, Tuple[EXPERIMENT_T, EXPERIMENT_T]]): The target system or experiment(s) to evaluate. Can be a function that takes a dict and returns a dict, a langchain Runnable, an existing experiment ID, or a two-tuple of experiment IDs. data (DATA_T): The dataset to evaluate on. Can be a dataset name, a list of examples, or a generator of examples. - evaluators (Sequence[EVALUATOR_T] | Sequence[COMPARATIVE_EVALUATOR_T] | None): + evaluators (Optional[Union[Sequence[EVALUATOR_T], Sequence[COMPARATIVE_EVALUATOR_T]]]): A list of evaluators to run on each example. The evaluator signature depends on the target type. Default to None. 
- summary_evaluators (Sequence[SUMMARY_EVALUATOR_T] | None): A list of summary + summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary evaluators to run on the entire dataset. Should not be specified if comparing two existing experiments. Defaults to None. - metadata (dict | None): Metadata to attach to the experiment. + metadata (Optional[dict]): Metadata to attach to the experiment. Defaults to None. - experiment_prefix (str | None): A prefix to provide for your experiment name. + experiment_prefix (Optional[str]): A prefix to provide for your experiment name. Defaults to None. - description (str | None): A free-form text description for the experiment. - max_concurrency (int | None): The maximum number of concurrent + description (Optional[str]): A free-form text description for the experiment. + max_concurrency (Optional[int], default=0): The maximum number of concurrent evaluations to run. If None then no limit is set. If 0 then no concurrency. Defaults to 0. - blocking (bool): Whether to block until the evaluation is complete. + blocking (bool, default=True): Whether to block until the evaluation is complete. Defaults to True. - num_repetitions (int): The number of times to run the evaluation. + num_repetitions (int, default=1): The number of times to run the evaluation. Each item in the dataset will be run and evaluated this many times. Defaults to 1. - experiment (schemas.TracerSession | None): An existing experiment to + experiment (Optional[EXPERIMENT_T]): An existing experiment to extend. If provided, experiment_prefix is ignored. For advanced usage only. Should not be specified if target is an existing experiment or two-tuple fo experiments. - load_nested (bool): Whether to load all child runs for the experiment. - Default is to only load the top-level root runs. Should only be specified - when target is an existing experiment or two-tuple of experiments. - randomize_order (bool): Whether to randomize the order of the outputs for each - evaluation. Default is False. Should only be specified when target is a - two-tuple of existing experiments. + upload_results (bool, default=True): Whether to upload the results to LangSmith. + Defaults to True. + **kwargs (Any): Additional keyword arguments to pass to the evaluator. Returns: ExperimentResults: If target is a function, Runnable, or existing experiment. @@ -6264,124 +6511,157 @@ def evaluate( Examples: Prepare the dataset: - >>> from langsmith import Client - >>> client = Client() - >>> dataset = client.clone_public_dataset( - ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" - ... ) - >>> dataset_name = "Evaluate Examples" + .. code-block:: python + + from langsmith import Client + client = Client() + dataset = client.clone_public_dataset( + "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" + ) + dataset_name = "Evaluate Examples" Basic usage: - >>> def accuracy(outputs: dict, reference_outputs: dict) -> dict: - ... # Row-level evaluator for accuracy. - ... pred = outputs["response"] - ... expected = reference_outputs["answer"] - ... return {"score": expected.lower() == pred.lower()} - - >>> def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict: - ... # Experiment-level evaluator for precision. - ... # TP / (TP + FP) - ... predictions = [out["response"].lower() for out in outputs] - ... expected = [ref["answer"].lower() for ref in reference_outputs] - ... # yes and no are the only possible answers - ... 
tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"]) - ... fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)]) - ... return {"score": tp / (tp + fp)} - >>> def predict(inputs: dict) -> dict: - ... # This can be any function or just an API call to your app. - ... return {"response": "Yes"} - >>> results = client.evaluate( - ... predict, - ... data=dataset_name, - ... evaluators=[accuracy], - ... summary_evaluators=[precision], - ... experiment_prefix="My Experiment", - ... description="Evaluating the accuracy of a simple prediction model.", - ... metadata={ - ... "my-prompt-version": "abcd-1234", - ... }, - ... ) # doctest: +ELLIPSIS - View the evaluation results for experiment:... + .. code-block:: python + + def accuracy(outputs: dict, reference_outputs: dict) -> dict: + # Row-level evaluator for accuracy. + pred = outputs["response"] + expected = reference_outputs["answer"] + return {"score": expected.lower() == pred.lower()} + + .. code-block:: python + + def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict: + # Experiment-level evaluator for precision. + # TP / (TP + FP) + predictions = [out["response"].lower() for out in outputs] + expected = [ref["answer"].lower() for ref in reference_outputs] + # yes and no are the only possible answers + tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"]) + fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)]) + return {"score": tp / (tp + fp)} + def predict(inputs: dict) -> dict: + # This can be any function or just an API call to your app. + return {"response": "Yes"} + results = client.evaluate( + predict, + data=dataset_name, + evaluators=[accuracy], + summary_evaluators=[precision], + experiment_prefix="My Experiment", + description="Evaluating the accuracy of a simple prediction model.", + metadata={ + "my-prompt-version": "abcd-1234", + }, + ) Evaluating over only a subset of the examples - >>> experiment_name = results.experiment_name - >>> examples = client.list_examples(dataset_name=dataset_name, limit=5) - >>> results = client.evaluate( - ... predict, - ... data=examples, - ... evaluators=[accuracy], - ... summary_evaluators=[precision], - ... experiment_prefix="My Experiment", - ... description="Just testing a subset synchronously.", - ... ) # doctest: +ELLIPSIS - View the evaluation results for experiment:... + .. code-block:: python + + experiment_name = results.experiment_name + examples = client.list_examples(dataset_name=dataset_name, limit=5) + results = client.evaluate( + predict, + data=examples, + evaluators=[accuracy], + summary_evaluators=[precision], + experiment_prefix="My Experiment", + description="Just testing a subset synchronously.", + ) Streaming each prediction to more easily + eagerly debug. - >>> results = client.evaluate( - ... predict, - ... data=dataset_name, - ... evaluators=[accuracy], - ... summary_evaluators=[precision], - ... description="I don't even have to block!", - ... blocking=False, - ... ) # doctest: +ELLIPSIS - View the evaluation results for experiment:... - >>> for i, result in enumerate(results): # doctest: +ELLIPSIS - ... pass + .. 
code-block:: python + + results = client.evaluate( + predict, + data=dataset_name, + evaluators=[accuracy], + summary_evaluators=[precision], + description="I don't even have to block!", + blocking=False, + ) + for i, result in enumerate(results): # doctest: +ELLIPSIS + pass Using the `evaluate` API with an off-the-shelf LangChain evaluator: - >>> from langsmith.evaluation import LangChainStringEvaluator - >>> from langchain.chat_models import init_chat_model - >>> def prepare_criteria_data(run: Run, example: Example): - ... return { - ... "prediction": run.outputs["output"], - ... "reference": example.outputs["answer"], - ... "input": str(example.inputs), - ... } - >>> results = client.evaluate( - ... predict, - ... data=dataset_name, - ... evaluators=[ - ... accuracy, - ... LangChainStringEvaluator("embedding_distance"), - ... LangChainStringEvaluator( - ... "labeled_criteria", - ... config={ - ... "criteria": { - ... "usefulness": "The prediction is useful if it is correct" - ... " and/or asks a useful followup question." - ... }, - ... "llm": init_chat_model("gpt-4o"), - ... }, - ... prepare_data=prepare_criteria_data, - ... ), - ... ], - ... description="Evaluating with off-the-shelf LangChain evaluators.", - ... summary_evaluators=[precision], - ... ) # doctest: +ELLIPSIS - View the evaluation results for experiment:... + .. code-block:: python + + from langsmith.evaluation import LangChainStringEvaluator + from langchain.chat_models import init_chat_model + def prepare_criteria_data(run: Run, example: Example): + return { + "prediction": run.outputs["output"], + "reference": example.outputs["answer"], + "input": str(example.inputs), + } + results = client.evaluate( + predict, + data=dataset_name, + evaluators=[ + accuracy, + LangChainStringEvaluator("embedding_distance"), + LangChainStringEvaluator( + "labeled_criteria", + config={ + "criteria": { + "usefulness": "The prediction is useful if it is correct" + " and/or asks a useful followup question." + }, + "llm": init_chat_model("gpt-4o"), + }, + prepare_data=prepare_criteria_data, + ), + ], + description="Evaluating with off-the-shelf LangChain evaluators.", + summary_evaluators=[precision], + ) + View the evaluation results for experiment:... Evaluating a LangChain object: - >>> from langchain_core.runnables import chain as as_runnable - >>> @as_runnable - ... def nested_predict(inputs): - ... return {"response": "Yes"} - >>> @as_runnable - ... def lc_predict(inputs): - ... return nested_predict.invoke(inputs) - >>> results = client.evaluate( - ... lc_predict, - ... data=dataset_name, - ... evaluators=[accuracy], - ... description="This time we're evaluating a LangChain object.", - ... summary_evaluators=[precision], - ... ) # doctest: +ELLIPSIS - View the evaluation results for experiment:... + .. code-block:: python + + from langchain_core.runnables import chain as as_runnable + @as_runnable + def nested_predict(inputs): + return {"response": "Yes"} + @as_runnable + def lc_predict(inputs): + return nested_predict.invoke(inputs) + results = client.evaluate( + lc_predict, + data=dataset_name, + evaluators=[accuracy], + description="This time we're evaluating a LangChain object.", + summary_evaluators=[precision], + ) + + Comparative evaluation: + + .. 
code-block:: python + + results = client.evaluate( + # The target is a tuple of the experiment IDs to compare + target=("12345678-1234-1234-1234-123456789012", "98765432-1234-1234-1234-123456789012"), + evaluators=[accuracy], + summary_evaluators=[precision], + ) + + Evaluate an existing experiment: + + .. code-block:: python + + results = client.evaluate( + # The target is the ID of the experiment we are evaluating + target="12345678-1234-1234-1234-123456789012", + evaluators=[accuracy], + summary_evaluators=[precision], + ) + .. versionadded:: 0.2.0 """ # noqa: E501 @@ -6435,11 +6715,11 @@ async def aevaluate( r"""Evaluate an async target system on a given dataset. Args: - target (AsyncCallable[[dict], dict] | AsyncIterable[dict] | Runnable | EXPERIMENT_T | Tuple[EXPERIMENT_T, EXPERIMENT_T]): + target (Union[ATARGET_T, AsyncIterable[dict], Runnable, str, uuid.UUID, TracerSession]): The target system or experiment(s) to evaluate. Can be an async function that takes a dict and returns a dict, a langchain Runnable, an existing experiment ID, or a two-tuple of experiment IDs. - data (Union[DATA_T, AsyncIterable[schemas.Example]]): The dataset to evaluate on. Can be a dataset name, a list of + data (Union[DATA_T, AsyncIterable[Example]]): The dataset to evaluate on. Can be a dataset name, a list of examples, an async generator of examples, or an async iterable of examples. evaluators (Optional[Sequence[EVALUATOR_T]]): A list of evaluators to run on each example. Defaults to None. @@ -6450,20 +6730,20 @@ async def aevaluate( experiment_prefix (Optional[str]): A prefix to provide for your experiment name. Defaults to None. description (Optional[str]): A description of the experiment. - max_concurrency (int | None): The maximum number of concurrent + max_concurrency (Optional[int], default=0): The maximum number of concurrent evaluations to run. If None then no limit is set. If 0 then no concurrency. Defaults to 0. - num_repetitions (int): The number of times to run the evaluation. + num_repetitions (int, default=1): The number of times to run the evaluation. Each item in the dataset will be run and evaluated this many times. Defaults to 1. - blocking (bool): Whether to block until the evaluation is complete. + blocking (bool, default=True): Whether to block until the evaluation is complete. Defaults to True. - experiment (Optional[schemas.TracerSession]): An existing experiment to + experiment (Optional[TracerSession]): An existing experiment to extend. If provided, experiment_prefix is ignored. For advanced usage only. - load_nested: Whether to load all child runs for the experiment. - Default is to only load the top-level root runs. Should only be specified - when evaluating an existing experiment. + upload_results (bool, default=True): Whether to upload the results to LangSmith. + Defaults to True. + **kwargs (Any): Additional keyword arguments to pass to the evaluator. Returns: AsyncIterator[ExperimentResultRow]: An async iterator over the experiment results. @@ -6475,122 +6755,144 @@ async def aevaluate( Requires the 'langsmith[vcr]' package to be installed. Examples: - >>> import asyncio - >>> from langsmith import Client - >>> client = Client() - >>> dataset = client.clone_public_dataset( - ... "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d" - ... ) - >>> dataset_name = "Evaluate Examples" + Prepare the dataset: + + .. 
code-block:: python
+
+            import asyncio
+            from langsmith import Client
+            client = Client()
+            dataset = client.clone_public_dataset(
+                "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
+            )
+            dataset_name = "Evaluate Examples"

         Basic usage:

-        >>> def accuracy(outputs: dict, reference_outputs: dict) -> dict:
-        ...     # Row-level evaluator for accuracy.
-        ...     pred = outputs["resposen"]
-        ...     expected = reference_outputs["answer"]
-        ...     return {"score": expected.lower() == pred.lower()}
-
-        >>> def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
-        ...     # Experiment-level evaluator for precision.
-        ...     # TP / (TP + FP)
-        ...     predictions = [out["response"].lower() for out in outputs]
-        ...     expected = [ref["answer"].lower() for ref in reference_outputs]
-        ...     # yes and no are the only possible answers
-        ...     tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
-        ...     fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
-        ...     return {"score": tp / (tp + fp)}
-
-        >>> async def apredict(inputs: dict) -> dict:
-        ...     # This can be any async function or just an API call to your app.
-        ...     await asyncio.sleep(0.1)
-        ...     return {"response": "Yes"}
-        >>> results = asyncio.run(
-        ...     client.aevaluate(
-        ...         apredict,
-        ...         data=dataset_name,
-        ...         evaluators=[accuracy],
-        ...         summary_evaluators=[precision],
-        ...         experiment_prefix="My Experiment",
-        ...         description="Evaluate the accuracy of the model asynchronously.",
-        ...         metadata={
-        ...             "my-prompt-version": "abcd-1234",
-        ...         },
-        ...     )
-        ... ) # doctest: +ELLIPSIS
-        View the evaluation results for experiment:...
+        .. code-block:: python
+
+            def accuracy(outputs: dict, reference_outputs: dict) -> dict:
+                # Row-level evaluator for accuracy.
+                pred = outputs["response"]
+                expected = reference_outputs["answer"]
+                return {"score": expected.lower() == pred.lower()}
+
+            def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
+                # Experiment-level evaluator for precision.
+                # TP / (TP + FP)
+                predictions = [out["response"].lower() for out in outputs]
+                expected = [ref["answer"].lower() for ref in reference_outputs]
+                # yes and no are the only possible answers
+                tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
+                fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
+                return {"score": tp / (tp + fp)}
+
+            async def apredict(inputs: dict) -> dict:
+                # This can be any async function or just an API call to your app.
+                await asyncio.sleep(0.1)
+                return {"response": "Yes"}
+            results = asyncio.run(
+                client.aevaluate(
+                    apredict,
+                    data=dataset_name,
+                    evaluators=[accuracy],
+                    summary_evaluators=[precision],
+                    experiment_prefix="My Experiment",
+                    description="Evaluate the accuracy of the model asynchronously.",
+                    metadata={
+                        "my-prompt-version": "abcd-1234",
+                    },
+                )
+            )

         Evaluating over only a subset of the examples using an async generator:

-        >>> async def example_generator():
-        ...     examples = client.list_examples(dataset_name=dataset_name, limit=5)
-        ...     for example in examples:
-        ...         yield example
-        >>> results = asyncio.run(
-        ...     client.aevaluate(
-        ...         apredict,
-        ...         data=example_generator(),
-        ...         evaluators=[accuracy],
-        ...         summary_evaluators=[precision],
-        ...         experiment_prefix="My Subset Experiment",
-        ...         description="Evaluate a subset of examples asynchronously.",
-        ...     )
-        ... ) # doctest: +ELLIPSIS
-        View the evaluation results for experiment:...
+        .. 
code-block:: python
+
+            async def example_generator():
+                examples = client.list_examples(dataset_name=dataset_name, limit=5)
+                for example in examples:
+                    yield example
+            results = asyncio.run(
+                client.aevaluate(
+                    apredict,
+                    data=example_generator(),
+                    evaluators=[accuracy],
+                    summary_evaluators=[precision],
+                    experiment_prefix="My Subset Experiment",
+                    description="Evaluate a subset of examples asynchronously.",
+                )
+            )

         Streaming each prediction to more easily + eagerly debug.

-        >>> results = asyncio.run(
-        ...     client.aevaluate(
-        ...         apredict,
-        ...         data=dataset_name,
-        ...         evaluators=[accuracy],
-        ...         summary_evaluators=[precision],
-        ...         experiment_prefix="My Streaming Experiment",
-        ...         description="Streaming predictions for debugging.",
-        ...         blocking=False,
-        ...     )
-        ... ) # doctest: +ELLIPSIS
-        View the evaluation results for experiment:...
+        .. code-block:: python
+
+            results = asyncio.run(
+                client.aevaluate(
+                    apredict,
+                    data=dataset_name,
+                    evaluators=[accuracy],
+                    summary_evaluators=[precision],
+                    experiment_prefix="My Streaming Experiment",
+                    description="Streaming predictions for debugging.",
+                    blocking=False,
+                )
+            )

-        >>> async def aenumerate(iterable):
-        ...     async for elem in iterable:
-        ...         print(elem)
-        >>> asyncio.run(aenumerate(results))
+            async def aenumerate(iterable):
+                async for elem in iterable:
+                    print(elem)
+            asyncio.run(aenumerate(results))

         Running without concurrency:

-        >>> results = asyncio.run(
-        ...     client.aevaluate(
-        ...         apredict,
-        ...         data=dataset_name,
-        ...         evaluators=[accuracy],
-        ...         summary_evaluators=[precision],
-        ...         experiment_prefix="My Experiment Without Concurrency",
-        ...         description="This was run without concurrency.",
-        ...         max_concurrency=0,
-        ...     )
-        ... ) # doctest: +ELLIPSIS
-        View the evaluation results for experiment:...
+        .. code-block:: python
+
+            results = asyncio.run(
+                client.aevaluate(
+                    apredict,
+                    data=dataset_name,
+                    evaluators=[accuracy],
+                    summary_evaluators=[precision],
+                    experiment_prefix="My Experiment Without Concurrency",
+                    description="This was run without concurrency.",
+                    max_concurrency=0,
+                )
+            )

         Using Async evaluators:

-        >>> async def helpfulness(outputs: dict) -> dict:
-        ...     # Row-level evaluator for helpfulness.
-        ...     await asyncio.sleep(5) # Replace with your LLM API call
-        ...     return {"score": outputs["output"] == "Yes"}
-
-        >>> results = asyncio.run(
-        ...     client.aevaluate(
-        ...         apredict,
-        ...         data=dataset_name,
-        ...         evaluators=[helpfulness],
-        ...         summary_evaluators=[precision],
-        ...         experiment_prefix="My Helpful Experiment",
-        ...         description="Applying async evaluators example.",
-        ...     )
-        ... ) # doctest: +ELLIPSIS
-        View the evaluation results for experiment:...
+        .. code-block:: python
+
+            async def helpfulness(outputs: dict) -> dict:
+                # Row-level evaluator for helpfulness.
+                await asyncio.sleep(5) # Replace with your LLM API call
+                return {"score": outputs["response"] == "Yes"}
+
+            results = asyncio.run(
+                client.aevaluate(
+                    apredict,
+                    data=dataset_name,
+                    evaluators=[helpfulness],
+                    summary_evaluators=[precision],
+                    experiment_prefix="My Helpful Experiment",
+                    description="Applying async evaluators example.",
+                )
+            )
+
+        Evaluate an existing experiment:
+
+        .. code-block:: python
+
+            results = asyncio.run(
+                client.aevaluate(
+                    # The target is the ID of the experiment we are evaluating
+                    target="419dcab2-1d66-4b94-8901-0357ead390df",
+                    evaluators=[accuracy, helpfulness],
+                    summary_evaluators=[precision],
+                )
+            )

         .. versionadded:: 0.2.0