From 34d0dfe639e7e657cacf06189a043e921875388c Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 4 Mar 2024 21:06:56 -0800 Subject: [PATCH] Add dataset tagging (#496) --- python/langsmith/client.py | 133 ++++++++++++++++++++++++++++++++++++ python/langsmith/schemas.py | 25 ++++--- python/pyproject.toml | 2 +- 3 files changed, 150 insertions(+), 10 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index ad9069d55..8d1ca71be 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -2295,6 +2295,139 @@ def delete_dataset( ) ls_utils.raise_for_status_with_text(response) + def update_dataset_tag( + self, + *, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + as_of: datetime.datetime, + tag: str, + ) -> None: + """Update the tags of a dataset. + + If the tag is already assigned to a different version of this dataset, + the tag will be moved to the new version. The as_of parameter is used to + determine which version of the dataset to apply the new tags to. + It must be an exact version of the dataset to succeed. You can + use the read_dataset_version method to find the exact version + to apply the tags to. + + Parameters + ---------- + dataset_id : UUID + The ID of the dataset to update. + as_of : datetime.datetime + The timestamp of the dataset to apply the new tags to. + tag : str + The new tag to apply to the dataset. + + Examples: + -------- + .. 
code-block:: python + dataset_name = "my-dataset" + # Get the version of a dataset <= a given timestamp + dataset_version = client.read_dataset_version( + dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1) + ) + # Assign that version a new tag + client.update_dataset_tag( + dataset_name="my-dataset", + as_of=dataset_version.as_of, + tag="prod", + ) + """ + if dataset_name is not None: + dataset_id = self.read_dataset(dataset_name=dataset_name).id + if dataset_id is None: + raise ValueError("Must provide either dataset name or ID") + response = self.session.put( + f"{self.api_url}/datasets/{_as_uuid(dataset_id, 'dataset_id')}/tags", + headers=self._headers, + json={ + "as_of": as_of.isoformat(), + "tag": tag, + }, + ) + ls_utils.raise_for_status_with_text(response) + + def list_dataset_versions( + self, + *, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + search: Optional[str] = None, + ) -> Iterator[ls_schemas.DatasetVersion]: + """List dataset versions. + + Args: + dataset_id (Optional[ID_TYPE]): The ID of the dataset. + dataset_name (Optional[str]): The name of the dataset. + search (Optional[str]): The search query. + + Returns: + Iterator[ls_schemas.DatasetVersion]: An iterator of dataset versions. + """ + if dataset_id is None: + dataset_id = self.read_dataset(dataset_name=dataset_name).id + params = {"search": search} + yield from ( + ls_schemas.DatasetVersion(**version) + for version in self._get_paginated_list( + f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/versions", + params=params, + ) + ) + + def read_dataset_version( + self, + *, + dataset_id: Optional[ID_TYPE] = None, + dataset_name: Optional[str] = None, + as_of: Optional[datetime.datetime] = None, + tag: Optional[str] = None, + ) -> ls_schemas.DatasetVersion: + """Get dataset version by as_of or exact tag. + + Use this to resolve the nearest version to a given timestamp or for a given tag. 
+ + Args: + dataset_id (Optional[ID_TYPE]): The ID of the dataset. + dataset_name (Optional[str]): The name of the dataset. + as_of (Optional[datetime.datetime]): The timestamp of the dataset + to retrieve. + tag (Optional[str]): The tag of the dataset to retrieve. + + Returns: + ls_schemas.DatasetVersion: The dataset version. + + + Examples: + -------- + .. code-block:: python + + # Get the latest version of a dataset + client.read_dataset_version(dataset_name="my-dataset", tag="latest") + + # Get the version of a dataset <= a given timestamp + client.read_dataset_version( + dataset_name="my-dataset", + as_of=datetime.datetime(2024, 1, 1), + ) + + + # Get the version of a dataset with a specific tag + client.read_dataset_version(dataset_name="my-dataset", tag="prod") + """ + if dataset_id is None: + dataset_id = self.read_dataset(dataset_name=dataset_name).id + if (as_of and tag) or (as_of is None and tag is None): + raise ValueError("Exactly one of as_of and tag must be specified.") + response = self._get_with_retries( + f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/version", + params={"as_of": as_of, "tag": tag}, + ) + return ls_schemas.DatasetVersion(**response.json()) + def clone_public_dataset( self, token_or_url: str, diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index b82199a36..480470bd5 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -176,16 +176,11 @@ def url(self) -> Optional[str]: return None -class RunTypeEnum(str, Enum): - """(Deprecated) Enum for run types. 
Use string directly.""" +class DatasetVersion(BaseModel): + """Class representing a dataset version.""" - tool = "tool" - chain = "chain" - llm = "llm" - retriever = "retriever" - embedding = "embedding" - prompt = "prompt" - parser = "parser" + tags: Optional[List[str]] = None + as_of: datetime class RunBase(BaseModel): @@ -319,6 +314,18 @@ def url(self) -> Optional[str]: return None +class RunTypeEnum(str, Enum): + """(Deprecated) Enum for run types. Use string directly.""" + + tool = "tool" + chain = "chain" + llm = "llm" + retriever = "retriever" + embedding = "embedding" + prompt = "prompt" + parser = "parser" + + class RunLikeDict(TypedDict, total=False): """Run-like dictionary, for type-hinting.""" diff --git a/python/pyproject.toml b/python/pyproject.toml index 1c11506c0..6a641a4ad 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langsmith" -version = "0.1.18" +version = "0.1.19" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." authors = ["LangChain "] license = "MIT"