Skip to content

Commit

Permalink
Add dataset tagging (#496)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Mar 5, 2024
1 parent 40c4470 commit 34d0dfe
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 10 deletions.
133 changes: 133 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2295,6 +2295,139 @@ def delete_dataset(
)
ls_utils.raise_for_status_with_text(response)

def update_dataset_tag(
    self,
    *,
    dataset_id: Optional[ID_TYPE] = None,
    dataset_name: Optional[str] = None,
    as_of: datetime.datetime,
    tag: str,
) -> None:
    """Assign a tag to a specific version of a dataset.

    If the tag is already assigned to a different version of this dataset,
    the tag will be moved to the new version. The as_of parameter is used to
    determine which version of the dataset to apply the new tag to.
    It must be an exact version of the dataset to succeed. You can
    use the read_dataset_version method to find the exact version
    to apply the tag to.

    Parameters
    ----------
    dataset_id : UUID, optional
        The ID of the dataset to update.
    dataset_name : str, optional
        The name of the dataset to update. When provided, it is resolved
        to an ID with read_dataset and takes precedence over dataset_id.
    as_of : datetime.datetime
        The timestamp of the dataset version to apply the tag to. It is
        sent to the API as an ISO 8601 string.
    tag : str
        The tag to apply to that dataset version.

    Raises
    ------
    ValueError
        If neither dataset_id nor dataset_name is provided.

    Examples
    --------
    .. code-block:: python

        dataset_name = "my-dataset"
        # Get the version of a dataset <= a given timestamp
        dataset_version = client.read_dataset_version(
            dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1)
        )
        # Assign that version a new tag
        client.update_dataset_tag(
            dataset_name=dataset_name,
            as_of=dataset_version.as_of,
            tag="prod",
        )
    """
    # A name, when given, wins over any ID that was also passed in.
    if dataset_name is not None:
        dataset_id = self.read_dataset(dataset_name=dataset_name).id
    if dataset_id is None:
        raise ValueError("Must provide either dataset name or ID")
    response = self.session.put(
        f"{self.api_url}/datasets/{_as_uuid(dataset_id, 'dataset_id')}/tags",
        headers=self._headers,
        json={
            "as_of": as_of.isoformat(),
            "tag": tag,
        },
    )
    # Failed responses are handed to the shared status-check helper.
    ls_utils.raise_for_status_with_text(response)

def list_dataset_versions(
    self,
    *,
    dataset_id: Optional[ID_TYPE] = None,
    dataset_name: Optional[str] = None,
    search: Optional[str] = None,
) -> Iterator[ls_schemas.DatasetVersion]:
    """Iterate over the versions of a dataset.

    This is a generator: no lookup or request happens until the first
    item is requested.

    Args:
        dataset_id (Optional[ID_TYPE]): The ID of the dataset. When omitted,
            the ID is looked up from ``dataset_name`` via ``read_dataset``.
        dataset_name (Optional[str]): The name of the dataset. Only used
            when ``dataset_id`` is not given.
        search (Optional[str]): The search query, forwarded as the
            ``search`` request parameter.

    Yields:
        ls_schemas.DatasetVersion: One dataset version per item returned
            by the paginated ``/versions`` listing.
    """
    if dataset_id is None:
        # No ID supplied: resolve it from the dataset name.
        dataset_id = self.read_dataset(dataset_name=dataset_name).id
    query = {"search": search}
    versions_path = f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/versions"
    for raw_version in self._get_paginated_list(versions_path, params=query):
        yield ls_schemas.DatasetVersion(**raw_version)

def read_dataset_version(
    self,
    *,
    dataset_id: Optional[ID_TYPE] = None,
    dataset_name: Optional[str] = None,
    as_of: Optional[datetime.datetime] = None,
    tag: Optional[str] = None,
) -> ls_schemas.DatasetVersion:
    """Get dataset version by as_of or exact tag.

    Use this to resolve the nearest version to a given timestamp or for a
    given tag.

    Args:
        dataset_id (Optional[ID_TYPE]): The ID of the dataset. When omitted,
            the ID is looked up from ``dataset_name`` via ``read_dataset``.
        dataset_name (Optional[str]): The name of the dataset. Only used
            when ``dataset_id`` is not given.
        as_of (Optional[datetime.datetime]): The timestamp of the dataset
            to retrieve.
        tag (Optional[str]): The tag of the dataset to retrieve.

    Returns:
        ls_schemas.DatasetVersion: The dataset version.

    Raises:
        ValueError: If ``as_of`` and ``tag`` are both given, or neither is.
            This is checked before any request is made.

    Examples:
    --------
    .. code-block:: python

        # Get the latest version of a dataset
        client.read_dataset_version(dataset_name="my-dataset", tag="latest")

        # Get the version of a dataset <= a given timestamp
        client.read_dataset_version(
            dataset_name="my-dataset",
            as_of=datetime.datetime(2024, 1, 1),
        )

        # Get the version of a dataset with a specific tag
        client.read_dataset_version(dataset_name="my-dataset", tag="prod")
    """
    # Validate the arguments first so a bad call fails fast, without paying
    # for (or being masked by) the dataset-name lookup below.
    if (as_of and tag) or (as_of is None and tag is None):
        raise ValueError("Exactly one of as_of and tag must be specified.")
    if dataset_id is None:
        dataset_id = self.read_dataset(dataset_name=dataset_name).id
    # NOTE(review): as_of is forwarded as a datetime object, whereas
    # update_dataset_tag serializes it with isoformat() -- confirm the
    # query-string encoding is what the server expects.
    response = self._get_with_retries(
        f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/version",
        params={"as_of": as_of, "tag": tag},
    )
    return ls_schemas.DatasetVersion(**response.json())

def clone_public_dataset(
self,
token_or_url: str,
Expand Down
25 changes: 16 additions & 9 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,16 +176,11 @@ def url(self) -> Optional[str]:
return None


class RunTypeEnum(str, Enum):
"""(Deprecated) Enum for run types. Use string directly."""
class DatasetVersion(BaseModel):
"""Class representing a dataset version."""

tool = "tool"
chain = "chain"
llm = "llm"
retriever = "retriever"
embedding = "embedding"
prompt = "prompt"
parser = "parser"
tags: Optional[List[str]] = None
as_of: datetime


class RunBase(BaseModel):
Expand Down Expand Up @@ -319,6 +314,18 @@ def url(self) -> Optional[str]:
return None


class RunTypeEnum(str, Enum):
    """(Deprecated) Enumeration of run types; prefer the plain string values.

    Because ``str`` is mixed in, every member is itself a string and
    compares equal to its value (e.g. ``RunTypeEnum.llm == "llm"``).
    """

    # Member order is part of the iteration order, so it is kept as declared.
    tool = "tool"
    chain = "chain"
    llm = "llm"
    retriever = "retriever"
    embedding = "embedding"
    prompt = "prompt"
    parser = "parser"


class RunLikeDict(TypedDict, total=False):
"""Run-like dictionary, for type-hinting."""

Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.18"
version = "0.1.19"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 34d0dfe

Please sign in to comment.