Skip to content

Commit

Permalink
allow filtering datasets by metadata (#942)
Browse files — browse the repository at this point in the history
  • Loading branch information
samnoyes authored Aug 22, 2024
2 parents 5f1b4ef + b775912 commit b846a4c
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 4 deletions.
2 changes: 1 addition & 1 deletion js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.1.44",
"version": "0.1.45",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"packageManager": "[email protected]",
"files": [
Expand Down
8 changes: 8 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1910,16 +1910,19 @@ export class Client {
dataType,
inputsSchema,
outputsSchema,
metadata,
}: {
description?: string;
dataType?: DataType;
inputsSchema?: KVMap;
outputsSchema?: KVMap;
metadata?: RecordStringAny;
} = {}
): Promise<Dataset> {
const body: KVMap = {
name,
description,
extra: metadata ? { metadata } : undefined,
};
if (dataType) {
body.data_type = dataType;
Expand Down Expand Up @@ -2065,12 +2068,14 @@ export class Client {
datasetIds,
datasetName,
datasetNameContains,
metadata,
}: {
limit?: number;
offset?: number;
datasetIds?: string[];
datasetName?: string;
datasetNameContains?: string;
metadata?: RecordStringAny;
} = {}): AsyncIterable<Dataset> {
const path = "/datasets";
const params = new URLSearchParams({
Expand All @@ -2088,6 +2093,9 @@ export class Client {
if (datasetNameContains !== undefined) {
params.append("name_contains", datasetNameContains);
}
if (metadata !== undefined) {
params.append("metadata", JSON.stringify(metadata));
}
for await (const datasets of this._getPaginated<Dataset>(path, params)) {
yield* datasets;
}
Expand Down
2 changes: 1 addition & 1 deletion js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ export type {
export { RunTree, type RunTreeConfig } from "./run_trees.js";

// Update using yarn bump-version
export const __version__ = "0.1.44";
export const __version__ = "0.1.45";
7 changes: 7 additions & 0 deletions js/src/tests/client.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ test.concurrent(
});
const dataset = await langchainClient.createDataset(datasetName, {
dataType: "llm",
metadata: { key: "valuefoo" },
});
await langchainClient.createExample(
{ input: "hello world" },
Expand All @@ -193,6 +194,12 @@ test.concurrent(
);
const loadedDataset = await langchainClient.readDataset({ datasetName });
expect(loadedDataset.data_type).toEqual("llm");

const datasetsByMetadata = await toArray(
langchainClient.listDatasets({ metadata: { key: "valuefoo" } })
);
expect(datasetsByMetadata.length).toEqual(1);
expect(datasetsByMetadata.map((d) => d.id)).toContain(dataset.id);
await langchainClient.deleteDataset({ datasetName });
},
180_000
Expand Down
7 changes: 7 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2504,6 +2504,7 @@ def create_dataset(
data_type: ls_schemas.DataType = ls_schemas.DataType.kv,
inputs_schema: Optional[Dict[str, Any]] = None,
outputs_schema: Optional[Dict[str, Any]] = None,
metadata: Optional[dict] = None,
) -> ls_schemas.Dataset:
"""Create a dataset in the LangSmith API.
Expand All @@ -2515,6 +2516,8 @@ def create_dataset(
The description of the dataset.
data_type : DataType or None, default=DataType.kv
The data type of the dataset.
metadata: dict or None, default=None
Additional metadata to associate with the dataset.
Returns:
-------
Expand All @@ -2525,6 +2528,7 @@ def create_dataset(
"name": dataset_name,
"data_type": data_type.value,
"created_at": datetime.datetime.now().isoformat(),
"extra": {"metadata": metadata} if metadata else None,
}
if description is not None:
dataset["description"] = description
Expand Down Expand Up @@ -2737,6 +2741,7 @@ def list_datasets(
data_type: Optional[str] = None,
dataset_name: Optional[str] = None,
dataset_name_contains: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
limit: Optional[int] = None,
) -> Iterator[ls_schemas.Dataset]:
"""List the datasets on the LangSmith API.
Expand All @@ -2757,6 +2762,8 @@ def list_datasets(
params["name"] = dataset_name
if dataset_name_contains is not None:
params["name_contains"] = dataset_name_contains
if metadata is not None:
params["metadata"] = json.dumps(metadata)
for i, dataset in enumerate(
self._get_paginated_list("/datasets", params=params)
):
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.102"
version = "0.1.103"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <[email protected]>"]
license = "MIT"
Expand Down
11 changes: 10 additions & 1 deletion python/tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,9 @@ def test_list_datasets(langchain_client: Client) -> None:
ds1n = "__test_list_datasets1" + uuid4().hex[:4]
ds2n = "__test_list_datasets2" + uuid4().hex[:4]
try:
dataset1 = langchain_client.create_dataset(ds1n, data_type=DataType.llm)
dataset1 = langchain_client.create_dataset(
ds1n, data_type=DataType.llm, metadata={"foo": "barqux"}
)
dataset2 = langchain_client.create_dataset(ds2n, data_type=DataType.kv)
assert dataset1.url is not None
assert dataset2.url is not None
Expand All @@ -484,6 +486,13 @@ def test_list_datasets(langchain_client: Client) -> None:
)
)
assert len(datasets) == 1
# Sub-filter on metadata
datasets = list(
langchain_client.list_datasets(
dataset_ids=[dataset1.id, dataset2.id], metadata={"foo": "barqux"}
)
)
assert len(datasets) == 1
finally:
# Delete datasets
for name in [ds1n, ds2n]:
Expand Down

0 comments on commit b846a4c

Please sign in to comment.