Add splits as param to list_examples, create_example/s, update_example (and typescript equivs) #702

Merged: 10 commits merged on May 16, 2024
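The diff below threads a new `split`/`splits` parameter through both the Python and TypeScript clients (example creation, bulk creation, listing, and updates). A minimal usage sketch of the Python side, under the assumption of a configured `LANGSMITH_API_KEY`; the dataset name and example payloads are purely illustrative:

```python
from langsmith import Client

client = Client()  # assumes LANGSMITH_API_KEY is set in the environment

# Hypothetical dataset used only for illustration.
dataset = client.create_dataset(dataset_name="toxicity-demo")

# Bulk-create examples, assigning each one to a dataset split (new in this PR).
client.create_examples(
    inputs=[{"text": "Shut up, idiot"}, {"text": "I had a great day today"}],
    outputs=[{"label": "Toxic"}, {"label": "Not toxic"}],
    splits=["train", "test"],
    dataset_id=dataset.id,
)

# List only the examples that belong to the "train" split (new filter in this PR).
train_examples = list(
    client.list_examples(dataset_id=dataset.id, splits=["train"])
)
```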
4 changes: 2 additions & 2 deletions js/package.json
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.1.25",
"version": "0.1.26",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"packageManager": "[email protected]",
"files": [
@@ -197,4 +197,4 @@
},
"./package.json": "./package.json"
}
}
}
12 changes: 12 additions & 0 deletions js/src/client.ts
@@ -239,6 +239,7 @@
exampleId?: string;

metadata?: KVMap;
split?: string;
};

type AutoBatchQueueItem = {
@@ -1989,6 +1990,7 @@
createdAt,
exampleId,
metadata,
split,
}: CreateExampleOptions
): Promise<Example> {
let datasetId_ = datasetId;
@@ -2009,6 +2011,7 @@
created_at: createdAt_?.toISOString(),
id: exampleId,
metadata,
split,
};

const response = await this.caller.call(fetch, `${this.apiUrl}/examples`, {
@@ -2033,6 +2036,7 @@
inputs: Array<KVMap>;
outputs?: Array<KVMap>;
metadata?: Array<KVMap>;
splits?: Array<string>;
sourceRunIds?: Array<string>;
exampleIds?: Array<string>;
datasetId?: string;
@@ -2063,6 +2067,7 @@
inputs: input,
outputs: outputs ? outputs[idx] : undefined,
metadata: metadata ? metadata[idx] : undefined,
split: props.splits ? props.splits[idx] : undefined,
id: exampleIds ? exampleIds[idx] : undefined,
source_run_id: sourceRunIds ? sourceRunIds[idx] : undefined,
};
@@ -2130,13 +2135,15 @@
datasetName,
exampleIds,
asOf,
splits,
inlineS3Urls,
metadata,
}: {
datasetId?: string;
datasetName?: string;
exampleIds?: string[];
asOf?: string | Date;
splits?: string[];
inlineS3Urls?: boolean;
metadata?: KVMap;
} = {}): AsyncIterable<Example> {
@@ -2167,6 +2174,11 @@
params.append("id", id_);
}
}
if (splits !== undefined) {
for (const split of splits) {
params.append("splits", split);
}
}
if (metadata !== undefined) {
const serializedMetadata = JSON.stringify(metadata);
params.append("metadata", serializedMetadata);
@@ -2590,7 +2602,7 @@
public async logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 2605 in js/src/client.ts (GitHub Actions / Check linting): Unexpected any. Specify a different type
): Promise<EvaluationResult[]> {
const results: Array<EvaluationResult> =
this._selectEvalResults(evaluatorResponse);
2 changes: 1 addition & 1 deletion js/src/index.ts
@@ -12,4 +12,4 @@ export type {
export { RunTree, type RunTreeConfig } from "./run_trees.js";

// Update using yarn bump-version
export const __version__ = "0.1.25";
export const __version__ = "0.1.26";
2 changes: 2 additions & 0 deletions js/src/schemas.ts
@@ -229,6 +229,7 @@ export interface RunUpdate {
export interface ExampleCreate extends BaseExample {
id?: string;
created_at?: string;
split?: string;
}

export interface Example extends BaseExample {
@@ -244,6 +245,7 @@ export interface ExampleUpdate {
inputs?: KVMap;
outputs?: KVMap;
metadata?: KVMap;
split?: string;
}
export interface BaseDataset {
name: string;
38 changes: 37 additions & 1 deletion js/src/tests/client.int.test.ts
@@ -76,7 +76,7 @@ test.concurrent("Test LangSmith Client Dataset CRD", async () => {
const example = await client.createExample(
{ col1: "addedExampleCol1" },
{ col2: "addedExampleCol2" },
{ datasetId: newDataset.id }
{ datasetId: newDataset.id, split: "my_split" }
);
const exampleValue = await client.readExample(example.id);
expect(exampleValue.inputs.col1).toBe("addedExampleCol1");
@@ -88,13 +88,21 @@ test.concurrent("Test LangSmith Client Dataset CRD", async () => {
expect(examples.length).toBe(2);
expect(examples.map((e) => e.id)).toContain(example.id);

const _examples = await toArray(
client.listExamples({ datasetId: newDataset.id, splits: ["my_split"] })
);
expect(_examples.length).toBe(1);
expect(_examples.map((e) => e.id)).toContain(example.id);

await client.updateExample(example.id, {
inputs: { col1: "updatedExampleCol1" },
outputs: { col2: "updatedExampleCol2" },
split: "my_split2",
});
// Says 'example updated' or something similar
const newExampleValue = await client.readExample(example.id);
expect(newExampleValue.inputs.col1).toBe("updatedExampleCol1");
expect(newExampleValue.metadata?.dataset_split).toBe("my_split2");
await client.deleteExample(example.id);
const examples2 = await toArray(
client.listExamples({ datasetId: newDataset.id })
@@ -481,6 +489,7 @@ test.concurrent(
{ output: "hi there 3" },
],
metadata: [{ key: "value 1" }, { key: "value 2" }, { key: "value 3" }],
splits: ["train", "test", "train"],
datasetId: dataset.id,
});
const initialExamplesList = await toArray(
@@ -511,16 +520,19 @@
);
expect(example1?.outputs?.output).toEqual("hi there 1");
expect(example1?.metadata?.key).toEqual("value 1");
expect(example1?.metadata?.dataset_split).toEqual("train");
const example2 = examplesList2.find(
(e) => e.inputs.input === "hello world 2"
);
expect(example2?.outputs?.output).toEqual("hi there 2");
expect(example2?.metadata?.key).toEqual("value 2");
expect(example2?.metadata?.dataset_split).toEqual("test");
const example3 = examplesList2.find(
(e) => e.inputs.input === "hello world 3"
);
expect(example3?.outputs?.output).toEqual("hi there 3");
expect(example3?.metadata?.key).toEqual("value 3");
expect(example3?.metadata?.dataset_split).toEqual("train");

await client.createExample(
{ input: "hello world" },
@@ -560,6 +572,30 @@ test.concurrent(
expect(examplesList3[0].metadata?.foo).toEqual("bar");
expect(examplesList3[0].metadata?.baz).toEqual("qux");

examplesList3 = await toArray(
client.listExamples({
datasetId: dataset.id,
splits: ["train"],
})
);
expect(examplesList3.length).toEqual(2);

examplesList3 = await toArray(
client.listExamples({
datasetId: dataset.id,
splits: ["test"],
})
);
expect(examplesList3.length).toEqual(1);

examplesList3 = await toArray(
client.listExamples({
datasetId: dataset.id,
splits: ["train", "test"],
})
);
expect(examplesList3.length).toEqual(3);

await client.deleteDataset({ datasetId: dataset.id });
},
180_000
14 changes: 13 additions & 1 deletion python/langsmith/client.py
@@ -2936,6 +2936,7 @@ def create_examples(
inputs: Sequence[Mapping[str, Any]],
outputs: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
metadata: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
splits: Optional[Sequence[Optional[str]]] = None,
source_run_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
dataset_id: Optional[ID_TYPE] = None,
@@ -2981,13 +2982,15 @@
"outputs": out_,
"dataset_id": dataset_id,
"metadata": metadata_,
"split": split_,
"id": id_,
"source_run_id": source_run_id_,
}
for in_, out_, metadata_, id_, source_run_id_ in zip(
for in_, out_, metadata_, split_, id_, source_run_id_ in zip(
inputs,
outputs or [None] * len(inputs),
metadata or [None] * len(inputs),
splits or [None] * len(inputs),
ids or [None] * len(inputs),
source_run_ids or [None] * len(inputs),
)
@@ -3009,6 +3012,7 @@ def create_example(
created_at: Optional[datetime.datetime] = None,
outputs: Optional[Mapping[str, Any]] = None,
metadata: Optional[Mapping[str, Any]] = None,
split: Optional[str] = None,
example_id: Optional[ID_TYPE] = None,
) -> ls_schemas.Example:
"""Create a dataset example in the LangSmith API.
@@ -3045,6 +3049,7 @@
"outputs": outputs,
"dataset_id": dataset_id,
"metadata": metadata,
"split": split,
}
if created_at:
data["created_at"] = created_at.isoformat()
@@ -3094,6 +3099,7 @@ def list_examples(
dataset_name: Optional[str] = None,
example_ids: Optional[Sequence[ID_TYPE]] = None,
as_of: Optional[Union[datetime.datetime, str]] = None,
splits: Optional[Sequence[str]] = None,
inline_s3_urls: bool = True,
limit: Optional[int] = None,
metadata: Optional[dict] = None,
@@ -3112,6 +3118,9 @@
timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time
of the tagged (or timestamped) version.
splits (List[str], optional): A list of dataset splits, which are
divisions of your dataset such as 'train', 'test', or 'validation'.
Returns examples only from the specified splits.
inline_s3_urls (bool, optional): Whether to inline S3 URLs.
Defaults to True.
limit (int, optional): The maximum number of examples to return.
@@ -3125,6 +3134,7 @@
"as_of": (
as_of.isoformat() if isinstance(as_of, datetime.datetime) else as_of
),
"splits": splits,
"inline_s3_urls": inline_s3_urls,
"limit": min(limit, 100) if limit is not None else 100,
}
@@ -3155,6 +3165,7 @@ def update_example(
inputs: Optional[Dict[str, Any]] = None,
outputs: Optional[Mapping[str, Any]] = None,
metadata: Optional[Dict] = None,
split: Optional[str] = None,
dataset_id: Optional[ID_TYPE] = None,
) -> Dict[str, Any]:
"""Update a specific example.
@@ -3182,6 +3193,7 @@
outputs=outputs,
dataset_id=dataset_id,
metadata=metadata,
split=split,
)
response = self.session.patch(
f"{self.api_url}/examples/{_as_uuid(example_id, 'example_id')}",
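The new `list_examples` docstring above describes `splits` as a filter over dataset divisions such as 'train', 'test', or 'validation', and the integration tests further down read the assigned split back via `example.metadata["dataset_split"]`. A small sketch of moving a single example between splits under those assumptions (the dataset ID is a placeholder for an existing dataset):

```python
from langsmith import Client

client = Client()  # assumes LANGSMITH_API_KEY is set in the environment

dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID, illustrative only

# Create one example directly into the "test" split (new `split` kwarg in this PR).
example = client.create_example(
    inputs={"text": "You're a wonderful person"},
    outputs={"label": "Not toxic"},
    dataset_id=dataset_id,
    split="test",
)

# Reassign the example to the "train" split.
client.update_example(example_id=example.id, split="train")

# The integration tests suggest the split is surfaced on the example's metadata.
refreshed = client.read_example(example.id)
assert (refreshed.metadata or {}).get("dataset_split") == "train"
```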
2 changes: 2 additions & 0 deletions python/langsmith/schemas.py
@@ -63,6 +63,7 @@ class ExampleCreate(ExampleBase):

id: Optional[UUID]
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
split: Optional[str] = None


class Example(ExampleBase):
@@ -105,6 +106,7 @@ class ExampleUpdate(BaseModel):
inputs: Optional[Dict[str, Any]] = None
outputs: Optional[Dict[str, Any]] = None
metadata: Optional[Dict[str, Any]] = None
split: Optional[str] = None

class Config:
"""Configuration class for the schema."""
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.1.58"
version = "0.1.59"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <[email protected]>"]
license = "MIT"
43 changes: 34 additions & 9 deletions python/tests/integration_tests/test_client.py
@@ -109,25 +109,50 @@ def test_datasets(langchain_client: Client) -> None:
def test_list_examples(langchain_client: Client) -> None:
"""Test list_examples."""
examples = [
("Shut up, idiot", "Toxic"),
("You're a wonderful person", "Not toxic"),
("This is the worst thing ever", "Toxic"),
("I had a great day today", "Not toxic"),
("Nobody likes you", "Toxic"),
("This is unacceptable. I want to speak to the manager.", "Not toxic"),
("Shut up, idiot", "Toxic", "train"),
("You're a wonderful person", "Not toxic", "test"),
("This is the worst thing ever", "Toxic", "train"),
("I had a great day today", "Not toxic", "test"),
("Nobody likes you", "Toxic", "train"),
("This is unacceptable. I want to speak to the manager.", "Not toxic", None),
]

dataset_name = "__test_list_examples" + uuid4().hex[:4]
dataset = langchain_client.create_dataset(dataset_name=dataset_name)
inputs, outputs = zip(
*[({"text": text}, {"label": label}) for text, label in examples]
inputs, outputs, splits = zip(
*[({"text": text}, {"label": label}, split) for text, label, split in examples]
)
langchain_client.create_examples(
inputs=inputs, outputs=outputs, dataset_id=dataset.id
inputs=inputs, outputs=outputs, splits=splits, dataset_id=dataset.id
)
example_list = list(langchain_client.list_examples(dataset_id=dataset.id))
assert len(example_list) == len(examples)

example_list = list(
langchain_client.list_examples(dataset_id=dataset.id, splits=["train"])
)
assert len(example_list) == 3

example_list = list(
langchain_client.list_examples(dataset_id=dataset.id, splits=["test"])
)
assert len(example_list) == 2

example_list = list(
langchain_client.list_examples(dataset_id=dataset.id, splits=["train", "test"])
)
assert len(example_list) == 5

langchain_client.update_example(
example_id=[
example.id
for example in example_list
if example.metadata is not None
and example.metadata.get("dataset_split") == "test"
][0],
split="train",
)

langchain_client.create_example(
inputs={"text": "What's up!"},
outputs={"label": "Not toxic"},