diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index f28d73ede..e862470f7 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -42,7 +42,7 @@ jobs:
       - name: Install dependencies
         run: |
           poetry install --with dev
-          poetry run pip install -U langchain
+          poetry run pip install -U langchain langchain_anthropic langchain_openai rapidfuzz
       - name: Run Python integration tests
         uses: ./.github/actions/python-integration-tests
         with:
diff --git a/js/src/client.ts b/js/src/client.ts
index df427edbb..995d59668 100644
--- a/js/src/client.ts
+++ b/js/src/client.ts
@@ -239,7 +239,7 @@ export type CreateExampleOptions = {
   exampleId?: string;
 
   metadata?: KVMap;
-  split?: string;
+  split?: string | string[];
 };
 
 type AutoBatchQueueItem = {
@@ -2036,7 +2036,7 @@ export class Client {
    inputs: Array<KVMap>;
    outputs?: Array<KVMap>;
    metadata?: Array<KVMap>;
-    splits?: Array<string>;
+    splits?: Array<string | Array<string>>;
    sourceRunIds?: Array<string>;
    exampleIds?: Array<string>;
    datasetId?: string;
diff --git a/js/src/evaluation/_runner.ts b/js/src/evaluation/_runner.ts
index cdced3c67..f3f981dae 100644
--- a/js/src/evaluation/_runner.ts
+++ b/js/src/evaluation/_runner.ts
@@ -694,6 +694,21 @@ class _ExperimentManager {
     ).date;
   }
 
+  async _getDatasetSplits(): Promise<string[] | undefined> {
+    const examples = await this.getExamples();
+    const allSplits = examples.reduce((acc, ex) => {
+      if (ex.metadata && ex.metadata.dataset_split) {
+        if (Array.isArray(ex.metadata.dataset_split)) {
+          ex.metadata.dataset_split.forEach((split) => acc.add(split));
+        } else if (typeof ex.metadata.dataset_split === "string") {
+          acc.add(ex.metadata.dataset_split);
+        }
+      }
+      return acc;
+    }, new Set<string>());
+    return allSplits.size ? Array.from(allSplits) : undefined;
+  }
+
   async _end(): Promise<void> {
     const experiment = this._experiment;
     if (!experiment) {
@@ -701,6 +716,7 @@
     }
     const projectMetadata = await this._getExperimentMetadata();
     projectMetadata["dataset_version"] = await this._getDatasetVersion();
+    projectMetadata["dataset_splits"] = await this._getDatasetSplits();
     // Update revision_id if not already set
     if (!projectMetadata["revision_id"]) {
       projectMetadata["revision_id"] = await getDefaultRevisionId();
diff --git a/js/src/schemas.ts b/js/src/schemas.ts
index ee8a11036..f33ee1f80 100644
--- a/js/src/schemas.ts
+++ b/js/src/schemas.ts
@@ -229,7 +229,7 @@ export interface RunUpdate {
 export interface ExampleCreate extends BaseExample {
   id?: string;
   created_at?: string;
-  split?: string;
+  split?: string | string[];
 }
 
 export interface Example extends BaseExample {
@@ -245,7 +245,7 @@ export interface ExampleUpdate {
   inputs?: KVMap;
   outputs?: KVMap;
   metadata?: KVMap;
-  split?: string;
+  split?: string | string[];
 }
 export interface BaseDataset {
   name: string;
diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts
index 7637bb821..0b87522e9 100644
--- a/js/src/tests/client.int.test.ts
+++ b/js/src/tests/client.int.test.ts
@@ -97,12 +97,22 @@ test.concurrent("Test LangSmith Client Dataset CRD", async () => {
   await client.updateExample(example.id, {
     inputs: { col1: "updatedExampleCol1" },
     outputs: { col2: "updatedExampleCol2" },
-    split: "my_split2",
+    split: ["my_split2"],
   });
   // Says 'example updated' or something similar
   const newExampleValue = await client.readExample(example.id);
   expect(newExampleValue.inputs.col1).toBe("updatedExampleCol1");
-  expect(newExampleValue.metadata?.dataset_split).toBe("my_split2");
+  expect(newExampleValue.metadata?.dataset_split).toStrictEqual(["my_split2"]);
+
+  await client.updateExample(example.id, {
+    inputs: { col1: "updatedExampleCol3" },
+    outputs: { col2: "updatedExampleCol4" },
+    split: "my_split3",
+  });
+  // Says 'example updated' or something similar
+  const newExampleValue2 = await client.readExample(example.id);
+  expect(newExampleValue2.inputs.col1).toBe("updatedExampleCol3");
+  expect(newExampleValue2.metadata?.dataset_split).toStrictEqual(["my_split3"]);
   await client.deleteExample(example.id);
   const examples2 = await toArray(
     client.listExamples({ datasetId: newDataset.id })
   );
@@ -489,7 +499,7 @@ test.concurrent(
       { output: "hi there 3" },
     ],
     metadata: [{ key: "value 1" }, { key: "value 2" }, { key: "value 3" }],
-    splits: ["train", "test", "train"],
+    splits: ["train", "test", ["train", "validation"]],
     datasetId: dataset.id,
   });
   const initialExamplesList = await toArray(
@@ -520,19 +530,20 @@
   );
   expect(example1?.outputs?.output).toEqual("hi there 1");
   expect(example1?.metadata?.key).toEqual("value 1");
-  expect(example1?.metadata?.dataset_split).toEqual("train");
+  expect(example1?.metadata?.dataset_split).toEqual(["train"]);
   const example2 = examplesList2.find(
     (e) => e.inputs.input === "hello world 2"
   );
   expect(example2?.outputs?.output).toEqual("hi there 2");
   expect(example2?.metadata?.key).toEqual("value 2");
-  expect(example2?.metadata?.dataset_split).toEqual("test");
+  expect(example2?.metadata?.dataset_split).toEqual(["test"]);
   const example3 = examplesList2.find(
     (e) => e.inputs.input === "hello world 3"
   );
   expect(example3?.outputs?.output).toEqual("hi there 3");
   expect(example3?.metadata?.key).toEqual("value 3");
-  expect(example3?.metadata?.dataset_split).toEqual("train");
+  expect(example3?.metadata?.dataset_split).toContain("train");
+  expect(example3?.metadata?.dataset_split).toContain("validation");
 
   await client.createExample(
     { input: "hello world" },
diff --git a/js/src/tests/evaluate.int.test.ts b/js/src/tests/evaluate.int.test.ts
index 198f66473..5e1321d06 100644
--- a/js/src/tests/evaluate.int.test.ts
+++ b/js/src/tests/evaluate.int.test.ts
@@ -1,6 +1,6 @@
 import { EvaluationResult } from "../evaluation/evaluator.js";
 import { evaluate } from "../evaluation/_runner.js";
-import { Example, Run } from "../schemas.js";
+import { Example, Run, TracerSession } from "../schemas.js";
 import { Client } from "../index.js";
 import { afterAll, beforeAll } from "@jest/globals";
 import { RunnableLambda } from "@langchain/core/runnables";
@@ -30,6 +30,13 @@ afterAll(async () => {
   await client.deleteDataset({
     datasetName: TESTING_DATASET_NAME,
   });
+  try {
+    await client.deleteDataset({
+      datasetName: "my_splits_ds2",
+    });
+  } catch (_) {
+    // pass
+  }
 });
 
 test("evaluate can evaluate", async () => {
@@ -351,6 +358,82 @@ test("can pass multiple evaluators", async () => {
   );
 });
 
+test("split info saved correctly", async () => {
+  const client = new Client();
+  // create a new dataset
+  await client.createDataset("my_splits_ds2", {
+    description:
+      "For testing purposes. Is created & deleted for each test run.",
+  });
+  // create examples
+  await client.createExamples({
+    inputs: [{ input: 1 }, { input: 2 }, { input: 3 }],
+    outputs: [{ output: 2 }, { output: 3 }, { output: 4 }],
+    splits: [["test"], ["train"], ["validation", "test"]],
+    datasetName: "my_splits_ds2",
+  });
+
+  const targetFunc = (input: Record<string, any>) => {
+    console.log("__input__", input);
+    return {
+      foo: input.input + 1,
+    };
+  };
+  await evaluate(targetFunc, {
+    data: client.listExamples({ datasetName: "my_splits_ds2" }),
+    description: "splits info saved correctly",
+  });
+
+  const exp = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  let myExp: TracerSession | null = null;
+  for await (const session of exp) {
+    myExp = session;
+  }
+  expect(myExp?.extra?.metadata?.dataset_splits.sort()).toEqual(
+    ["test", "train", "validation"].sort()
+  );
+
+  await evaluate(targetFunc, {
+    data: client.listExamples({
+      datasetName: "my_splits_ds2",
+      splits: ["test"],
+    }),
+    description: "splits info saved correctly",
+  });
+
+  const exp2 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  let myExp2: TracerSession | null = null;
+  for await (const session of exp2) {
+    if (myExp2 === null || session.start_time > myExp2.start_time) {
+      myExp2 = session;
+    }
+  }
+
+  expect(myExp2?.extra?.metadata?.dataset_splits.sort()).toEqual(
+    ["test", "validation"].sort()
+  );
+
+  await evaluate(targetFunc, {
+    data: client.listExamples({
+      datasetName: "my_splits_ds2",
+      splits: ["train"],
+    }),
+    description: "splits info saved correctly",
+  });
+
+  const exp3 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  let myExp3: TracerSession | null = null;
+  for await (const session of exp3) {
+    if (myExp3 === null || session.start_time > myExp3.start_time) {
+      myExp3 = session;
+    }
+  }
+
+  expect(myExp3?.extra?.metadata?.dataset_splits.sort()).toEqual(
+    ["train"].sort()
+  );
+});
+
 test("can pass multiple summary evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
     console.log("__input__", input);
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 9a822fd90..f63a47f65 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -2936,7 +2936,7 @@ def create_examples(
         inputs: Sequence[Mapping[str, Any]],
         outputs: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
         metadata: Optional[Sequence[Optional[Mapping[str, Any]]]] = None,
-        splits: Optional[Sequence[Optional[str]]] = None,
+        splits: Optional[Sequence[Optional[str | List[str]]]] = None,
         source_run_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
         ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
         dataset_id: Optional[ID_TYPE] = None,
@@ -2953,6 +2953,9 @@
             The output values for the examples.
         metadata : Optional[Sequence[Optional[Mapping[str, Any]]]], default=None
             The metadata for the examples.
+        splits : Optional[Sequence[Optional[str | List[str]]]], default=None
+            The splits for the examples, which are divisions
+            of your dataset such as 'train', 'test', or 'validation'.
         source_run_ids : Optional[Sequence[Optional[ID_TYPE]]], default=None
             The IDs of the source runs associated with the examples.
         ids : Optional[Sequence[ID_TYPE]], default=None
@@ -3012,7 +3015,7 @@ def create_example(
         created_at: Optional[datetime.datetime] = None,
         outputs: Optional[Mapping[str, Any]] = None,
         metadata: Optional[Mapping[str, Any]] = None,
-        split: Optional[str] = None,
+        split: Optional[str | List[str]] = None,
         example_id: Optional[ID_TYPE] = None,
     ) -> ls_schemas.Example:
         """Create a dataset example in the LangSmith API.
@@ -3034,6 +3037,9 @@
             The output values for the example.
         metadata : Mapping[str, Any] or None, default=None
             The metadata for the example.
+        split : str or List[str] or None, default=None
+            The splits for the example, which are divisions
+            of your dataset such as 'train', 'test', or 'validation'.
         example_id : UUID or None, default=None
             The ID of the example to create. If not provided, a new
             example will be created.
@@ -3165,7 +3171,7 @@ def update_example(
         inputs: Optional[Dict[str, Any]] = None,
         outputs: Optional[Mapping[str, Any]] = None,
         metadata: Optional[Dict] = None,
-        split: Optional[str] = None,
+        split: Optional[str | List[str]] = None,
         dataset_id: Optional[ID_TYPE] = None,
     ) -> Dict[str, Any]:
         """Update a specific example.
@@ -3180,6 +3186,9 @@
            The output values to update.
         metadata : Dict or None, default=None
            The metadata to update.
+        split : str or List[str] or None, default=None
+            The dataset split to update, such as
+            'train', 'test', or 'validation'.
         dataset_id : UUID or None, default=None
            The ID of the dataset to update.
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 3c07ed165..27910b90b 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1322,6 +1322,23 @@ def _get_dataset_version(self) -> Optional[str]:
         max_modified_at = max(modified_at) if modified_at else None
         return max_modified_at.isoformat() if max_modified_at else None
 
+    def _get_dataset_splits(self) -> Optional[list[str]]:
+        examples = list(self.examples)
+        splits = set()
+        for example in examples:
+            if (
+                example.metadata
+                and example.metadata.get("dataset_split")
+                and isinstance(example.metadata["dataset_split"], list)
+            ):
+                for split in example.metadata["dataset_split"]:
+                    if isinstance(split, str):
+                        splits.add(split)
+            else:
+                splits.add("base")
+
+        return list(splits)
+
     def _end(self) -> None:
         experiment = self._experiment
         if experiment is None:
@@ -1329,6 +1346,7 @@
 
         project_metadata = self._get_experiment_metadata()
         project_metadata["dataset_version"] = self._get_dataset_version()
+        project_metadata["dataset_splits"] = self._get_dataset_splits()
         self.client.update_project(
             experiment.id,
             end_time=datetime.datetime.now(datetime.timezone.utc),
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index ee57ffd32..758530e03 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -63,7 +63,7 @@ class ExampleCreate(ExampleBase):
 
     id: Optional[UUID]
     created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
-    split: Optional[str] = None
+    split: Optional[Union[str, List[str]]] = None
 
 
 class Example(ExampleBase):
@@ -106,7 +106,7 @@ class ExampleUpdate(BaseModel):
    inputs: Optional[Dict[str, Any]] = None
    outputs: Optional[Dict[str, Any]] = None
    metadata: Optional[Dict[str, Any]] = None
-    split: Optional[str] = None
+    split: Optional[Union[str, List[str]]] = None
 
    class Config:
        """Configuration class for the schema."""
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index c037bfd65..9107cc9f9 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -109,9 +109,9 @@ def test_datasets(langchain_client: Client) -> None:
 def test_list_examples(langchain_client: Client) -> None:
     """Test list_examples."""
     examples = [
-        ("Shut up, idiot", "Toxic", "train"),
+        ("Shut up, idiot", "Toxic", ["train", "validation"]),
         ("You're a wonderful person", "Not toxic", "test"),
-        ("This is the worst thing ever", "Toxic", "train"),
+        ("This is the worst thing ever", "Toxic", ["train"]),
         ("I had a great day today", "Not toxic", "test"),
         ("Nobody likes you", "Toxic", "train"),
         ("This is unacceptable. I want to speak to the manager.", "Not toxic", None),
@@ -133,6 +133,11 @@
     )
     assert len(example_list) == 3
 
+    example_list = list(
+        langchain_client.list_examples(dataset_id=dataset.id, splits=["validation"])
+    )
+    assert len(example_list) == 1
+
     example_list = list(
         langchain_client.list_examples(dataset_id=dataset.id, splits=["test"])
     )
@@ -148,11 +153,21 @@
             example.id
             for example in example_list
             if example.metadata is not None
-            and example.metadata.get("dataset_split") == "test"
+            and "test" in example.metadata.get("dataset_split", [])
         ][0],
         split="train",
     )
 
+    example_list = list(
+        langchain_client.list_examples(dataset_id=dataset.id, splits=["test"])
+    )
+    assert len(example_list) == 1
+
+    example_list = list(
+        langchain_client.list_examples(dataset_id=dataset.id, splits=["train"])
+    )
+    assert len(example_list) == 4
+
     langchain_client.create_example(
         inputs={"text": "What's up!"},
         outputs={"label": "Not toxic"},
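---

For reference, a minimal sketch (not part of the patch) of the split semantics these changes introduce, using only the parameters exercised in the diffs above; the dataset name and example values are illustrative:

    from langsmith import Client

    client = Client()
    dataset = client.create_dataset("splits-demo")  # illustrative name

    # `splits` now accepts a plain string or a list of splits per example.
    # The tests above assert that a string round-trips as a one-element
    # list in example.metadata["dataset_split"].
    client.create_examples(
        inputs=[{"text": "a"}, {"text": "b"}],
        outputs=[{"label": "x"}, {"label": "y"}],
        splits=[["train", "validation"], "test"],
        dataset_id=dataset.id,
    )

    # Filtering by one split returns every example that contains it,
    # including examples that belong to several splits.
    train = list(client.list_examples(dataset_id=dataset.id, splits=["train"]))

    # update_example replaces the example's split membership outright.
    client.update_example(train[0].id, split=["test"])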