From 1e5d1a911cf59e32b2f3940f085e82364f0faa47 Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Mon, 19 Aug 2024 16:37:45 -0700
Subject: [PATCH] Fix test flakiness: use unique dataset names, drop debug logs

---
 js/src/tests/evaluate.int.test.ts | 39 ++++++++++++++-----------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/js/src/tests/evaluate.int.test.ts b/js/src/tests/evaluate.int.test.ts
index 733b68a6d..98ab6c6c8 100644
--- a/js/src/tests/evaluate.int.test.ts
+++ b/js/src/tests/evaluate.int.test.ts
@@ -7,8 +7,9 @@ import { Example, Run, TracerSession } from "../schemas.js";
 import { Client } from "../index.js";
 import { afterAll, beforeAll } from "@jest/globals";
 import { RunnableLambda, RunnableSequence } from "@langchain/core/runnables";
-
-const TESTING_DATASET_NAME = "test_dataset_js_evaluate_123";
+import { v4 as uuidv4 } from "uuid";
+const TESTING_DATASET_NAME = `test_dataset_js_evaluate_${uuidv4()}`;
+const TESTING_DATASET_NAME2 = `my_splits_ds_${uuidv4()}`;
 
 beforeAll(async () => {
   const client = new Client();
@@ -46,7 +47,6 @@ afterAll(async () => {
 
 test("evaluate can evaluate", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -84,7 +84,6 @@ test("evaluate can evaluate", async () => {
 
 test("evaluate can repeat", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -184,7 +183,6 @@ test("evaluate can evaluate with RunEvaluator evaluators", async () => {
 
 test("evaluate can evaluate with custom evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -256,7 +254,6 @@ test("evaluate can evaluate with custom evaluators", async () => {
 
 test("evaluate can evaluate with summary evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -314,7 +311,6 @@ test("evaluate can evaluate with summary evaluators", async () => {
 
 test.skip("can iterate over evaluate results", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -343,7 +339,6 @@ test.skip("can iterate over evaluate results", async () => {
 
 test("can pass multiple evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -391,7 +386,7 @@ test("can pass multiple evaluators", async () => {
 test("split info saved correctly", async () => {
   const client = new Client();
   // create a new dataset
-  await client.createDataset("my_splits_ds2", {
+  await client.createDataset(TESTING_DATASET_NAME2, {
     description:
       "For testing purposed. Is created & deleted for each test run.",
   });
@@ -400,21 +395,22 @@ test("split info saved correctly", async () => {
     inputs: [{ input: 1 }, { input: 2 }, { input: 3 }],
     outputs: [{ output: 2 }, { output: 3 }, { output: 4 }],
     splits: [["test"], ["train"], ["validation", "test"]],
-    datasetName: "my_splits_ds2",
+    datasetName: TESTING_DATASET_NAME2,
   });
 
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
   };
   await evaluate(targetFunc, {
-    data: client.listExamples({ datasetName: "my_splits_ds2" }),
+    data: client.listExamples({ datasetName: TESTING_DATASET_NAME2 }),
     description: "splits info saved correctly",
   });
 
-  const exp = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  const exp = client.listProjects({
+    referenceDatasetName: TESTING_DATASET_NAME2,
+  });
   let myExp: TracerSession | null = null;
   for await (const session of exp) {
     myExp = session;
@@ -425,13 +421,15 @@ test("split info saved correctly", async () => {
 
   await evaluate(targetFunc, {
     data: client.listExamples({
-      datasetName: "my_splits_ds2",
+      datasetName: TESTING_DATASET_NAME2,
       splits: ["test"],
     }),
     description: "splits info saved correctly",
   });
 
-  const exp2 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  const exp2 = client.listProjects({
+    referenceDatasetName: TESTING_DATASET_NAME2,
+  });
   let myExp2: TracerSession | null = null;
   for await (const session of exp2) {
     if (myExp2 === null || session.start_time > myExp2.start_time) {
@@ -445,13 +443,15 @@ test("split info saved correctly", async () => {
 
   await evaluate(targetFunc, {
     data: client.listExamples({
-      datasetName: "my_splits_ds2",
+      datasetName: TESTING_DATASET_NAME2,
       splits: ["train"],
     }),
     description: "splits info saved correctly",
   });
 
-  const exp3 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
+  const exp3 = client.listProjects({
+    referenceDatasetName: TESTING_DATASET_NAME2,
+  });
   let myExp3: TracerSession | null = null;
   for await (const session of exp3) {
     if (myExp3 === null || session.start_time > myExp3.start_time) {
@@ -466,7 +466,6 @@ test("split info saved correctly", async () => {
 
 test("can pass multiple summary evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -517,7 +516,6 @@ test("can pass AsyncIterable of Example's to evaluator instead of dataset name",
   });
 
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -551,7 +549,6 @@ test("can pass AsyncIterable of Example's to evaluator instead of dataset name",
 
 test("max concurrency works with custom evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -587,7 +584,6 @@ test("max concurrency works with custom evaluators", async () => {
 
 test("max concurrency works with summary evaluators", async () => {
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };
@@ -704,7 +700,6 @@ test("evaluate can accept array of examples", async () => {
   }
 
   const targetFunc = (input: Record<string, any>) => {
-    console.log("__input__", input);
     return {
       foo: input.input + 1,
     };