Skip to content

Commit

Permalink
test flakiness
Browse files (browse the repository at this point in the history)
  • Loading branch information
hinthornw committed Aug 19, 2024
1 parent 75f7db0 commit 1e5d1a9
Showing 1 changed file with 17 additions and 22 deletions.
39 changes: 17 additions & 22 deletions js/src/tests/evaluate.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ import { Example, Run, TracerSession } from "../schemas.js";
import { Client } from "../index.js";
import { afterAll, beforeAll } from "@jest/globals";
import { RunnableLambda, RunnableSequence } from "@langchain/core/runnables";

const TESTING_DATASET_NAME = "test_dataset_js_evaluate_123";
import { v4 as uuidv4 } from "uuid";
const TESTING_DATASET_NAME = `test_dataset_js_evaluate_${uuidv4()}`;
const TESTING_DATASET_NAME2 = `my_splits_ds_${uuidv4()}`;

beforeAll(async () => {
const client = new Client();
Expand Down Expand Up @@ -46,7 +47,6 @@ afterAll(async () => {

test("evaluate can evaluate", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -84,7 +84,6 @@ test("evaluate can evaluate", async () => {

test("evaluate can repeat", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -184,7 +183,6 @@ test("evaluate can evaluate with RunEvaluator evaluators", async () => {

test("evaluate can evaluate with custom evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -256,7 +254,6 @@ test("evaluate can evaluate with custom evaluators", async () => {

test("evaluate can evaluate with summary evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -314,7 +311,6 @@ test("evaluate can evaluate with summary evaluators", async () => {

test.skip("can iterate over evaluate results", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -343,7 +339,6 @@ test.skip("can iterate over evaluate results", async () => {

test("can pass multiple evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -391,7 +386,7 @@ test("can pass multiple evaluators", async () => {
test("split info saved correctly", async () => {
const client = new Client();
// create a new dataset
await client.createDataset("my_splits_ds2", {
await client.createDataset(TESTING_DATASET_NAME2, {
description:
"For testing purposed. Is created & deleted for each test run.",
});
Expand All @@ -400,21 +395,22 @@ test("split info saved correctly", async () => {
inputs: [{ input: 1 }, { input: 2 }, { input: 3 }],
outputs: [{ output: 2 }, { output: 3 }, { output: 4 }],
splits: [["test"], ["train"], ["validation", "test"]],
datasetName: "my_splits_ds2",
datasetName: TESTING_DATASET_NAME2,
});

const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
};
await evaluate(targetFunc, {
data: client.listExamples({ datasetName: "my_splits_ds2" }),
data: client.listExamples({ datasetName: TESTING_DATASET_NAME2 }),
description: "splits info saved correctly",
});

const exp = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
const exp = client.listProjects({
referenceDatasetName: TESTING_DATASET_NAME2,
});
let myExp: TracerSession | null = null;
for await (const session of exp) {
myExp = session;
Expand All @@ -425,13 +421,15 @@ test("split info saved correctly", async () => {

await evaluate(targetFunc, {
data: client.listExamples({
datasetName: "my_splits_ds2",
datasetName: TESTING_DATASET_NAME2,
splits: ["test"],
}),
description: "splits info saved correctly",
});

const exp2 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
const exp2 = client.listProjects({
referenceDatasetName: TESTING_DATASET_NAME2,
});
let myExp2: TracerSession | null = null;
for await (const session of exp2) {
if (myExp2 === null || session.start_time > myExp2.start_time) {
Expand All @@ -445,13 +443,15 @@ test("split info saved correctly", async () => {

await evaluate(targetFunc, {
data: client.listExamples({
datasetName: "my_splits_ds2",
datasetName: TESTING_DATASET_NAME2,
splits: ["train"],
}),
description: "splits info saved correctly",
});

const exp3 = client.listProjects({ referenceDatasetName: "my_splits_ds2" });
const exp3 = client.listProjects({
referenceDatasetName: TESTING_DATASET_NAME2,
});
let myExp3: TracerSession | null = null;
for await (const session of exp3) {
if (myExp3 === null || session.start_time > myExp3.start_time) {
Expand All @@ -466,7 +466,6 @@ test("split info saved correctly", async () => {

test("can pass multiple summary evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -517,7 +516,6 @@ test("can pass AsyncIterable of Example's to evaluator instead of dataset name",
});

const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -551,7 +549,6 @@ test("can pass AsyncIterable of Example's to evaluator instead of dataset name",

test("max concurrency works with custom evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -587,7 +584,6 @@ test("max concurrency works with custom evaluators", async () => {

test("max concurrency works with summary evaluators", async () => {
const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down Expand Up @@ -704,7 +700,6 @@ test("evaluate can accept array of examples", async () => {
}

const targetFunc = (input: Record<string, any>) => {
console.log("__input__", input);
return {
foo: input.input + 1,
};
Expand Down

0 comments on commit 1e5d1a9

Please sign in to comment.