diff --git a/js/src/client.ts b/js/src/client.ts index 1c34f731b..23f33d515 100644 --- a/js/src/client.ts +++ b/js/src/client.ts @@ -35,6 +35,12 @@ import { AnnotationQueue, RunWithAnnotationQueueInfo, Attachments, + ExampleUploadWithAttachments, + UploadExamplesResponse, + ExampleUpdateWithAttachments, + UpdateExamplesResponse, + RawExample, + AttachmentInfo, } from "./schemas.js"; import { convertLangChainMessageToExample, @@ -760,6 +766,13 @@ export class Client implements LangSmithTracingClientInterface { ); } + private async _getMultiPartSupport(): Promise { + const serverInfo = await this._ensureServerInfo(); + return ( + serverInfo.instance_flags?.dataset_examples_multipart_enabled ?? false + ); + } + private drainAutoBatchQueue(batchSizeLimit: number) { while (this.autoBatchQueue.items.length > 0) { const [batch, done] = this.autoBatchQueue.pop(batchSizeLimit); @@ -2715,7 +2728,22 @@ export class Client implements LangSmithTracingClientInterface { public async readExample(exampleId: string): Promise { assertUuid(exampleId); const path = `/examples/${exampleId}`; - return await this._get(path); + const rawExample: RawExample = await this._get(path); + const { attachment_urls, ...rest } = rawExample; + const example: Example = rest; + if (attachment_urls) { + // add attachments back to the example + example.attachments = Object.entries(attachment_urls).reduce( + (acc, [key, value]) => { + acc[key] = { + presigned_url: value.presigned_url, + }; + return acc; + }, + {} as Record + ); + } + return example; } public async *listExamples({ @@ -2729,6 +2757,7 @@ export class Client implements LangSmithTracingClientInterface { limit, offset, filter, + includeAttachments, }: { datasetId?: string; datasetName?: string; @@ -2740,6 +2769,7 @@ export class Client implements LangSmithTracingClientInterface { limit?: number; offset?: number; filter?: string; + includeAttachments?: boolean; } = {}): AsyncIterable { let datasetId_; if (datasetId !== undefined && datasetName 
!== undefined) { @@ -2786,12 +2816,30 @@ export class Client implements LangSmithTracingClientInterface { if (filter !== undefined) { params.append("filter", filter); } + if (includeAttachments === true) { + ["attachment_urls", "outputs", "metadata"].forEach((field) => + params.append("select", field) + ); + } let i = 0; - for await (const examples of this._getPaginated( + for await (const rawExamples of this._getPaginated( "/examples", params )) { - for (const example of examples) { + for (const rawExample of rawExamples) { + const { attachment_urls, ...rest } = rawExample; + const example: Example = rest; + if (attachment_urls) { + example.attachments = Object.entries(attachment_urls).reduce( + (acc, [key, value]) => { + acc[key] = { + presigned_url: value.presigned_url, + }; + return acc; + }, + {} as Record + ); + } yield example; i++; } @@ -3847,6 +3895,173 @@ export class Client implements LangSmithTracingClientInterface { ); } + /** + * Update examples with attachments using multipart form data. + * @param updates List of ExampleUpdateWithAttachments objects to upsert + * @returns Promise with the update response + */ + public async updateExamplesMultipart( + datasetId: string, + updates: ExampleUpdateWithAttachments[] = [] + ): Promise { + if (!(await this._getMultiPartSupport())) { + throw new Error( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." 
+ ); + } + const formData = new FormData(); + + for (const example of updates) { + const exampleId = example.id; + + // Prepare the main example body + const exampleBody = { + ...(example.metadata && { metadata: example.metadata }), + ...(example.split && { split: example.split }), + }; + + // Add main example data + const stringifiedExample = stringifyForTracing(exampleBody); + const exampleBlob = new Blob([stringifiedExample], { + type: "application/json", + }); + formData.append(exampleId, exampleBlob); + + // Add inputs + if (example.inputs) { + const stringifiedInputs = stringifyForTracing(example.inputs); + const inputsBlob = new Blob([stringifiedInputs], { + type: "application/json", + }); + formData.append(`${exampleId}.inputs`, inputsBlob); + } + + // Add outputs if present + if (example.outputs) { + const stringifiedOutputs = stringifyForTracing(example.outputs); + const outputsBlob = new Blob([stringifiedOutputs], { + type: "application/json", + }); + formData.append(`${exampleId}.outputs`, outputsBlob); + } + + // Add attachments if present + if (example.attachments) { + for (const [name, [mimeType, data]] of Object.entries( + example.attachments + )) { + const attachmentBlob = new Blob([data], { + type: `${mimeType}; length=${data.byteLength}`, + }); + formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + } + } + + if (example.attachments_operations) { + const stringifiedAttachmentsOperations = stringifyForTracing( + example.attachments_operations + ); + const attachmentsOperationsBlob = new Blob( + [stringifiedAttachmentsOperations], + { + type: "application/json", + } + ); + formData.append( + `${exampleId}.attachments_operations`, + attachmentsOperationsBlob + ); + } + } + + const response = await this.caller.call( + _getFetchImplementation(), + `${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`, + { + method: "PATCH", + headers: this.headers, + body: formData, + } + ); + const result = await response.json(); + return 
result; + } + + /** + * Upload examples with attachments using multipart form data. + * @param uploads List of ExampleUploadWithAttachments objects to upload + * @returns Promise with the upload response + */ + public async uploadExamplesMultipart( + datasetId: string, + uploads: ExampleUploadWithAttachments[] = [] + ): Promise { + if (!(await this._getMultiPartSupport())) { + throw new Error( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ); + } + const formData = new FormData(); + + for (const example of uploads) { + const exampleId = (example.id ?? uuid.v4()).toString(); + + // Prepare the main example body + const exampleBody = { + created_at: example.created_at, + ...(example.metadata && { metadata: example.metadata }), + ...(example.split && { split: example.split }), + }; + + // Add main example data + const stringifiedExample = stringifyForTracing(exampleBody); + const exampleBlob = new Blob([stringifiedExample], { + type: "application/json", + }); + formData.append(exampleId, exampleBlob); + + // Add inputs + const stringifiedInputs = stringifyForTracing(example.inputs); + const inputsBlob = new Blob([stringifiedInputs], { + type: "application/json", + }); + formData.append(`${exampleId}.inputs`, inputsBlob); + + // Add outputs if present + if (example.outputs) { + const stringifiedOutputs = stringifyForTracing(example.outputs); + const outputsBlob = new Blob([stringifiedOutputs], { + type: "application/json", + }); + formData.append(`${exampleId}.outputs`, outputsBlob); + } + + // Add attachments if present + if (example.attachments) { + for (const [name, [mimeType, data]] of Object.entries( + example.attachments + )) { + const attachmentBlob = new Blob([data], { + type: `${mimeType}; length=${data.byteLength}`, + }); + formData.append(`${exampleId}.attachment.${name}`, attachmentBlob); + } + } + } + + const response = await this.caller.call( + _getFetchImplementation(), + 
`${this.apiUrl}/v1/platform/datasets/${datasetId}/examples`, + { + method: "POST", + headers: this.headers, + body: formData, + } + ); + const result = await response.json(); + return result; + } + public async updatePrompt( promptIdentifier: string, options?: { diff --git a/js/src/evaluation/_runner.ts b/js/src/evaluation/_runner.ts index cac6b5f4c..6c74afa34 100644 --- a/js/src/evaluation/_runner.ts +++ b/js/src/evaluation/_runner.ts @@ -1,5 +1,12 @@ import { Client, RunTree, RunTreeConfig } from "../index.js"; -import { BaseRun, Example, KVMap, Run, TracerSession } from "../schemas.js"; +import { + AttachmentInfo, + BaseRun, + Example, + KVMap, + Run, + TracerSession, +} from "../schemas.js"; import { traceable } from "../traceable.js"; import { getDefaultRevisionId, getGitInfo } from "../utils/_git.js"; import { assertUuid } from "../utils/_uuid.js"; @@ -22,11 +29,15 @@ import { ComparativeEvaluator, } from "./evaluate_comparative.js"; +export type TargetConfigT = KVMap & { + attachments?: Record; + callbacks?: any; +}; type StandardTargetT = - | ((input: TInput, config?: KVMap) => Promise) - | ((input: TInput, config?: KVMap) => TOutput) - | { invoke: (input: TInput, config?: KVMap) => TOutput } - | { invoke: (input: TInput, config?: KVMap) => Promise }; + | ((input: TInput, config?: TargetConfigT) => Promise) + | ((input: TInput, config?: TargetConfigT) => TOutput) + | { invoke: (input: TInput, config?: TargetConfigT) => TOutput } + | { invoke: (input: TInput, config?: TargetConfigT) => Promise }; type ComparativeTargetT = | Array @@ -98,6 +109,7 @@ export type EvaluatorT = inputs: Record; outputs: Record; referenceOutputs?: Record; + attachments?: Record; }) => EvaluationResult | EvaluationResults) | ((args: { run: Run; @@ -105,6 +117,7 @@ export type EvaluatorT = inputs: Record; outputs: Record; referenceOutputs?: Record; + attachments?: Record; }) => Promise); interface _ForwardResults { @@ -127,6 +140,7 @@ interface _ExperimentManagerArgs { examples?: 
Example[]; numRepetitions?: number; _runsArray?: Run[]; + includeAttachments?: boolean; } type BaseEvaluateOptions = { @@ -178,6 +192,11 @@ export interface EvaluateOptions extends BaseEvaluateOptions { * examples, or a generator of examples. */ data: DataT; + /** + * Whether to use attachments for the experiment. + * @default false + */ + includeAttachments?: boolean; } export interface ComparativeEvaluateOptions extends BaseEvaluateOptions { @@ -256,6 +275,8 @@ export class _ExperimentManager { _metadata: KVMap; _description?: string; + _includeAttachments?: boolean; + get experimentName(): string { if (this._experimentName) { return this._experimentName; @@ -271,7 +292,10 @@ export class _ExperimentManager { if (!this._data) { throw new Error("Data not provided in this experiment."); } - const unresolvedData = _resolveData(this._data, { client: this.client }); + const unresolvedData = _resolveData(this._data, { + client: this.client, + includeAttachments: this._includeAttachments, + }); if (!this._examples) { this._examples = []; } @@ -369,6 +393,7 @@ export class _ExperimentManager { this._evaluationResults = args.evaluationResults; this._summaryResults = args.summaryResults; this._numRepetitions = args.numRepetitions; + this._includeAttachments = args.includeAttachments; } _getExperiment(): TracerSession { @@ -465,6 +490,7 @@ export class _ExperimentManager { client: this.client, evaluationResults: this._evaluationResults, summaryResults: this._summaryResults, + includeAttachments: this._includeAttachments, }); } @@ -485,6 +511,7 @@ export class _ExperimentManager { yield pred.run; } })(), + includeAttachments: this._includeAttachments, }); } @@ -515,6 +542,7 @@ export class _ExperimentManager { } })(), summaryResults: this._summaryResults, + includeAttachments: this._includeAttachments, }); } @@ -532,6 +560,7 @@ export class _ExperimentManager { _runsArray: this._runsArray, evaluationResults: this._evaluationResults, summaryResults: aggregateFeedbackGen, + 
includeAttachments: this._includeAttachments, }); } @@ -603,7 +632,8 @@ export class _ExperimentManager { example, this.experimentName, this._metadata, - this.client + this.client, + this._includeAttachments ); } } else { @@ -621,7 +651,8 @@ export class _ExperimentManager { example, this.experimentName, this._metadata, - this.client + this.client, + this._includeAttachments ) ); } @@ -794,9 +825,9 @@ export class _ExperimentManager { return strMiliseconds ?? ""; } - const jsDate = new Date(date); + const jsDate = new Date(date!); - let source = getMiliseconds(date); + let source = getMiliseconds(date!); let parsed = getMiliseconds(jsDate.toISOString()); const length = Math.max(source.length, parsed.length); @@ -943,6 +974,7 @@ async function _evaluate( experiment: experiment_ ?? fields.experimentPrefix, runs: newRuns ?? undefined, numRepetitions: fields.numRepetitions ?? 1, + includeAttachments: standardFields.includeAttachments, }).start(); if (_isCallable(target)) { @@ -972,7 +1004,8 @@ async function _forward( example: Example, experimentName: string, metadata: KVMap, - client: Client + client: Client, + includeAttachments?: boolean ): Promise<_ForwardResults> { let run: BaseRun | null = null; @@ -1006,16 +1039,29 @@ async function _forward( // no-op } // Issue with retrieving LangChain callbacks, rely on interop - if (langChainCallbacks === undefined) { + if (langChainCallbacks === undefined && !includeAttachments) { return await fn.invoke(inputs); - } else { + } else if (langChainCallbacks === undefined && includeAttachments) { + return await fn.invoke(inputs, { + attachments: example.attachments, + }); + } else if (!includeAttachments) { return await fn.invoke(inputs, { callbacks: langChainCallbacks }); + } else { + return await fn.invoke(inputs, { + attachments: example.attachments, + callbacks: langChainCallbacks, + }); } }, options) : traceable(fn, options); try { - await wrappedFn(example.inputs); + if (includeAttachments && !("invoke" in fn)) { + await 
wrappedFn(example.inputs, { attachments: example.attachments }); + } else { + await wrappedFn(example.inputs); + } } catch (e) { console.error(`Error running target function: ${e}`); printErrorStackTrace(e); @@ -1037,6 +1083,7 @@ function _resolveData( data: DataT, options: { client: Client; + includeAttachments?: boolean; } ): AsyncGenerator { let isUUID = false; @@ -1052,11 +1099,13 @@ function _resolveData( if (typeof data === "string" && isUUID) { return options.client.listExamples({ datasetId: data, + includeAttachments: options.includeAttachments, }) as AsyncGenerator; } if (typeof data === "string") { return options.client.listExamples({ datasetName: data, + includeAttachments: options.includeAttachments, }) as AsyncGenerator; } return data as AsyncGenerator; diff --git a/js/src/evaluation/evaluator.ts b/js/src/evaluation/evaluator.ts index 4e64460d3..cd7ce1fdd 100644 --- a/js/src/evaluation/evaluator.ts +++ b/js/src/evaluation/evaluator.ts @@ -138,6 +138,7 @@ export class DynamicRunEvaluator any> inputs: example?.inputs, outputs: run?.outputs, referenceOutputs: example?.outputs, + attachments: example?.attachments, }, example ); diff --git a/js/src/schemas.ts b/js/src/schemas.ts index 26afd7fc0..af7848e59 100644 --- a/js/src/schemas.ts +++ b/js/src/schemas.ts @@ -63,6 +63,10 @@ export interface BaseExample { source_run_id?: string; } +export interface AttachmentInfo { + presigned_url: string; +} + export type AttachmentData = Uint8Array | ArrayBuffer; export type Attachments = Record; @@ -248,12 +252,54 @@ export interface ExampleCreate extends BaseExample { split?: string | string[]; } +export interface ExampleUploadWithAttachments { + id?: string; + inputs: KVMap; + outputs?: KVMap; + metadata?: KVMap; + split?: string | string[]; + attachments?: Attachments; + created_at?: string; +} + +export interface ExampleUpdateWithAttachments { + id: string; + inputs?: KVMap; + outputs?: KVMap; + metadata?: KVMap; + split?: string | string[]; + attachments?: 
Attachments; + attachments_operations?: KVMap; +} + +export interface UploadExamplesResponse { + count: number; + example_ids: string[]; +} + +export interface UpdateExamplesResponse extends UploadExamplesResponse {} + export interface Example extends BaseExample { + id: string; + created_at: string; + modified_at?: string; + source_run_id?: string; + runs: Run[]; + attachments?: Record; + split?: string | string[]; +} + +interface RawAttachmentInfo { + presigned_url: string; + s3_url: string; +} +export interface RawExample extends BaseExample { id: string; created_at: string; modified_at: string; source_run_id?: string; runs: Run[]; + attachment_urls?: Record; } export interface ExampleUpdate { diff --git a/js/src/tests/client.int.test.ts b/js/src/tests/client.int.test.ts index 7b5d63f89..18582a01b 100644 --- a/js/src/tests/client.int.test.ts +++ b/js/src/tests/client.int.test.ts @@ -1,4 +1,10 @@ -import { Dataset, Example, Run, TracerSession } from "../schemas.js"; +import { + Dataset, + Example, + ExampleUpdateWithAttachments, + Run, + TracerSession, +} from "../schemas.js"; import { FunctionMessage, HumanMessage, @@ -7,6 +13,10 @@ import { import { Client } from "../client.js"; import { v4 as uuidv4 } from "uuid"; +import { ExampleUploadWithAttachments } from "../schemas.js"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; import { createRunsFactory, deleteDataset, @@ -583,7 +593,7 @@ test.concurrent( expect(examplesList2.length).toEqual(3); const datasetDiff = await client.diffDatasetVersions({ datasetId: dataset.id, - fromVersion: initialVersion, + fromVersion: initialVersion!, toVersion: "latest", }); expect(datasetDiff.examples_added.length).toEqual(3); @@ -1240,3 +1250,240 @@ test("annotationqueue crud", async () => { } } }); + +test("upload examples multipart", async () => { + const client = new Client(); + const datasetName = `__test_upload_examples_multipart${uuidv4().slice(0, 4)}`; + + // 
Clean up existing dataset if it exists + if (await client.hasDataset({ datasetName })) { + await client.deleteDataset({ datasetName }); + } + + // Create actual dataset + const dataset = await client.createDataset(datasetName, { + description: "Test dataset for multipart example upload", + dataType: "kv", + }); + + const pathname = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "test_data", + "parrot-icon.png" + ); + // Create test examples + const exampleId = uuidv4(); + const example1: ExampleUploadWithAttachments = { + id: exampleId, + inputs: { text: "hello world" }, + // check that passing no outputs works fine + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const example2: ExampleUploadWithAttachments = { + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + // Test creating examples + const createdExamples = await client.uploadExamplesMultipart(dataset.id, [ + example1, + example2, + ]); + + expect(createdExamples.count).toBe(2); + + const createdExample1 = await client.readExample(exampleId); + expect(createdExample1.inputs["text"]).toBe("hello world"); + + const createdExample2 = await client.readExample( + createdExamples.example_ids.find((id) => id !== exampleId)! 
+ ); + expect(createdExample2.inputs["text"]).toBe("foo bar"); + expect(createdExample2.outputs?.["response"]).toBe("baz"); + + // Test examples were sent to correct dataset + const allExamplesInDataset = []; + for await (const example of client.listExamples({ + datasetId: dataset.id, + })) { + allExamplesInDataset.push(example); + } + expect(allExamplesInDataset.length).toBe(2); + + // Test invalid example fails + const example3: ExampleUploadWithAttachments = { + inputs: { text: "foo bar" }, + outputs: { response: "baz" }, + attachments: { + my_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + const errorResponse = await client.uploadExamplesMultipart(uuidv4(), [ + example3, + ]); + expect(errorResponse).toHaveProperty("error"); + + // Clean up + await client.deleteDataset({ datasetName }); +}); + +test("update examples multipart", async () => { + const client = new Client(); + const datasetName = `__test_update_examples_multipart${uuidv4().slice(0, 4)}`; + + // Clean up existing dataset if it exists + if (await client.hasDataset({ datasetName })) { + await client.deleteDataset({ datasetName }); + } + + // Create actual dataset + const dataset = await client.createDataset(datasetName, { + description: "Test dataset for multipart example upload", + dataType: "kv", + }); + + const pathname = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "test_data", + "parrot-icon.png" + ); + // Create test examples + const exampleId = uuidv4(); + const example: ExampleUploadWithAttachments = { + id: exampleId, + metadata: { bar: "foo" }, + inputs: { text: "hello world" }, + // check that passing no outputs works fine + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + foo: ["image/png", fs.readFileSync(pathname)], + }, + }; + + // Create examples + await client.uploadExamplesMultipart(dataset.id, [example]); + + const exampleUpdate1: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world" }, + 
attachments_operations: { + retain: ["test_file"], + rename: { foo: "test_file" }, + }, + }; + + let response = await client.updateExamplesMultipart(dataset.id, [ + exampleUpdate1, + ]); + expect(response).toHaveProperty("error"); + + const exampleUpdate2: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world" }, + attachments_operations: { + retain: ["test_file"], + rename: { test_file: "test_file2" }, + }, + }; + + response = await client.updateExamplesMultipart(dataset.id, [exampleUpdate2]); + expect(response).toHaveProperty("error"); + + const exampleUpdate3: ExampleUpdateWithAttachments = { + id: exampleId, + inputs: { text: "hello world2" }, + attachments_operations: { + retain: ["test_file"], + rename: { foo: "bar" }, + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate3]); + + let updatedExample = await client.readExample(exampleId); + expect(updatedExample.inputs.text).toEqual("hello world2"); + expect(Object.keys(updatedExample.attachments ?? {}).sort()).toEqual( + ["attachment.bar", "attachment.test_file"].sort() + ); + expect(updatedExample.metadata).toEqual({ bar: "foo" }); + let attachmentData: Uint8Array | undefined = updatedExample.attachments?.[ + "attachment.test_file" + ].presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); + attachmentData = updatedExample.attachments?.["attachment.bar"].presigned_url + ? 
new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.bar"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); + + const exampleUpdate4: ExampleUpdateWithAttachments = { + id: exampleId, + metadata: { foo: "bar" }, + attachments: { + test_file2: ["image/png", fs.readFileSync(pathname)], + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate4]); + updatedExample = await client.readExample(exampleId); + expect(updatedExample.metadata).toEqual({ foo: "bar" }); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual([ + "attachment.test_file2", + ]); + attachmentData = updatedExample.attachments?.["attachment.test_file2"] + .presigned_url + ? new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file2"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); + + const exampleUpdate5: ExampleUpdateWithAttachments = { + id: exampleId, + split: ["foo", "bar"], + attachments: { + test_file: ["image/png", fs.readFileSync(pathname)], + }, + }; + + await client.updateExamplesMultipart(dataset.id, [exampleUpdate5]); + + updatedExample = await client.readExample(exampleId); + expect(updatedExample.metadata).toEqual({ + foo: "bar", + dataset_split: ["foo", "bar"], + }); + expect(Object.keys(updatedExample.attachments ?? {})).toEqual([ + "attachment.test_file", + ]); + attachmentData = updatedExample.attachments?.["attachment.test_file"] + .presigned_url + ? 
new Uint8Array( + (await fetch( + updatedExample.attachments?.["attachment.test_file"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + expect(attachmentData).toEqual(new Uint8Array(fs.readFileSync(pathname))); + + // Clean up + await client.deleteDataset({ datasetName }); +}); diff --git a/js/src/tests/evaluate_attachments.int.test.ts b/js/src/tests/evaluate_attachments.int.test.ts new file mode 100644 index 000000000..9a78a3f9a --- /dev/null +++ b/js/src/tests/evaluate_attachments.int.test.ts @@ -0,0 +1,465 @@ +import { evaluate, TargetConfigT } from "../evaluation/_runner.js"; +import { ExampleUploadWithAttachments } from "../schemas.js"; +import { Client } from "../index.js"; +import { v4 as uuidv4 } from "uuid"; +import { RunnableLambda } from "@langchain/core/runnables"; + +function arraysEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +test("evaluate can handle examples with attachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" 
}, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that processes attachments + const targetFunc = async ( + _inputs: Record, + config?: TargetConfigT + ) => { + // Verify we receive the attachment data + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? 
new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // TODO: Uncomment when attachments are traced correctly + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have one result, from the single custom evaluator + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("evaluate with attachments not in target function", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" 
}, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that ignores attachments (only the evaluator reads them) + const targetFunc = async (_inputs: Record) => { + return { answer: "test image" }; + }; + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // TODO: Uncomment when attachments are traced correctly + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have one result, from the single custom evaluator + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + 
expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("multiple evaluators with attachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + // Define target function that processes attachments + const targetFunc = async ( + _inputs: Record, + config?: TargetConfigT + ) => { + // Verify we receive the attachment data + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + const customEvaluatorOne = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? 
new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key1", + score: 1, + }; + }; + + const customEvaluatorTwo = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key2", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluatorOne, customEvaluatorTwo], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // TODO: Uncomment when attachments are traced correctly + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(2); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult1 = evalResults.find((r) => r.key === "key1"); + 
expect(evalResult1?.score).toBe(1); + const evalResult2 = evalResults.find((r) => r.key === "key2"); + expect(evalResult2?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("evaluate with attachments runnable target function", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" }, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + const myFunction = async (_input: any, config?: any) => { + if (!config?.attachments?.["attachment.image"]) { + throw new Error("Image attachment not found"); + } + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = config?.attachments?.[ + "attachment.image" + ].presigned_url + ? new Uint8Array( + (await fetch( + config?.attachments?.["attachment.image"].presigned_url + ).then((res) => res.arrayBuffer())) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { answer: "test image" }; + }; + + // Define target function that processes attachments + const targetFunc = RunnableLambda.from(myFunction); + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeDefined(); + expect(attachments?.["attachment.image"]).toBeDefined(); + const expectedData = new Uint8Array( + Buffer.from("fake image data for testing") + ); + const attachmentData: Uint8Array | undefined = attachments?.[ + "attachment.image" + ].presigned_url + ? 
new Uint8Array( + (await fetch(attachments?.["attachment.image"].presigned_url).then( + (res) => res.arrayBuffer() + )) as ArrayBuffer + ) + : undefined; + if (!arraysEqual(attachmentData ?? new Uint8Array(), expectedData)) { + throw new Error("Image data does not match expected data"); + } + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + includeAttachments: true, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + for (const result of evalRes.results) { + // TODO: Uncomment when attachments are traced correctly + // expect(result.run.attachments).toBeDefined(); + expect(result.run).toBeDefined(); + expect(result.example).toBeDefined(); + expect(result.evaluationResults).toBeDefined(); + + // Verify evaluator results + const evalResults = result.evaluationResults.results; + expect(evalResults).toHaveLength(1); // Should have attachment_presence and attachment_processing results + + // Check that attachments were properly processed + const evalResult = evalResults.find((r) => r.key === "key"); + expect(evalResult?.score).toBe(1); + } + + // Cleanup + await client.deleteDataset({ datasetName }); +}); + +test("attachments don't appear without includeAttachments", async () => { + const client = new Client(); + const datasetName = `test_dataset_attachments_${uuidv4()}`; + const dataset = await client.createDataset(datasetName); + + // Create examples with attachments + const example: ExampleUploadWithAttachments = { + inputs: { question: "What is shown in the image?" 
}, + outputs: { answer: "test image" }, + attachments: { + image: ["image/png", Buffer.from("fake image data for testing")], + }, + }; + + await client.uploadExamplesMultipart(dataset.id, [example]); + + const myFunction = async (_input: any, config?: any) => { + if (config?.attachments) { + throw new Error("Attachments should not exist!"); + } + return { answer: "test image" }; + }; + + // Define target function that processes attachments + const targetFunc = RunnableLambda.from(myFunction); + + const customEvaluator = async ({ attachments }: { attachments?: any }) => { + expect(attachments).toBeUndefined(); + return { + key: "key", + score: 1, + }; + }; + + // Run evaluation + const evalRes = await evaluate(targetFunc, { + data: datasetName, + description: "Testing attachment handling in evaluation", + client: client, + evaluators: [customEvaluator], + numRepetitions: 2, + }); + + // Verify results + expect(evalRes.results).toHaveLength(2); + + // Cleanup + await client.deleteDataset({ datasetName }); +});