Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

js[minor]: Add clonePublicDataset and listSharedExamples #937

Merged
merged 7 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,4 @@
},
"./package.json": "./package.json"
}
}
}
156 changes: 156 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1289,7 +1289,7 @@
treeFilter?: string;
isRoot?: boolean;
dataSourceType?: string;
}): Promise<any> {

Check warning on line 1292 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
let projectIds_ = projectIds || [];
if (projectNames) {
projectIds_ = [
Expand Down Expand Up @@ -1528,6 +1528,61 @@
return dataset as Dataset;
}

/**
 * Get shared examples.
 *
 * @param {string} shareToken The share token to get examples for. A share token is the UUID (or LangSmith URL, including UUID) generated when explicitly marking an example as public.
 * @param {Object} [options] Additional options for listing the examples.
 * @param {string[] | undefined} [options.exampleIds] A list of example IDs to filter by.
 * @returns {Promise<Example[]>} The shared examples, each annotated with `_hostUrl`.
 * @throws {Error} If the server responds with a non-2xx status. The message
 *   includes the status and, when the error body is JSON with a `detail`
 *   field, that detail.
 */
public async listSharedExamples(
  shareToken: string,
  options?: { exampleIds?: string[] }
): Promise<Example[]> {
  // Each example ID becomes its own repeated `id` query parameter.
  const urlParams = new URLSearchParams();
  for (const exampleId of options?.exampleIds ?? []) {
    urlParams.append("id", exampleId);
  }

  const response = await this.caller.call(
    fetch,
    `${this.apiUrl}/public/${shareToken}/examples?${urlParams.toString()}`,
    {
      method: "GET",
      headers: this.headers,
      signal: AbortSignal.timeout(this.timeout_ms),
      ...this.fetchOptions,
    }
  );

  if (!response.ok) {
    // The error body is not guaranteed to be JSON (e.g. a proxy error page),
    // and `detail` is not guaranteed to be an array, so read it defensively
    // rather than letting a parse/TypeError hide the real HTTP status.
    let detail: unknown;
    try {
      const errorBody: unknown = await response.json();
      if (
        typeof errorBody === "object" &&
        errorBody !== null &&
        "detail" in errorBody
      ) {
        detail = (errorBody as { detail: unknown }).detail;
      }
    } catch (_) {
      // Non-JSON error body: fall through to the generic message below.
    }

    if (detail != null) {
      const detailLines: unknown[] = Array.isArray(detail) ? detail : [detail];
      const message = detailLines
        .map((line) => (typeof line === "string" ? line : JSON.stringify(line)))
        .join("\n");
      throw new Error(
        `Failed to list shared examples.\nStatus: ${response.status}\nMessage: ${message}`
      );
    }
    throw new Error(
      `Failed to list shared examples: ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  // The raw payload is untyped JSON; `_hostUrl` is attached to every item.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return result.map((example: any) => ({
    ...example,
    _hostUrl: this.getHostUrl(),
  }));
}

public async createProject({
projectName,
description = null,
Expand Down Expand Up @@ -2681,7 +2736,7 @@
}

const feedbackResult = await evaluator.evaluateRun(run_, referenceExample);
const [_, feedbacks] = await this._logEvaluationFeedback(

Check warning on line 2739 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'_' is assigned a value but never used
feedbackResult,
run_,
sourceInfo
Expand Down Expand Up @@ -3015,7 +3070,7 @@
async _logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3073 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<[results: EvaluationResult[], feedbacks: Feedback[]]> {
const evalResults: Array<EvaluationResult> =
this._selectEvalResults(evaluatorResponse);
Expand Down Expand Up @@ -3054,7 +3109,7 @@
public async logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3112 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<EvaluationResult[]> {
const [results] = await this._logEvaluationFeedback(
evaluatorResponse,
Expand Down Expand Up @@ -3120,7 +3175,7 @@
promptIdentifier: string,
like: boolean
): Promise<LikePromptResponse> {
const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);

Check warning on line 3178 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'_' is assigned a value but never used
const response = await this.caller.call(
fetch,
`${this.apiUrl}/likes/${owner}/${promptName}`,
Expand Down Expand Up @@ -3232,7 +3287,7 @@
}

public async getPrompt(promptIdentifier: string): Promise<Prompt | null> {
const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);

Check warning on line 3290 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'_' is assigned a value but never used
const response = await this.caller.call(
fetch,
`${this.apiUrl}/repos/${owner}/${promptName}`,
Expand Down Expand Up @@ -3281,7 +3336,7 @@
);
}

const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);

Check warning on line 3339 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'_' is assigned a value but never used
if (!(await this._currentTenantIsOwner(owner))) {
throw await this._ownerConflictError("create a prompt", owner);
}
Expand Down Expand Up @@ -3314,7 +3369,7 @@

public async createCommit(
promptIdentifier: string,
object: any,

Check warning on line 3372 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
options?: {
parentCommitHash?: string;
}
Expand All @@ -3323,7 +3378,7 @@
throw new Error("Prompt does not exist, you must create it first.");
}

const [owner, promptName, _] = parsePromptIdentifier(promptIdentifier);

Check warning on line 3381 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'_' is assigned a value but never used
const resolvedParentCommitHash =
options?.parentCommitHash === "latest" || !options?.parentCommitHash
? await this._getLatestCommitHash(`${owner}/${promptName}`)
Expand Down Expand Up @@ -3561,4 +3616,105 @@
});
return url;
}

/**
 * Clone a public dataset to your own langsmith tenant.
 * This operation is idempotent. If you already have a dataset with the given name,
 * this function will do nothing.
 *
 * @param {string} tokenOrUrl The token of the public dataset to clone.
 * @param {Object} [options] Additional options for cloning the dataset.
 * @param {string} [options.sourceApiUrl] The URL of the langsmith server where the data is hosted. Defaults to the API URL of your current client.
 * @param {string} [options.datasetName] The name of the dataset to create in your tenant. Defaults to the name of the public dataset.
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while creating the examples, after
 *   logging a hint that the partially created dataset should be deleted.
 */
async clonePublicDataset(
  tokenOrUrl: string,
  options: {
    sourceApiUrl?: string;
    datasetName?: string;
  } = {}
): Promise<void> {
  const { sourceApiUrl = this.apiUrl, datasetName } = options;
  const [parsedApiUrl, tokenUuid] = this.parseTokenOrUrl(
    tokenOrUrl,
    sourceApiUrl
  );
  const sourceClient = new Client({
    apiUrl: parsedApiUrl,
    // Placeholder API key not needed anymore in most cases, but
    // some private deployments may have API key-based rate limiting
    // that would cause this to fail if we provide no value.
    apiKey: "placeholder",
  });

  const ds = await sourceClient.readSharedDataset(tokenUuid);
  const finalDatasetName = datasetName || ds.name;

  try {
    // Look the dataset up by *name*: `finalDatasetName` is a name, not an
    // ID, so passing it as `datasetId` would never match an existing dataset
    // and the idempotence guarantee above would not hold.
    if (await this.hasDataset({ datasetName: finalDatasetName })) {
      console.log(
        `Dataset ${finalDatasetName} already exists in your tenant. Skipping.`
      );
      return;
    }
  } catch (_) {
    // Best-effort check: any error raised by `.hasDataset` is treated as
    // "dataset does not exist" and we fall through to creating it.
  }

  // Fetch examples first, then create the dataset
  const examples = await sourceClient.listSharedExamples(tokenUuid);
  const dataset = await this.createDataset(finalDatasetName, {
    description: ds.description,
    dataType: ds.data_type || "kv",
    inputsSchema: ds.inputs_schema_definition ?? undefined,
    outputsSchema: ds.outputs_schema_definition ?? undefined,
  });
  try {
    await this.createExamples({
      inputs: examples.map((e) => e.inputs),
      // NOTE(review): examples without outputs are dropped from this array,
      // so it can be shorter than `inputs`. If `createExamples` pairs inputs
      // and outputs by index, later outputs would shift — confirm against
      // `createExamples`.
      outputs: examples.flatMap((e) => (e.outputs ? [e.outputs] : [])),
      datasetId: dataset.id,
    });
  } catch (e) {
    console.error(
      `An error occurred while creating dataset ${finalDatasetName}. ` +
        "You should delete it manually."
    );
    throw e;
  }
}

/**
 * Extract a share token from either a bare UUID or a public share URL.
 *
 * @param urlOrToken A UUID share token, or a URL whose path contains it.
 * @param apiUrl The API URL returned unchanged as the first tuple element.
 * @param numParts Position of the token counted from the end of the URL
 *   path (the default of 2 selects the second-to-last segment).
 * @param kind Resource kind, used only in the error message.
 * @returns A `[apiUrl, tokenUuid]` tuple.
 * @throws {Error} If the input is neither a UUID nor a URL carrying a UUID
 *   at the expected path position.
 */
private parseTokenOrUrl(
  urlOrToken: string,
  apiUrl: string,
  numParts = 2,
  kind = "dataset"
): [string, string] {
  // Try parsing as UUID
  try {
    assertUuid(urlOrToken); // Will throw if it's not a UUID.
    return [apiUrl, urlOrToken];
  } catch (_) {
    // no-op if it's not a uuid
  }

  // Parse as URL
  try {
    const parsedUrl = new URL(urlOrToken); // Throws on a malformed URL.
    const pathParts = parsedUrl.pathname
      .split("/")
      .filter((part) => part !== "");

    if (pathParts.length >= numParts) {
      const tokenUuid = pathParts[pathParts.length - numParts];
      // Reject URLs whose selected segment is not actually a UUID instead
      // of handing an arbitrary path segment back as the share token.
      assertUuid(tokenUuid);
      return [apiUrl, tokenUuid];
    }
  } catch (_) {
    // Malformed URL or non-UUID segment: reported by the throw below.
  }

  // Every failure path reports the same message.
  throw new Error(`Invalid public ${kind} URL or token: ${urlOrToken}`);
}
}
2 changes: 2 additions & 0 deletions js/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ export interface BaseDataset {
description: string;
tenant_id: string;
data_type?: DataType;
inputs_schema_definition?: KVMap;
outputs_schema_definition?: KVMap;
}

export interface Dataset extends BaseDataset {
Expand Down
41 changes: 40 additions & 1 deletion js/src/tests/client.int.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Dataset, Run, TracerSession } from "../schemas.js";
import { Dataset, Example, Run, TracerSession } from "../schemas.js";
import {
FunctionMessage,
HumanMessage,
Expand Down Expand Up @@ -1074,3 +1074,42 @@ test("Test pull prompt include model", async () => {

await client.deletePrompt(promptName);
});

test("list shared examples can list shared examples", async () => {
  // Share token of the public "multiverse math" dataset.
  const shareToken = "620596ee-570b-4d2b-8c8f-f828adbe5242";
  const langsmith = new Client();

  const examples = await langsmith.listSharedExamples(shareToken);

  // The shared dataset is expected to expose at least one example.
  expect(examples.length).toBeGreaterThan(0);
});

test("clonePublicDataset method can clone a dataset", async () => {
  // Public share URL of the "multiverse math" dataset.
  const publicUrl =
    "https://smith.langchain.com/public/620596ee-570b-4d2b-8c8f-f828adbe5242/d";
  const datasetName = "multiverse_math_public_testing";
  const client = new Client();

  try {
    await client.clonePublicDataset(publicUrl, { datasetName });

    // The clone must now be visible under the requested name...
    const exists = await client.hasDataset({ datasetName });
    expect(exists).toBe(true);

    // ...and must contain at least one example.
    const cloned: Example[] = [];
    for await (const example of client.listExamples({ datasetName })) {
      cloned.push(example);
    }
    expect(cloned.length).toBeGreaterThan(0);
  } finally {
    // Best-effort cleanup; the dataset may not exist if cloning failed.
    try {
      await client.deleteDataset({ datasetName });
    } catch (_) {
      // no-op if failure
    }
  }
});
Loading