From ae5e4c5bf67b65f8b745f7877965b7bdd5f651d9 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Fri, 2 Aug 2024 14:50:07 -0700 Subject: [PATCH 1/3] Inline ml-distance functions due to type errors, fix type errors --- deno.json | 1 - .../docs/integrations/vectorstores/memory.mdx | 18 +- environment_tests/docker-compose.yml | 2 + examples/package.json | 1 - .../vector_stores/memory_custom_similarity.ts | 13 - langchain-core/package.json | 1 - .../src/language_models/chat_models.ts | 22 +- langchain-core/src/language_models/llms.ts | 24 +- langchain-core/src/utils/math.ts | 13 +- .../src/utils/ml-distance-euclidean/LICENSE | 21 ++ .../utils/ml-distance-euclidean/euclidean.ts | 11 + langchain-core/src/utils/ml-distance/LICENSE | 21 ++ .../src/utils/ml-distance/distances.ts | 14 + .../src/utils/ml-distance/similarities.ts | 17 ++ langchain-core/src/utils/testing/index.ts | 8 +- langchain/package.json | 1 - .../src/evaluation/embedding_distance/base.ts | 12 +- .../src/util/ml-distance-euclidean/LICENSE | 21 ++ .../util/ml-distance-euclidean/euclidean.ts | 11 + langchain/src/util/ml-distance/LICENSE | 21 ++ langchain/src/util/ml-distance/distances.ts | 49 ++++ .../src/util/ml-distance/similarities.ts | 17 ++ langchain/src/vectorstores/memory.ts | 8 +- .../src/vectorstores/tests/memory.test.ts | 6 +- libs/langchain-baidu-qianfan/package.json | 1 - yarn.lock | 246 ------------------ 26 files changed, 261 insertions(+), 319 deletions(-) delete mode 100644 examples/src/indexes/vector_stores/memory_custom_similarity.ts create mode 100644 langchain-core/src/utils/ml-distance-euclidean/LICENSE create mode 100644 langchain-core/src/utils/ml-distance-euclidean/euclidean.ts create mode 100644 langchain-core/src/utils/ml-distance/LICENSE create mode 100644 langchain-core/src/utils/ml-distance/distances.ts create mode 100644 langchain-core/src/utils/ml-distance/similarities.ts create mode 100644 langchain/src/util/ml-distance-euclidean/LICENSE create mode 100644 
langchain/src/util/ml-distance-euclidean/euclidean.ts create mode 100644 langchain/src/util/ml-distance/LICENSE create mode 100644 langchain/src/util/ml-distance/distances.ts create mode 100644 langchain/src/util/ml-distance/similarities.ts diff --git a/deno.json b/deno.json index dbb94a073424..367aeecd135f 100644 --- a/deno.json +++ b/deno.json @@ -22,7 +22,6 @@ "zod": "npm:/zod", "zod-to-json-schema": "npm:/zod-to-json-schema", "node-llama-cpp": "npm:/node-llama-cpp", - "ml-distance": "npm:/ml-distance", "pdf-parse": "npm:/pdf-parse", "peggy": "npm:/peggy", "readline": "https://deno.land/x/readline@v1.1.0/mod.ts", diff --git a/docs/core_docs/docs/integrations/vectorstores/memory.mdx b/docs/core_docs/docs/integrations/vectorstores/memory.mdx index fc133e3feeb6..df34fb92a72c 100644 --- a/docs/core_docs/docs/integrations/vectorstores/memory.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/memory.mdx @@ -34,6 +34,18 @@ import ExampleLoader from "@examples/indexes/vector_stores/memory_fromdocs.ts"; ### Use a custom similarity metric -import ExampleCustom from "@examples/indexes/vector_stores/memory_custom_similarity.ts"; - -{ExampleCustom} +```ts +import { MemoryVectorStore } from "langchain/vectorstores/memory"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { similarity } from "ml-distance"; + +const vectorStore = await MemoryVectorStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [{ id: 2 }, { id: 1 }, { id: 3 }], + new OpenAIEmbeddings(), + { similarity: similarity.pearson } +); + +const resultOne = await vectorStore.similaritySearch("hello world", 1); +console.log(resultOne); +``` diff --git a/environment_tests/docker-compose.yml b/environment_tests/docker-compose.yml index aeaed8bd108b..fec57cef9a16 100644 --- a/environment_tests/docker-compose.yml +++ b/environment_tests/docker-compose.yml @@ -152,6 +152,8 @@ services: condition: service_completed_successfully test-exports-esm: condition: service_completed_successfully 
+ test-exports-tsc: + condition: service_completed_successfully test-exports-cjs: condition: service_completed_successfully test-exports-cf: diff --git a/examples/package.json b/examples/package.json index 5be71f5be945..4c97b78e92c5 100644 --- a/examples/package.json +++ b/examples/package.json @@ -91,7 +91,6 @@ "js-yaml": "^4.1.0", "langchain": "workspace:*", "langsmith": "^0.1.30", - "ml-distance": "^4.0.0", "mongodb": "^6.3.0", "pg": "^8.11.0", "pickleparser": "^0.2.1", diff --git a/examples/src/indexes/vector_stores/memory_custom_similarity.ts b/examples/src/indexes/vector_stores/memory_custom_similarity.ts deleted file mode 100644 index 6d7453f0e356..000000000000 --- a/examples/src/indexes/vector_stores/memory_custom_similarity.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { similarity } from "ml-distance"; - -const vectorStore = await MemoryVectorStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [{ id: 2 }, { id: 1 }, { id: 3 }], - new OpenAIEmbeddings(), - { similarity: similarity.pearson } -); - -const resultOne = await vectorStore.similaritySearch("hello world", 1); -console.log(resultOne); diff --git a/langchain-core/package.json b/langchain-core/package.json index 8b4153454f47..2301a805215d 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -46,7 +46,6 @@ "decamelize": "1.2.0", "js-tiktoken": "^1.0.12", "langsmith": "~0.1.39", - "ml-distance": "^4.0.0", "mustache": "^4.2.0", "p-queue": "^6.6.2", "p-retry": "4", diff --git a/langchain-core/src/language_models/chat_models.ts b/langchain-core/src/language_models/chat_models.ts index 30256073c33a..89eca0b0329b 100644 --- a/langchain-core/src/language_models/chat_models.ts +++ b/langchain-core/src/language_models/chat_models.ts @@ -123,17 +123,6 @@ export type LangSmithParams = { ls_stop?: Array; }; -interface ChatModelGenerateCachedParameters< - 
T extends BaseChatModel, - CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions -> { - messages: BaseMessageLike[][]; - cache: BaseCache; - llmStringKey: string; - parsedOptions: T["ParsedCallOptions"]; - handledOptions: RunnableConfig; -} - /** * Base class for chat models. It extends the BaseLanguageModel class and * provides methods for generating chat based on input messages. @@ -449,9 +438,14 @@ export abstract class BaseChatModel< llmStringKey, parsedOptions, handledOptions, - }: ChatModelGenerateCachedParameters): Promise< - LLMResult & { missingPromptIndices: number[] } - > { + }: { + messages: BaseMessageLike[][]; + cache: BaseCache; + llmStringKey: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + parsedOptions: any; + handledOptions: RunnableConfig; + }): Promise<LLMResult & { missingPromptIndices: number[] }> { const baseMessages = messages.map((messageList) => messageList.map(coerceMessageLikeToMessage) ); diff --git a/langchain-core/src/language_models/llms.ts b/langchain-core/src/language_models/llms.ts index 20b0e812deb7..ef990198e1df 100644 --- a/langchain-core/src/language_models/llms.ts +++ b/langchain-core/src/language_models/llms.ts @@ -43,18 +43,6 @@ export interface BaseLLMParams extends BaseLanguageModelParams { export interface BaseLLMCallOptions extends BaseLanguageModelCallOptions {} -interface LLMGenerateCachedParameters< - T extends BaseLLM, - CallOptions extends BaseLLMCallOptions = BaseLLMCallOptions -> { - prompts: string[]; - cache: BaseCache; - llmStringKey: string; - parsedOptions: T["ParsedCallOptions"]; - handledOptions: RunnableConfig; - runId?: string; -} - /** * LLM Wrapper. Takes in a prompt (or prompts) and returns a string.
*/ @@ -351,9 +339,15 @@ export abstract class BaseLLM< parsedOptions, handledOptions, runId, - }: LLMGenerateCachedParameters): Promise< - LLMResult & { missingPromptIndices: number[] } - > { + }: { + prompts: string[]; + cache: BaseCache; + llmStringKey: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + parsedOptions: any; + handledOptions: RunnableConfig; + runId?: string; + }): Promise<LLMResult & { missingPromptIndices: number[] }> { const callbackManager_ = await CallbackManager.configure( handledOptions.callbacks, this.callbacks, diff --git a/langchain-core/src/utils/math.ts b/langchain-core/src/utils/math.ts index fe703c2d5f79..68cf439831d7 100644 --- a/langchain-core/src/utils/math.ts +++ b/langchain-core/src/utils/math.ts @@ -1,7 +1,6 @@ -import { - similarity as ml_distance_similarity, - distance as ml_distance, -} from "ml-distance"; +import { cosine } from "./ml-distance/similarities.js"; +import { innerProduct as innerProductDistance } from "./ml-distance/distances.js"; +import { euclidean } from "./ml-distance-euclidean/euclidean.js"; type VectorFunction = (xVector: number[], yVector: number[]) => number; @@ -65,15 +64,15 @@ export function normalize(M: number[][], similarity = false): number[][] { * @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
*/ export function cosineSimilarity(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance_similarity.cosine); + return matrixFunc(X, Y, cosine); } export function innerProduct(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.innerProduct); + return matrixFunc(X, Y, innerProductDistance); } export function euclideanDistance(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.euclidean); + return matrixFunc(X, Y, euclidean); } /** diff --git a/langchain-core/src/utils/ml-distance-euclidean/LICENSE b/langchain-core/src/utils/ml-distance-euclidean/LICENSE new file mode 100644 index 000000000000..fa5c2fc3349d --- /dev/null +++ b/langchain-core/src/utils/ml-distance-euclidean/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts b/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts new file mode 100644 index 000000000000..800638c60f0a --- /dev/null +++ b/langchain-core/src/utils/ml-distance-euclidean/euclidean.ts @@ -0,0 +1,11 @@ +export function squaredEuclidean(p: number[], q: number[]) { + let d = 0; + for (let i = 0; i < p.length; i++) { + d += (p[i] - q[i]) * (p[i] - q[i]); + } + return d; +} + +export function euclidean(p: number[], q: number[]) { + return Math.sqrt(squaredEuclidean(p, q)); +} diff --git a/langchain-core/src/utils/ml-distance/LICENSE b/langchain-core/src/utils/ml-distance/LICENSE new file mode 100644 index 000000000000..6b7d9fe61f96 --- /dev/null +++ b/langchain-core/src/utils/ml-distance/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/langchain-core/src/utils/ml-distance/distances.ts b/langchain-core/src/utils/ml-distance/distances.ts new file mode 100644 index 000000000000..98df550df9f5 --- /dev/null +++ b/langchain-core/src/utils/ml-distance/distances.ts @@ -0,0 +1,14 @@ +/** + * Returns the Inner Product similarity between vectors a and b + * @link [Inner Product Similarity algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ +export function innerProduct(a: number[], b: number[]): number { + let ans = 0; + for (let i = 0; i < a.length; i++) { + ans += a[i] * b[i]; + } + return ans; +} diff --git a/langchain-core/src/utils/ml-distance/similarities.ts b/langchain-core/src/utils/ml-distance/similarities.ts new file mode 100644 index 000000000000..9ea574e14a4f --- /dev/null +++ b/langchain-core/src/utils/ml-distance/similarities.ts @@ -0,0 +1,17 @@ +/** + * Returns the cosine similarity between vectors a and b + * @param a - first vector + * @param b - second vector + * + */ +export function cosine(a: number[], b: number[]): number { + let p = 0; + let p2 = 0; + let q2 = 0; + for (let i = 0; i < a.length; i++) { + p += a[i] * b[i]; + p2 += a[i] * a[i]; + q2 += b[i] * b[i]; + } + return p / (Math.sqrt(p2) * Math.sqrt(q2)); +} diff --git a/langchain-core/src/utils/testing/index.ts b/langchain-core/src/utils/testing/index.ts index 685fae8d3749..65d197f6c23e 100644 --- a/langchain-core/src/utils/testing/index.ts +++ b/langchain-core/src/utils/testing/index.ts @@ -2,7 +2,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-unused-vars */ -import { similarity as ml_distance_similarity } from "ml-distance"; import { z } from "zod"; import { BaseCallbackConfig, @@ -46,6 +45,7 @@ import { StructuredOutputMethodOptions, } from "../../language_models/base.js"; import { VectorStore } from "../../vectorstores.js"; +import { cosine } from
"../ml-distance/similarities.js"; /** * Parser for comma-separated values. It splits the input text by commas @@ -750,7 +750,7 @@ interface MemoryVector { * function. */ export interface FakeVectorStoreArgs { - similarity?: typeof ml_distance_similarity.cosine; + similarity?: typeof cosine; } /** @@ -763,7 +763,7 @@ export class FakeVectorStore extends VectorStore { memoryVectors: MemoryVector[] = []; - similarity: typeof ml_distance_similarity.cosine; + similarity: typeof cosine; _vectorstoreType(): string { return "memory"; @@ -775,7 +775,7 @@ export class FakeVectorStore extends VectorStore { ) { super(embeddings, rest); - this.similarity = similarity ?? ml_distance_similarity.cosine; + this.similarity = similarity ?? cosine; } /** diff --git a/langchain/package.json b/langchain/package.json index 641cb9980e49..90b14e2b4ed1 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -942,7 +942,6 @@ "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", "langsmith": "~0.1.40", - "ml-distance": "^4.0.0", "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^10.0.0", diff --git a/langchain/src/evaluation/embedding_distance/base.ts b/langchain/src/evaluation/embedding_distance/base.ts index ddecd7067c0f..a3d3b035032e 100644 --- a/langchain/src/evaluation/embedding_distance/base.ts +++ b/langchain/src/evaluation/embedding_distance/base.ts @@ -1,4 +1,3 @@ -import { distance, similarity } from "ml-distance"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { ChainValues } from "@langchain/core/utils/types"; import { OpenAIEmbeddings } from "@langchain/openai"; @@ -13,6 +12,9 @@ import { StringEvaluator, StringEvaluatorArgs, } from "../base.js"; +import { cosine } from "../../util/ml-distance/similarities.js"; +import { chebyshev, manhattan } from "../../util/ml-distance/distances.js"; +import { euclidean } from "../../util/ml-distance-euclidean/euclidean.js"; /** * @@ -58,10 +60,10 @@ export function getDistanceCalculationFunction( ): 
VectorFunction { const distanceFunctions: { [key in EmbeddingDistanceType]: VectorFunction } = { - cosine: (X: number[], Y: number[]) => 1.0 - similarity.cosine(X, Y), - euclidean: distance.euclidean, - manhattan: distance.manhattan, - chebyshev: distance.chebyshev, + cosine: (X: number[], Y: number[]) => 1.0 - cosine(X, Y), + euclidean: euclidean, + manhattan: manhattan, + chebyshev: chebyshev, }; return distanceFunctions[distanceType]; diff --git a/langchain/src/util/ml-distance-euclidean/LICENSE b/langchain/src/util/ml-distance-euclidean/LICENSE new file mode 100644 index 000000000000..fa5c2fc3349d --- /dev/null +++ b/langchain/src/util/ml-distance-euclidean/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/langchain/src/util/ml-distance-euclidean/euclidean.ts b/langchain/src/util/ml-distance-euclidean/euclidean.ts new file mode 100644 index 000000000000..800638c60f0a --- /dev/null +++ b/langchain/src/util/ml-distance-euclidean/euclidean.ts @@ -0,0 +1,11 @@ +export function squaredEuclidean(p: number[], q: number[]) { + let d = 0; + for (let i = 0; i < p.length; i++) { + d += (p[i] - q[i]) * (p[i] - q[i]); + } + return d; +} + +export function euclidean(p: number[], q: number[]) { + return Math.sqrt(squaredEuclidean(p, q)); +} diff --git a/langchain/src/util/ml-distance/LICENSE b/langchain/src/util/ml-distance/LICENSE new file mode 100644 index 000000000000..6b7d9fe61f96 --- /dev/null +++ b/langchain/src/util/ml-distance/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 ml.js + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/langchain/src/util/ml-distance/distances.ts b/langchain/src/util/ml-distance/distances.ts new file mode 100644 index 000000000000..2f717c854d40 --- /dev/null +++ b/langchain/src/util/ml-distance/distances.ts @@ -0,0 +1,49 @@ +/** + * Returns the Inner Product similarity between vectors a and b + * @link [Inner Product Similarity algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ +export function innerProduct(a: number[], b: number[]): number { + let ans = 0; + for (let i = 0; i < a.length; i++) { + ans += a[i] * b[i]; + } + return ans; +} + +/** + * Returns the Chebyshev distance between vectors a and b + * @link [Chebyshev algorithm](https://en.wikipedia.org/wiki/Chebyshev_distance) + * @param a - first vector + * @param b - second vector + * + */ +export function chebyshev(a: number[], b: number[]): number { + let max = 0; + let aux = 0; + for (let i = 0; i < a.length; i++) { + aux = Math.abs(a[i] - b[i]); + if (max < aux) { + max = aux; + } + } + return max; +} + +/** + * Returns the Manhattan distance between vectors a and b + * @link [Manhattan algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf) + * @param a - first vector + * @param b - second vector + * + */ + +export function manhattan(a: number[], b: number[]): number { + let d = 0; + for (let i = 0; i < a.length; i++) { + d += Math.abs(a[i] - b[i]); + } + return d; +} diff --git a/langchain/src/util/ml-distance/similarities.ts b/langchain/src/util/ml-distance/similarities.ts new file mode 100644 index 000000000000..9ea574e14a4f --- /dev/null +++ b/langchain/src/util/ml-distance/similarities.ts @@ -0,0 +1,17 @@ +/** + * Returns the cosine similarity between vectors a and b + * @param a - first vector + * @param b - second vector + * + */ +export function cosine(a: number[], b: number[]): number { + let p = 0; + let p2 = 0; + let q2 = 0; + for (let i = 0; i < a.length;
i++) { + p += a[i] * b[i]; + p2 += a[i] * a[i]; + q2 += b[i] * b[i]; + } + return p / (Math.sqrt(p2) * Math.sqrt(q2)); +} diff --git a/langchain/src/vectorstores/memory.ts b/langchain/src/vectorstores/memory.ts index a584aa5b4333..3d8ca92ba972 100644 --- a/langchain/src/vectorstores/memory.ts +++ b/langchain/src/vectorstores/memory.ts @@ -1,4 +1,4 @@ -import { similarity as ml_distance_similarity } from "ml-distance"; +import { cosine } from "../util/ml-distance/similarities.js"; import { VectorStore } from "@langchain/core/vectorstores"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { Document } from "@langchain/core/documents"; @@ -21,7 +21,7 @@ interface MemoryVector { * function. */ export interface MemoryVectorStoreArgs { - similarity?: typeof ml_distance_similarity.cosine; + similarity?: typeof cosine; } /** @@ -34,7 +34,7 @@ export class MemoryVectorStore extends VectorStore { memoryVectors: MemoryVector[] = []; - similarity: typeof ml_distance_similarity.cosine; + similarity: typeof cosine; _vectorstoreType(): string { return "memory"; @@ -46,7 +46,7 @@ export class MemoryVectorStore extends VectorStore { ) { super(embeddings, rest); - this.similarity = similarity ?? ml_distance_similarity.cosine; + this.similarity = similarity ?? 
cosine; } /** diff --git a/langchain/src/vectorstores/tests/memory.test.ts b/langchain/src/vectorstores/tests/memory.test.ts index d1f43bb4740e..a5134e0363c3 100644 --- a/langchain/src/vectorstores/tests/memory.test.ts +++ b/langchain/src/vectorstores/tests/memory.test.ts @@ -2,8 +2,8 @@ import { test, expect } from "@jest/globals"; import { Document, DocumentInterface } from "@langchain/core/documents"; import { SyntheticEmbeddings } from "@langchain/core/utils/testing"; -import { similarity } from "ml-distance"; import { MemoryVectorStore } from "../memory.js"; +import { cosine } from "../../util/ml-distance/similarities.js"; test("MemoryVectorStore with external ids", async () => { const embeddings = new SyntheticEmbeddings({ @@ -75,10 +75,10 @@ test("MemoryVectorStore with custom similarity", async () => { let similarityCalled = false; let similarityCalledCount = 0; const store = new MemoryVectorStore(embeddings, { - similarity: (a: number, b: number) => { + similarity: (a: number[], b: number[]) => { similarityCalledCount += 1; similarityCalled = true; - return similarity.cosine(a, b); + return cosine(a, b); }, }); diff --git a/libs/langchain-baidu-qianfan/package.json b/libs/langchain-baidu-qianfan/package.json index a78ca4b7674c..9f18d20ba845 100644 --- a/libs/langchain-baidu-qianfan/package.json +++ b/libs/langchain-baidu-qianfan/package.json @@ -57,7 +57,6 @@ "eslint-plugin-prettier": "^4.2.1", "jest": "^29.5.0", "jest-environment-node": "^29.6.4", - "langchain": "0.2.5", "prettier": "^2.8.3", "release-it": "^15.10.1", "rollup": "^4.5.2", diff --git a/yarn.lock b/yarn.lock index 905737d53470..131b835c6732 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10994,7 +10994,6 @@ __metadata: eslint-plugin-prettier: ^4.2.1 jest: ^29.5.0 jest-environment-node: ^29.6.4 - langchain: 0.2.5 prettier: ^2.8.3 release-it: ^15.10.1 rollup: ^4.5.2 @@ -11654,7 +11653,6 @@ __metadata: jest-environment-node: ^29.6.4 js-tiktoken: ^1.0.12 langsmith: ~0.1.39 - ml-distance: ^4.0.0 
ml-matrix: ^6.10.4 mustache: ^4.2.0 p-queue: ^6.6.2 @@ -21134,13 +21132,6 @@ __metadata: languageName: node linkType: hard -"binary-search@npm:^1.3.5": - version: 1.3.6 - resolution: "binary-search@npm:1.3.6" - checksum: 2e6b3459a9c1ba1bd674a6a855a5ef7505f70707422244430e3510e989c0df6074a49fe60784a98b93b51545c9bcace1db1defee06ff861b124c036a2f2836bf - languageName: node - linkType: hard - "bindings@npm:^1.5.0": version: 1.5.0 resolution: "bindings@npm:1.5.0" @@ -26304,7 +26295,6 @@ __metadata: js-yaml: ^4.1.0 langchain: "workspace:*" langsmith: ^0.1.30 - ml-distance: ^4.0.0 mongodb: ^6.3.0 pg: ^8.11.0 pickleparser: ^0.2.1 @@ -31293,181 +31283,6 @@ __metadata: languageName: node linkType: hard -"langchain@npm:0.2.5": - version: 0.2.5 - resolution: "langchain@npm:0.2.5" - dependencies: - "@langchain/core": ~0.2.0 - "@langchain/openai": ~0.1.0 - "@langchain/textsplitters": ~0.0.0 - binary-extensions: ^2.2.0 - js-tiktoken: ^1.0.12 - js-yaml: ^4.1.0 - jsonpointer: ^5.0.1 - langchainhub: ~0.0.8 - langsmith: ~0.1.30 - ml-distance: ^4.0.0 - openapi-types: ^12.1.3 - p-retry: 4 - uuid: ^9.0.0 - yaml: ^2.2.1 - zod: ^3.22.4 - zod-to-json-schema: ^3.22.3 - peerDependencies: - "@aws-sdk/client-s3": ^3.310.0 - "@aws-sdk/client-sagemaker-runtime": ^3.310.0 - "@aws-sdk/client-sfn": ^3.310.0 - "@aws-sdk/credential-provider-node": ^3.388.0 - "@azure/storage-blob": ^12.15.0 - "@browserbasehq/sdk": "*" - "@gomomento/sdk": ^1.51.1 - "@gomomento/sdk-core": ^1.51.1 - "@gomomento/sdk-web": ^1.51.1 - "@mendable/firecrawl-js": ^0.0.13 - "@notionhq/client": ^2.2.10 - "@pinecone-database/pinecone": "*" - "@supabase/supabase-js": ^2.10.0 - "@vercel/kv": ^0.2.3 - "@xata.io/client": ^0.28.0 - apify-client: ^2.7.1 - assemblyai: ^4.0.0 - axios: "*" - cheerio: ^1.0.0-rc.12 - chromadb: "*" - convex: ^1.3.1 - couchbase: ^4.3.0 - d3-dsv: ^2.0.0 - epub2: ^3.0.1 - fast-xml-parser: "*" - handlebars: ^4.7.8 - html-to-text: ^9.0.5 - ignore: ^5.2.0 - ioredis: ^5.3.2 - jsdom: "*" - mammoth: ^1.6.0 - mongodb: 
">=5.2.0" - node-llama-cpp: "*" - notion-to-md: ^3.1.0 - officeparser: ^4.0.4 - pdf-parse: 1.1.1 - peggy: ^3.0.2 - playwright: ^1.32.1 - puppeteer: ^19.7.2 - pyodide: ^0.24.1 - redis: ^4.6.4 - sonix-speech-recognition: ^2.1.1 - srt-parser-2: ^1.2.3 - typeorm: ^0.3.12 - weaviate-ts-client: "*" - web-auth-library: ^1.0.3 - ws: ^8.14.2 - youtube-transcript: ^1.0.6 - youtubei.js: ^9.1.0 - peerDependenciesMeta: - "@aws-sdk/client-s3": - optional: true - "@aws-sdk/client-sagemaker-runtime": - optional: true - "@aws-sdk/client-sfn": - optional: true - "@aws-sdk/credential-provider-node": - optional: true - "@azure/storage-blob": - optional: true - "@browserbasehq/sdk": - optional: true - "@gomomento/sdk": - optional: true - "@gomomento/sdk-core": - optional: true - "@gomomento/sdk-web": - optional: true - "@mendable/firecrawl-js": - optional: true - "@notionhq/client": - optional: true - "@pinecone-database/pinecone": - optional: true - "@supabase/supabase-js": - optional: true - "@vercel/kv": - optional: true - "@xata.io/client": - optional: true - apify-client: - optional: true - assemblyai: - optional: true - axios: - optional: true - cheerio: - optional: true - chromadb: - optional: true - convex: - optional: true - couchbase: - optional: true - d3-dsv: - optional: true - epub2: - optional: true - faiss-node: - optional: true - fast-xml-parser: - optional: true - handlebars: - optional: true - html-to-text: - optional: true - ignore: - optional: true - ioredis: - optional: true - jsdom: - optional: true - mammoth: - optional: true - mongodb: - optional: true - node-llama-cpp: - optional: true - notion-to-md: - optional: true - officeparser: - optional: true - pdf-parse: - optional: true - peggy: - optional: true - playwright: - optional: true - puppeteer: - optional: true - pyodide: - optional: true - redis: - optional: true - sonix-speech-recognition: - optional: true - srt-parser-2: - optional: true - typeorm: - optional: true - weaviate-ts-client: - optional: true 
- web-auth-library: - optional: true - ws: - optional: true - youtube-transcript: - optional: true - youtubei.js: - optional: true - checksum: 18078968f7a788052d02cc681e1a58d85ae7d8461db039a9e53ea7f8a17057c76a186788fceed0b0d2d02be732afa519305f16f12df57e90e734159cf7518df0 - languageName: node - linkType: hard - "langchain@workspace:*, langchain@workspace:langchain, langchain@~0.2.3": version: 0.0.0-use.local resolution: "langchain@workspace:langchain" @@ -31547,7 +31362,6 @@ __metadata: jsonpointer: ^5.0.1 langsmith: ~0.1.40 mammoth: ^1.5.1 - ml-distance: ^4.0.0 mongodb: ^5.2.0 node-llama-cpp: 2.7.3 notion-to-md: ^3.1.0 @@ -31760,13 +31574,6 @@ __metadata: languageName: unknown linkType: soft -"langchainhub@npm:~0.0.8": - version: 0.0.8 - resolution: "langchainhub@npm:0.0.8" - checksum: b46316adbbd5f1971892b423e6a7e9c7681f4c44e4ac3c3b79c6beef96a28fc9582a4ee14affb617fd887f3dac8cae55368e1b7c4a41bb43f86f17c5d63031e3 - languageName: node - linkType: hard - "langchainjs@workspace:.": version: 0.0.0-use.local resolution: "langchainjs@workspace:." 
@@ -33228,15 +33035,6 @@ __metadata: languageName: node linkType: hard -"ml-array-mean@npm:^1.1.6": - version: 1.1.6 - resolution: "ml-array-mean@npm:1.1.6" - dependencies: - ml-array-sum: ^1.1.6 - checksum: 81999dac8bad3bf2dafb23a9bc71883879b9d55889e48d00b91dd4a2568957a6f5373632ae57324760d1e1d7d29ad45ab4ea7ae32de67ce144d57a21e36dd9c2 - languageName: node - linkType: hard - "ml-array-min@npm:^1.2.3": version: 1.2.3 resolution: "ml-array-min@npm:1.2.3" @@ -33257,33 +33055,6 @@ __metadata: languageName: node linkType: hard -"ml-array-sum@npm:^1.1.6": - version: 1.1.6 - resolution: "ml-array-sum@npm:1.1.6" - dependencies: - is-any-array: ^2.0.0 - checksum: 369dbb3681e3f8b0d0facba9fcfc981656dac49a80924859c3ed8f0a5880fb6db2d6e534f8b7b9c3cda59248152e61b27d6419d19c69539de7c3aa6aea3094eb - languageName: node - linkType: hard - -"ml-distance-euclidean@npm:^2.0.0": - version: 2.0.0 - resolution: "ml-distance-euclidean@npm:2.0.0" - checksum: e31f98a947ce6971c35d74e6d2521800f0d219efb34c78b20b5f52debd206008d52e677685c09839e6bab5d2ed233aa009314236e4e548d5fafb60f2f71e2b3e - languageName: node - linkType: hard - -"ml-distance@npm:^4.0.0": - version: 4.0.0 - resolution: "ml-distance@npm:4.0.0" - dependencies: - ml-array-mean: ^1.1.6 - ml-distance-euclidean: ^2.0.0 - ml-tree-similarity: ^1.0.0 - checksum: 37d07d52c5f1d185833336d73764e97dcdf8abe37e276fd6a3f7d536cf373e7cbfd6497f433192bd7d100b0b95518a616a8ae6df53b74c5b0d5fbfa7a971ae4d - languageName: node - linkType: hard - "ml-matrix@npm:^6.10.4": version: 6.10.4 resolution: "ml-matrix@npm:6.10.4" @@ -33294,16 +33065,6 @@ __metadata: languageName: node linkType: hard -"ml-tree-similarity@npm:^1.0.0": - version: 1.0.0 - resolution: "ml-tree-similarity@npm:1.0.0" - dependencies: - binary-search: ^1.3.5 - num-sort: ^2.0.0 - checksum: f99e217dc94acf75c089469dc3c278f388146e43c82212160b6b75daa14309902f84eb0a00c67d502fc79dc171cf15a33d392326e024b2e89881adc585d15513 - languageName: node - linkType: hard - "mnemonist@npm:0.38.3": version: 
0.38.3 resolution: "mnemonist@npm:0.38.3" @@ -34087,13 +33848,6 @@ __metadata: languageName: node linkType: hard -"num-sort@npm:^2.0.0": - version: 2.1.0 - resolution: "num-sort@npm:2.1.0" - checksum: 5a80cd0456c8847f71fb80ad3c3596714cebede76de585aa4fed2b9a4fb0907631edca1f7bb31c24dbb9928b66db3d03059994cc365d2ae011b80ddddac28f6e - languageName: node - linkType: hard - "nwsapi@npm:^2.2.4": version: 2.2.7 resolution: "nwsapi@npm:2.2.7" From 82dbcd71554aeac3f21de7fecbc575ce57239b93 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Fri, 2 Aug 2024 14:55:19 -0700 Subject: [PATCH 2/3] Remove lint config --- langchain-core/.eslintrc.cjs | 1 + langchain/.eslintrc.cjs | 1 + 2 files changed, 2 insertions(+) diff --git a/langchain-core/.eslintrc.cjs b/langchain-core/.eslintrc.cjs index 39b49268e8ee..e903d44e5c71 100644 --- a/langchain-core/.eslintrc.cjs +++ b/langchain-core/.eslintrc.cjs @@ -62,6 +62,7 @@ module.exports = { "no-use-before-define": 0, "no-useless-constructor": 0, "no-return-await": 0, + "no-plusplus": 0, "consistent-return": 0, "no-else-return": 0, "func-names": 0, diff --git a/langchain/.eslintrc.cjs b/langchain/.eslintrc.cjs index 2698aa6bf883..a0dc5fadc1ee 100644 --- a/langchain/.eslintrc.cjs +++ b/langchain/.eslintrc.cjs @@ -61,6 +61,7 @@ module.exports = { "no-use-before-define": 0, "no-useless-constructor": 0, "no-return-await": 0, + "no-plusplus": 0, "consistent-return": 0, "no-else-return": 0, "func-names": 0, From 8aa3dfe8d3c658564d9af934e2571ebd273812f7 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Fri, 2 Aug 2024 15:02:43 -0700 Subject: [PATCH 3/3] Lint --- langchain/src/evaluation/embedding_distance/base.ts | 6 +++--- langchain/src/vectorstores/memory.ts | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/langchain/src/evaluation/embedding_distance/base.ts b/langchain/src/evaluation/embedding_distance/base.ts index a3d3b035032e..eed8e453bb70 100644 --- a/langchain/src/evaluation/embedding_distance/base.ts +++ 
b/langchain/src/evaluation/embedding_distance/base.ts @@ -61,9 +61,9 @@ export function getDistanceCalculationFunction( const distanceFunctions: { [key in EmbeddingDistanceType]: VectorFunction } = { cosine: (X: number[], Y: number[]) => 1.0 - cosine(X, Y), - euclidean: euclidean, - manhattan: manhattan, - chebyshev: chebyshev, + euclidean, + manhattan, + chebyshev, }; return distanceFunctions[distanceType]; diff --git a/langchain/src/vectorstores/memory.ts b/langchain/src/vectorstores/memory.ts index 3d8ca92ba972..6d7c03db9b4f 100644 --- a/langchain/src/vectorstores/memory.ts +++ b/langchain/src/vectorstores/memory.ts @@ -1,7 +1,7 @@ -import { cosine } from "../util/ml-distance/similarities.js"; import { VectorStore } from "@langchain/core/vectorstores"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { Document } from "@langchain/core/documents"; +import { cosine } from "../util/ml-distance/similarities.js"; /** * Interface representing a vector in memory. It includes the content