Skip to content

Commit

Permalink
Inline ml-distance functions due to type errors, fix type errors
Browse files Browse the repository at this point in the history
  • Loading branch information
jacoblee93 committed Aug 2, 2024
1 parent 4ffa212 commit ae5e4c5
Show file tree
Hide file tree
Showing 26 changed files with 261 additions and 319 deletions.
1 change: 0 additions & 1 deletion deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"zod": "npm:/zod",
"zod-to-json-schema": "npm:/zod-to-json-schema",
"node-llama-cpp": "npm:/node-llama-cpp",
"ml-distance": "npm:/ml-distance",
"pdf-parse": "npm:/pdf-parse",
"peggy": "npm:/peggy",
"readline": "https://deno.land/x/[email protected]/mod.ts",
Expand Down
18 changes: 15 additions & 3 deletions docs/core_docs/docs/integrations/vectorstores/memory.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ import ExampleLoader from "@examples/indexes/vector_stores/memory_fromdocs.ts";

### Use a custom similarity metric

import ExampleCustom from "@examples/indexes/vector_stores/memory_custom_similarity.ts";

<CodeBlock language="typescript">{ExampleCustom}</CodeBlock>
```ts
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenAIEmbeddings } from "@langchain/openai";
import { similarity } from "ml-distance";

const vectorStore = await MemoryVectorStore.fromTexts(
["Hello world", "Bye bye", "hello nice world"],
[{ id: 2 }, { id: 1 }, { id: 3 }],
new OpenAIEmbeddings(),
{ similarity: similarity.pearson }
);

const resultOne = await vectorStore.similaritySearch("hello world", 1);
console.log(resultOne);
```
2 changes: 2 additions & 0 deletions environment_tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ services:
condition: service_completed_successfully
test-exports-esm:
condition: service_completed_successfully
test-exports-tsc:
condition: service_completed_successfully
test-exports-cjs:
condition: service_completed_successfully
test-exports-cf:
Expand Down
1 change: 0 additions & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@
"js-yaml": "^4.1.0",
"langchain": "workspace:*",
"langsmith": "^0.1.30",
"ml-distance": "^4.0.0",
"mongodb": "^6.3.0",
"pg": "^8.11.0",
"pickleparser": "^0.2.1",
Expand Down
13 changes: 0 additions & 13 deletions examples/src/indexes/vector_stores/memory_custom_similarity.ts

This file was deleted.

1 change: 0 additions & 1 deletion langchain-core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
"decamelize": "1.2.0",
"js-tiktoken": "^1.0.12",
"langsmith": "~0.1.39",
"ml-distance": "^4.0.0",
"mustache": "^4.2.0",
"p-queue": "^6.6.2",
"p-retry": "4",
Expand Down
22 changes: 8 additions & 14 deletions langchain-core/src/language_models/chat_models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,6 @@ export type LangSmithParams = {
ls_stop?: Array<string>;
};

interface ChatModelGenerateCachedParameters<
T extends BaseChatModel<CallOptions>,
CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions
> {
messages: BaseMessageLike[][];
cache: BaseCache<Generation[]>;
llmStringKey: string;
parsedOptions: T["ParsedCallOptions"];
handledOptions: RunnableConfig;
}

/**
* Base class for chat models. It extends the BaseLanguageModel class and
* provides methods for generating chat based on input messages.
Expand Down Expand Up @@ -449,9 +438,14 @@ export abstract class BaseChatModel<
llmStringKey,
parsedOptions,
handledOptions,
}: ChatModelGenerateCachedParameters<typeof this>): Promise<
LLMResult & { missingPromptIndices: number[] }
> {
}: {
messages: BaseMessageLike[][];
cache: BaseCache<Generation[]>;
llmStringKey: string;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
parsedOptions: any;
handledOptions: RunnableConfig;
}): Promise<LLMResult & { missingPromptIndices: number[] }> {
const baseMessages = messages.map((messageList) =>
messageList.map(coerceMessageLikeToMessage)
);
Expand Down
24 changes: 9 additions & 15 deletions langchain-core/src/language_models/llms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,6 @@ export interface BaseLLMParams extends BaseLanguageModelParams {

export interface BaseLLMCallOptions extends BaseLanguageModelCallOptions {}

interface LLMGenerateCachedParameters<
T extends BaseLLM<CallOptions>,
CallOptions extends BaseLLMCallOptions = BaseLLMCallOptions
> {
prompts: string[];
cache: BaseCache<Generation[]>;
llmStringKey: string;
parsedOptions: T["ParsedCallOptions"];
handledOptions: RunnableConfig;
runId?: string;
}

/**
* LLM Wrapper. Takes in a prompt (or prompts) and returns a string.
*/
Expand Down Expand Up @@ -351,9 +339,15 @@ export abstract class BaseLLM<
parsedOptions,
handledOptions,
runId,
}: LLMGenerateCachedParameters<typeof this>): Promise<
LLMResult & { missingPromptIndices: number[] }
> {
}: {
prompts: string[];
cache: BaseCache<Generation[]>;
llmStringKey: string;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
parsedOptions: any;
handledOptions: RunnableConfig;
runId?: string;
}): Promise<LLMResult & { missingPromptIndices: number[] }> {
const callbackManager_ = await CallbackManager.configure(
handledOptions.callbacks,
this.callbacks,
Expand Down
13 changes: 6 additions & 7 deletions langchain-core/src/utils/math.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import {
similarity as ml_distance_similarity,
distance as ml_distance,
} from "ml-distance";
import { cosine } from "./ml-distance/similarities.js";
import { innerProduct as innerProductDistance } from "./ml-distance/distances.js";
import { euclidean } from "./ml-distance-euclidean/euclidean.js";

type VectorFunction = (xVector: number[], yVector: number[]) => number;

Expand Down Expand Up @@ -65,15 +64,15 @@ export function normalize(M: number[][], similarity = false): number[][] {
* @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
*/
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
return matrixFunc(X, Y, ml_distance_similarity.cosine);
return matrixFunc(X, Y, cosine);
}

export function innerProduct(X: number[][], Y: number[][]): number[][] {
return matrixFunc(X, Y, ml_distance.innerProduct);
return matrixFunc(X, Y, innerProductDistance);
}

export function euclideanDistance(X: number[][], Y: number[][]): number[][] {
return matrixFunc(X, Y, ml_distance.euclidean);
return matrixFunc(X, Y, euclidean);
}

/**
Expand Down
21 changes: 21 additions & 0 deletions langchain-core/src/utils/ml-distance-euclidean/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2015 ml.js

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
11 changes: 11 additions & 0 deletions langchain-core/src/utils/ml-distance-euclidean/euclidean.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Computes the squared Euclidean distance between two vectors.
 *
 * Iterates over the indices of `p`, so `q` is expected to have at least as
 * many elements as `p`; a shorter `q` produces `NaN`.
 *
 * @param p - first vector
 * @param q - second vector
 * @returns the sum of the squared element-wise differences
 */
export function squaredEuclidean(p: number[], q: number[]) {
  let d = 0;
  // `i += 1` rather than `i++`: the lint check rejects the unary `++` operator.
  for (let i = 0; i < p.length; i += 1) {
    const diff = p[i] - q[i];
    d += diff * diff;
  }
  return d;
}

/**
 * Computes the Euclidean distance between two vectors, i.e. the square root
 * of their squared Euclidean distance.
 *
 * @param p - first vector
 * @param q - second vector
 */
export function euclidean(p: number[], q: number[]) {
  const squaredDistance = squaredEuclidean(p, q);
  return Math.sqrt(squaredDistance);
}
21 changes: 21 additions & 0 deletions langchain-core/src/utils/ml-distance/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2014 ml.js

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
14 changes: 14 additions & 0 deletions langchain-core/src/utils/ml-distance/distances.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Returns the inner product (dot product) of vectors a and b.
 *
 * Iterates over the indices of `a`, so `b` is expected to have at least as
 * many elements as `a`; a shorter `b` produces `NaN`.
 *
 * @link [Inner Product Similarity algorithm](https://www.naun.org/main/NAUN/ijmmas/mmmas-49.pdf)
 * @param a - first vector
 * @param b - second vector
 * @returns the sum of the element-wise products
 */
export function innerProduct(a: number[], b: number[]): number {
  let ans = 0;
  // `i += 1` rather than `i++`: the lint check rejects the unary `++` operator.
  for (let i = 0; i < a.length; i += 1) {
    ans += a[i] * b[i];
  }
  return ans;
}
17 changes: 17 additions & 0 deletions langchain-core/src/utils/ml-distance/similarities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
 * Returns the cosine similarity between vectors a and b: their dot product
 * divided by the product of their Euclidean norms.
 *
 * Iterates over the indices of `a`, so `b` is expected to have at least as
 * many elements as `a`. If either vector has zero magnitude the division is
 * 0 / 0 and the result is `NaN`.
 *
 * @param a - first vector
 * @param b - second vector
 * @returns the cosine similarity of `a` and `b`
 */
export function cosine(a: number[], b: number[]): number {
  let p = 0; // running dot product a . b
  let p2 = 0; // running squared norm of a
  let q2 = 0; // running squared norm of b
  // `i += 1` rather than `i++`: the lint check rejects the unary `++` operator.
  for (let i = 0; i < a.length; i += 1) {
    p += a[i] * b[i];
    p2 += a[i] * a[i];
    q2 += b[i] * b[i];
  }
  return p / (Math.sqrt(p2) * Math.sqrt(q2));
}
8 changes: 4 additions & 4 deletions langchain-core/src/utils/testing/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable @typescript-eslint/no-unused-vars */

import { similarity as ml_distance_similarity } from "ml-distance";
import { z } from "zod";
import {
BaseCallbackConfig,
Expand Down Expand Up @@ -46,6 +45,7 @@ import {
StructuredOutputMethodOptions,
} from "../../language_models/base.js";
import { VectorStore } from "../../vectorstores.js";
import { cosine } from "../ml-distance/similarities.js";

/**
* Parser for comma-separated values. It splits the input text by commas
Expand Down Expand Up @@ -750,7 +750,7 @@ interface MemoryVector {
* function.
*/
export interface FakeVectorStoreArgs {
similarity?: typeof ml_distance_similarity.cosine;
similarity?: typeof cosine;
}

/**
Expand All @@ -763,7 +763,7 @@ export class FakeVectorStore extends VectorStore {

memoryVectors: MemoryVector[] = [];

similarity: typeof ml_distance_similarity.cosine;
similarity: typeof cosine;

_vectorstoreType(): string {
return "memory";
Expand All @@ -775,7 +775,7 @@ export class FakeVectorStore extends VectorStore {
) {
super(embeddings, rest);

this.similarity = similarity ?? ml_distance_similarity.cosine;
this.similarity = similarity ?? cosine;
}

/**
Expand Down
1 change: 0 additions & 1 deletion langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,6 @@
"js-yaml": "^4.1.0",
"jsonpointer": "^5.0.1",
"langsmith": "~0.1.40",
"ml-distance": "^4.0.0",
"openapi-types": "^12.1.3",
"p-retry": "4",
"uuid": "^10.0.0",
Expand Down
12 changes: 7 additions & 5 deletions langchain/src/evaluation/embedding_distance/base.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { distance, similarity } from "ml-distance";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { ChainValues } from "@langchain/core/utils/types";
import { OpenAIEmbeddings } from "@langchain/openai";
Expand All @@ -13,6 +12,9 @@ import {
StringEvaluator,
StringEvaluatorArgs,
} from "../base.js";
import { cosine } from "../../util/ml-distance/similarities.js";
import { chebyshev, manhattan } from "../../util/ml-distance/distances.js";
import { euclidean } from "../../util/ml-distance-euclidean/euclidean.js";

/**
*
Expand Down Expand Up @@ -58,10 +60,10 @@ export function getDistanceCalculationFunction(
): VectorFunction {
const distanceFunctions: { [key in EmbeddingDistanceType]: VectorFunction } =
{
cosine: (X: number[], Y: number[]) => 1.0 - similarity.cosine(X, Y),
euclidean: distance.euclidean,
manhattan: distance.manhattan,
chebyshev: distance.chebyshev,
cosine: (X: number[], Y: number[]) => 1.0 - cosine(X, Y),
euclidean: euclidean,
manhattan: manhattan,
chebyshev: chebyshev,
};

return distanceFunctions[distanceType];
Expand Down
21 changes: 21 additions & 0 deletions langchain/src/util/ml-distance-euclidean/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2015 ml.js

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
11 changes: 11 additions & 0 deletions langchain/src/util/ml-distance-euclidean/euclidean.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Sums the squared element-wise differences between `p` and `q`, giving the
 * squared Euclidean distance. Only the indices of `p` are visited.
 *
 * @param p - first vector
 * @param q - second vector
 */
export function squaredEuclidean(p: number[], q: number[]) {
  let total = 0;
  let index = 0;
  while (index < p.length) {
    const delta = p[index] - q[index];
    total += delta * delta;
    index += 1;
  }
  return total;
}

/**
 * Computes the Euclidean distance between two vectors, i.e. the square root
 * of their squared Euclidean distance.
 *
 * @param p - first vector
 * @param q - second vector
 */
export function euclidean(p: number[], q: number[]) {
  const squaredDistance = squaredEuclidean(p, q);
  return Math.sqrt(squaredDistance);
}
Loading

0 comments on commit ae5e4c5

Please sign in to comment.