Skip to content

Commit

Permalink
feat: add mmr search to pgvector
Browse files Browse the repository at this point in the history
  • Loading branch information
anadi45 committed Dec 27, 2024
1 parent 2e19277 commit dd376f7
Showing 1 changed file with 62 additions and 5 deletions.
67 changes: 62 additions & 5 deletions libs/langchain-community/src/vectorstores/pgvector.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import pg, { type Pool, type PoolClient, type PoolConfig } from "pg";
import { VectorStore } from "@langchain/core/vectorstores";
import {
MaxMarginalRelevanceSearchOptions,
VectorStore,
} from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";

type Metadata = Record<string, unknown>;

Expand Down Expand Up @@ -261,9 +265,15 @@ export class PGVectorStore extends VectorStore {
this.chunkSize = config.chunkSize ?? 500;
this.distanceStrategy = config.distanceStrategy ?? this.distanceStrategy;

this._verbose =
getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ??
!!config.verbose;
const langchainVerbose = getEnvironmentVariable("LANGCHAIN_VERBOSE");

if (langchainVerbose === "true") {
this._verbose = true;
} else if (langchainVerbose === "false") {
this._verbose = false;
} else {
this._verbose = config.verbose;
}
}

get computedTableName() {
Expand Down Expand Up @@ -603,12 +613,14 @@ export class PGVectorStore extends VectorStore {
* @param query - Query vector.
* @param k - Number of most similar documents to return.
* @param filter - Optional filter to apply to the search.
* @param includeEmbedding Whether to include the embedding vectors in the results.
* @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(
query: number[],
k: number,
filter?: this["FilterType"]
filter?: this["FilterType"],
includeEmbedding?: boolean
): Promise<[Document, number][]> {
const embeddingString = `[${query.join(",")}]`;
const _filter: this["FilterType"] = filter ?? {};
Expand Down Expand Up @@ -688,6 +700,9 @@ export class PGVectorStore extends VectorStore {
metadata: doc[this.metadataColumnName],
id: doc[this.idColumnName],
});
if (includeEmbedding) {
document.metadata[this.vectorColumnName] = doc[this.vectorColumnName];
}
results.push([document, doc._distance]);
}
}
Expand Down Expand Up @@ -879,4 +894,46 @@ export class PGVectorStore extends VectorStore {
);
}
}

/**
* Return documents selected using the maximal marginal relevance.
* Maximal marginal relevance optimizes for similarity to the query AND
* diversity among selected documents.
* @param query Text to look up documents similar to.
* @param options.k=4 Number of documents to return.
* @param options.fetchK=20 Number of documents to fetch before passing to
* the MMR algorithm.
* @param options.lambda=0.5 Number between 0 and 1 that determines the
* degree of diversity among the results, where 0 corresponds to maximum
* diversity and 1 to minimum diversity.
* @returns List of documents selected by maximal marginal relevance.
*/
async maxMarginalRelevanceSearch(
query: string,
options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
): Promise<Document[]> {
const { k = 4, fetchK = 20, lambda = 0.5, filter } = options;
const queryEmbedding = await this.embeddings.embedQuery(query);

const docs = await this.similaritySearchVectorWithScore(
queryEmbedding,
fetchK,
filter,
true
);

const embeddingList = docs.map(
(doc) => doc[0].metadata[this.vectorColumnName]
);

const mmrIndexes = maximalMarginalRelevance(
queryEmbedding,
embeddingList,
lambda,
k
);

const mmrDocs = mmrIndexes.map((index) => docs[index][0]);
return mmrDocs;
}
}

0 comments on commit dd376f7

Please sign in to comment.