From 7a9eac7bf760f8cd12690ad40e51e5d8dd8a2322 Mon Sep 17 00:00:00 2001 From: Pavlo Sobchuk Date: Tue, 5 Nov 2024 19:17:55 +0000 Subject: [PATCH] docs(core): VectorStore and Retriever: types, interfaces, classes (#7141) --- docs/core_docs/.gitignore | 34 +- langchain-core/src/retrievers/index.ts | 94 ++++- langchain-core/src/vectorstores.ts | 530 ++++++++++++++++++++++++- 3 files changed, 626 insertions(+), 32 deletions(-) diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore index 55864ace8b3e..099af2f2d642 100644 --- a/docs/core_docs/.gitignore +++ b/docs/core_docs/.gitignore @@ -252,12 +252,6 @@ docs/integrations/vectorstores/elasticsearch.md docs/integrations/vectorstores/elasticsearch.mdx docs/integrations/vectorstores/chroma.md docs/integrations/vectorstores/chroma.mdx -docs/integrations/toolkits/vectorstore.md -docs/integrations/toolkits/vectorstore.mdx -docs/integrations/toolkits/sql.md -docs/integrations/toolkits/sql.mdx -docs/integrations/toolkits/openapi.md -docs/integrations/toolkits/openapi.mdx docs/integrations/tools/tavily_search.md docs/integrations/tools/tavily_search.mdx docs/integrations/tools/serpapi.md @@ -266,6 +260,12 @@ docs/integrations/tools/exa_search.md docs/integrations/tools/exa_search.mdx docs/integrations/tools/duckduckgo_search.md docs/integrations/tools/duckduckgo_search.mdx +docs/integrations/toolkits/vectorstore.md +docs/integrations/toolkits/vectorstore.mdx +docs/integrations/toolkits/sql.md +docs/integrations/toolkits/sql.mdx +docs/integrations/toolkits/openapi.md +docs/integrations/toolkits/openapi.mdx docs/integrations/text_embedding/togetherai.md docs/integrations/text_embedding/togetherai.mdx docs/integrations/text_embedding/openai.md @@ -376,16 +376,6 @@ docs/integrations/retrievers/self_query/hnswlib.md docs/integrations/retrievers/self_query/hnswlib.mdx docs/integrations/retrievers/self_query/chroma.md docs/integrations/retrievers/self_query/chroma.mdx -docs/integrations/document_loaders/file_loaders/unstructured.md -docs/integrations/document_loaders/file_loaders/unstructured.mdx -docs/integrations/document_loaders/file_loaders/text.md -docs/integrations/document_loaders/file_loaders/text.mdx -docs/integrations/document_loaders/file_loaders/pdf.md -docs/integrations/document_loaders/file_loaders/pdf.mdx -docs/integrations/document_loaders/file_loaders/directory.md -docs/integrations/document_loaders/file_loaders/directory.mdx -docs/integrations/document_loaders/file_loaders/csv.md -docs/integrations/document_loaders/file_loaders/csv.mdx docs/integrations/document_loaders/web_loaders/web_puppeteer.md docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx docs/integrations/document_loaders/web_loaders/web_cheerio.md @@ -397,4 +387,14 @@ docs/integrations/document_loaders/web_loaders/pdf.mdx docs/integrations/document_loaders/web_loaders/langsmith.md docs/integrations/document_loaders/web_loaders/langsmith.mdx docs/integrations/document_loaders/web_loaders/firecrawl.md -docs/integrations/document_loaders/web_loaders/firecrawl.mdx \ No newline at end of file +docs/integrations/document_loaders/web_loaders/firecrawl.mdx +docs/integrations/document_loaders/file_loaders/unstructured.md +docs/integrations/document_loaders/file_loaders/unstructured.mdx +docs/integrations/document_loaders/file_loaders/text.md +docs/integrations/document_loaders/file_loaders/text.mdx +docs/integrations/document_loaders/file_loaders/pdf.md +docs/integrations/document_loaders/file_loaders/pdf.mdx +docs/integrations/document_loaders/file_loaders/directory.md +docs/integrations/document_loaders/file_loaders/directory.mdx +docs/integrations/document_loaders/file_loaders/csv.md +docs/integrations/document_loaders/file_loaders/csv.mdx \ No newline at end of file diff --git a/langchain-core/src/retrievers/index.ts b/langchain-core/src/retrievers/index.ts index a544dedce7b9..81798e364039 100644 --- a/langchain-core/src/retrievers/index.ts +++ b/langchain-core/src/retrievers/index.ts @@ -10,7 +10,24 @@ import { Runnable, type RunnableInterface } from "../runnables/base.js"; import { RunnableConfig, ensureConfig } from "../runnables/config.js"; /** - * Base Retriever class. All indexes should extend this class. + * Input configuration options for initializing a retriever that extends + * the `BaseRetriever` class. This interface provides base properties + * common to all retrievers, allowing customization of callback functions, + * tagging, metadata, and logging verbosity. + * + * Fields: + * - `callbacks` (optional): An array of callback functions that handle various + * events during retrieval, such as logging, error handling, or progress updates. + * + * - `tags` (optional): An array of strings used to add contextual tags to + * retrieval operations, allowing for easier categorization and tracking. + * + * - `metadata` (optional): A record of key-value pairs to store additional + * contextual information for retrieval operations, which can be useful + * for logging or auditing purposes. + * + * - `verbose` (optional): A boolean flag that, if set to `true`, enables + * detailed logging and output during the retrieval process. Defaults to `false`. */ export interface BaseRetrieverInput { callbacks?: Callbacks; @@ -19,10 +36,30 @@ export interface BaseRetrieverInput { verbose?: boolean; } +/** + * Interface for a base retriever that defines core functionality for + * retrieving relevant documents from a source based on a query. + * + * The `BaseRetrieverInterface` standardizes the `getRelevantDocuments` method, + * enabling retrieval of documents that match the query criteria. + * + * @template Metadata - The type of metadata associated with each document, + * defaulting to `Record`. + */ export interface BaseRetrieverInterface< // eslint-disable-next-line @typescript-eslint/no-explicit-any Metadata extends Record = Record > extends RunnableInterface[]> { + /** + * Retrieves documents relevant to a given query, allowing optional + * configurations for customization. + * + * @param query - A string representing the query to search for relevant documents. + * @param config - (optional) Configuration options for the retrieval process, + * which may include callbacks and additional context settings. + * @returns A promise that resolves to an array of `DocumentInterface` instances, + * each containing metadata specified by the `Metadata` type parameter. + */ getRelevantDocuments( query: string, config?: Callbacks | BaseCallbackConfig @@ -30,9 +67,16 @@ export interface BaseRetrieverInterface< } /** - * Abstract base class for a Document retrieval system. A retrieval system - * is defined as something that can take string queries and return the - * most 'relevant' Documents from some source. + * Abstract base class for a document retrieval system, designed to + * process string queries and return the most relevant documents from a source. + * + * `BaseRetriever` provides common properties and methods for derived retrievers, + * such as callbacks, tagging, and verbose logging. Custom retrieval systems + * should extend this class and implement `_getRelevantDocuments` to define + * the specific retrieval logic. + * + * @template Metadata - The type of metadata associated with each document, + * defaulting to `Record`. */ export abstract class BaseRetriever< // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -41,14 +85,33 @@ export abstract class BaseRetriever< extends Runnable[]> implements BaseRetrieverInterface { + /** + * Optional callbacks to handle various events in the retrieval process. + */ callbacks?: Callbacks; + /** + * Tags to label or categorize the retrieval operation. + */ tags?: string[]; + /** + * Metadata to provide additional context or information about the retrieval + * operation. + */ metadata?: Record; + /** + * If set to `true`, enables verbose logging for the retrieval process. + */ verbose?: boolean; + /** + * Constructs a new `BaseRetriever` instance with optional configuration fields. + * + * @param fields - Optional input configuration that can include `callbacks`, + * `tags`, `metadata`, and `verbose` settings for custom retriever behavior. + */ constructor(fields?: BaseRetrieverInput) { super(fields); this.callbacks = fields?.callbacks; @@ -62,6 +125,20 @@ export abstract class BaseRetriever< * changes to people currently using subclassed custom retrievers. * Change it on next major release. */ + /** + * Placeholder method for retrieving relevant documents based on a query. + * + * This method is intended to be implemented by subclasses and will be + * converted to an abstract method in the next major release. Currently, it + * throws an error if not implemented, ensuring that custom retrievers define + * the specific retrieval logic. + * + * @param _query - The query string used to search for relevant documents. + * @param _callbacks - (optional) Callback manager for managing callbacks + * during retrieval. + * @returns A promise resolving to an array of `DocumentInterface` instances relevant to the query. + * @throws {Error} Throws an error indicating the method is not implemented. + */ _getRelevantDocuments( _query: string, _callbacks?: CallbackManagerForRetrieverRun @@ -69,6 +146,15 @@ export abstract class BaseRetriever< throw new Error("Not implemented!"); } + /** + * Executes a retrieval operation. + * + * @param input - The query string used to search for relevant documents. + * @param options - (optional) Configuration options for the retrieval run, + * which may include callbacks, tags, and metadata. + * @returns A promise that resolves to an array of `DocumentInterface` instances + * representing the most relevant documents to the query. + */ async invoke( input: string, options?: RunnableConfig diff --git a/langchain-core/src/vectorstores.ts b/langchain-core/src/vectorstores.ts index da5ef1ce2e72..5bfa07464779 100644 --- a/langchain-core/src/vectorstores.ts +++ b/langchain-core/src/vectorstores.ts @@ -18,7 +18,34 @@ import { type AddDocumentOptions = Record; /** - * Type for options when performing a maximal marginal relevance search. + * Options for configuring a maximal marginal relevance (MMR) search. + * + * MMR search optimizes for both similarity to the query and diversity + * among the results, balancing the retrieval of relevant documents + * with variation in the content returned. + * + * Fields: + * + * - `fetchK` (optional): The initial number of documents to retrieve from the + * vector store before applying the MMR algorithm. This larger set provides a + * pool of documents from which the algorithm can select the most diverse + * results based on relevance to the query. + * + * - `filter` (optional): A filter of type `FilterType` to refine the search + * results, allowing additional conditions to target specific subsets + * of documents. + * + * - `k`: The number of documents to return in the final results. This is the + * primary count of documents that are most relevant to the query. + * + * - `lambda` (optional): A value between 0 and 1 that determines the balance + * between relevance and diversity: + * - A `lambda` of 0 emphasizes diversity, maximizing content variation. + * - A `lambda` of 1 emphasizes similarity to the query, focusing on relevance. + * Values between 0 and 1 provide a mix of relevance and diversity. + * + * @template FilterType - The type used for filtering results, as defined + * by the vector store. */ export type MaxMarginalRelevanceSearchOptions = { k: number; @@ -28,8 +55,23 @@ export type MaxMarginalRelevanceSearchOptions = { }; /** - * Type for options when performing a maximal marginal relevance search - * with the VectorStoreRetriever. + * Options for configuring a maximal marginal relevance (MMR) search + * when using the `VectorStoreRetriever`. + * + * These parameters control how the MMR algorithm balances relevance to the + * query and diversity among the retrieved documents. + * + * Fields: + * - `fetchK` (optional): Specifies the initial number of documents to fetch + * before applying the MMR algorithm. This larger set provides a pool of + * documents from which the algorithm can select the most diverse results + * based on relevance to the query. + * + * - `lambda` (optional): A value between 0 and 1 that determines the balance + * between relevance and diversity: + * - A `lambda` of 0 maximizes diversity among the results, prioritizing varied content. + * - A `lambda` of 1 maximizes similarity to the query, prioritizing relevance. + * Values between 0 and 1 provide a mix of relevance and diversity. */ export type VectorStoreRetrieverMMRSearchKwargs = { fetchK?: number; @@ -37,7 +79,50 @@ export type VectorStoreRetrieverMMRSearchKwargs = { }; /** - * Type for input when creating a VectorStoreRetriever instance. + * Input configuration options for creating a `VectorStoreRetriever` instance. + * + * This type combines properties from `BaseRetrieverInput` with specific settings + * for the `VectorStoreRetriever`, including options for similarity or maximal + * marginal relevance (MMR) search types. + * + * Fields: + * + * - `callbacks` (optional): An array of callback functions that handle various + * events during retrieval, such as logging, error handling, or progress updates. + * + * - `tags` (optional): An array of strings used to add contextual tags to + * retrieval operations, allowing for easier categorization and tracking. + * + * - `metadata` (optional): A record of key-value pairs to store additional + * contextual information for retrieval operations, which can be useful + * for logging or auditing purposes. + * + * - `verbose` (optional): A boolean flag that, if set to `true`, enables + * detailed logging and output during the retrieval process. Defaults to `false`. + * + * - `vectorStore`: The `VectorStore` instance implementing `VectorStoreInterface` + * that will be used for document storage and retrieval. + * + * - `k` (optional): Specifies the number of documents to retrieve per search + * query. Defaults to 4 if not specified. + * + * - `filter` (optional): A filter of type `FilterType` (defined by the vector store) + * to refine the set of documents returned, allowing for targeted search results. + * + * - `searchType`: Determines the type of search to perform: + * - `"similarity"`: Executes a similarity search, retrieving documents based purely + * on vector similarity to the query. + * - `"mmr"`: Executes a maximal marginal relevance (MMR) search, balancing similarity + * and diversity in the search results. + * + * - `searchKwargs` (optional): Used only if `searchType` is `"mmr"`, this object + * provides additional options for MMR search, including: + * - `fetchK`: Specifies the number of documents to initially fetch before applying + * the MMR algorithm, providing a pool from which the most diverse results are selected. + * - `lambda`: A diversity parameter, where 0 emphasizes diversity and 1 emphasizes + * relevance to the query. Values between 0 and 1 provide a balance of relevance and diversity. + * + * @template V - The type of vector store implementing `VectorStoreInterface`. */ export type VectorStoreRetrieverInput = BaseRetrieverInput & @@ -57,11 +142,34 @@ export type VectorStoreRetrieverInput = } ); +/** + * Interface for a retriever that uses a vector store to store and retrieve + * document embeddings. This retriever interface allows for adding documents + * to the underlying vector store and conducting retrieval operations. + * + * `VectorStoreRetrieverInterface` extends `BaseRetrieverInterface` to provide + * document retrieval capabilities based on vector similarity. + * + * @interface VectorStoreRetrieverInterface + * @extends BaseRetrieverInterface + */ export interface VectorStoreRetrieverInterface< V extends VectorStoreInterface = VectorStoreInterface > extends BaseRetrieverInterface { vectorStore: V; + /** + * Adds an array of documents to the vector store. + * + * This method embeds the provided documents and stores them within the + * vector store. Additional options can be specified for custom behavior + * during the addition process. + * + * @param documents - An array of documents to embed and add to the vector store. + * @param options - Optional settings to customize document addition. + * @returns A promise that resolves to an array of document IDs or `void`, + * depending on the implementation. + */ addDocuments( documents: DocumentInterface[], options?: AddDocumentOptions @@ -69,8 +177,17 @@ export interface VectorStoreRetrieverInterface< } /** - * Class for performing document retrieval from a VectorStore. Can perform - * similarity search or maximal marginal relevance search. + * Class for retrieving documents from a `VectorStore` based on vector similarity + * or maximal marginal relevance (MMR). + * + * `VectorStoreRetriever` extends `BaseRetriever`, implementing methods for + * adding documents to the underlying vector store and performing document + * retrieval with optional configurations. + * + * @class VectorStoreRetriever + * @extends BaseRetriever + * @implements VectorStoreRetrieverInterface + * @template V - Type of vector store implementing `VectorStoreInterface`. */ export class VectorStoreRetriever< V extends VectorStoreInterface = VectorStoreInterface @@ -86,20 +203,99 @@ export class VectorStoreRetriever< return ["langchain_core", "vectorstores"]; } + /** + * The instance of `VectorStore` used for storing and retrieving document embeddings. + * This vector store must implement the `VectorStoreInterface` to be compatible + * with the retriever’s operations. + */ vectorStore: V; + /** + * Specifies the number of documents to retrieve for each search query. + * Defaults to 4 if not specified, providing a basic result count for similarity or MMR searches. + */ k = 4; + /** + * Determines the type of search operation to perform on the vector store. + * + * - `"similarity"` (default): Conducts a similarity search based purely on vector similarity + * to the query. + * - `"mmr"`: Executes a maximal marginal relevance (MMR) search, balancing relevance and + * diversity in the retrieved results. + */ searchType = "similarity"; + /** + * Additional options specific to maximal marginal relevance (MMR) search, applicable + * only if `searchType` is set to `"mmr"`. + * + * Includes: + * - `fetchK`: The initial number of documents fetched before applying the MMR algorithm, + * allowing for a larger selection from which to choose the most diverse results. + * - `lambda`: A parameter between 0 and 1 to adjust the relevance-diversity balance, + * where 0 prioritizes diversity and 1 prioritizes relevance. + */ searchKwargs?: VectorStoreRetrieverMMRSearchKwargs; + /** + * Optional filter applied to search results, defined by the `FilterType` of the vector store. + * Allows for refined, targeted results by restricting the returned documents based + * on specified filter criteria. + */ filter?: V["FilterType"]; + /** + * Returns the type of vector store, as defined by the `vectorStore` instance. + * + * @returns {string} The vector store type. + */ _vectorstoreType(): string { return this.vectorStore._vectorstoreType(); } + /** + * Initializes a new instance of `VectorStoreRetriever` with the specified configuration. + * + * This constructor configures the retriever to interact with a given `VectorStore` + * and supports different retrieval strategies, including similarity search and maximal + * marginal relevance (MMR) search. Various options allow customization of the number + * of documents retrieved per query, filtering based on conditions, and fine-tuning + * MMR-specific parameters. + * + * @param fields - Configuration options for setting up the retriever: + * + * - `vectorStore` (required): The `VectorStore` instance implementing `VectorStoreInterface` + * that will be used to store and retrieve document embeddings. This is the core component + * of the retriever, enabling vector-based similarity and MMR searches. + * + * - `k` (optional): Specifies the number of documents to retrieve per search query. If not + * provided, defaults to 4. This count determines the number of most relevant documents returned + * for each search operation, balancing performance with comprehensiveness. + * + * - `searchType` (optional): Defines the search approach used by the retriever, allowing for + * flexibility between two methods: + * - `"similarity"` (default): A similarity-based search, retrieving documents with high vector + * similarity to the query. This type prioritizes relevance and is often used when diversity + * among results is less critical. + * - `"mmr"`: Maximal Marginal Relevance search, which combines relevance with diversity. MMR + * is useful for scenarios where varied content is essential, as it selects results that + * both match the query and introduce content diversity. + * + * - `filter` (optional): A filter of type `FilterType`, defined by the vector store, that allows + * for refined and targeted search results. This filter applies specified conditions to limit + * which documents are eligible for retrieval, offering control over the scope of results. + * + * - `searchKwargs` (optional, applicable only if `searchType` is `"mmr"`): Additional settings + * for configuring MMR-specific behavior. These parameters allow further tuning of the MMR + * search process: + * - `fetchK`: The initial number of documents fetched from the vector store before the MMR + * algorithm is applied. Fetching a larger set enables the algorithm to select a more + * diverse subset of documents. + * - `lambda`: A parameter controlling the relevance-diversity balance, where 0 emphasizes + * diversity and 1 prioritizes relevance. Intermediate values provide a blend of the two, + * allowing customization based on the importance of content variety relative to query relevance. + */ constructor(fields: VectorStoreRetrieverInput) { super(fields); this.vectorStore = fields.vectorStore; @@ -111,6 +307,22 @@ export class VectorStoreRetriever< } } + /** + * Retrieves relevant documents based on the specified query, using either + * similarity or maximal marginal relevance (MMR) search. + * + * If `searchType` is set to `"mmr"`, performs an MMR search to balance + * similarity and diversity among results. If `searchType` is `"similarity"`, + * retrieves results purely based on similarity to the query. + * + * @param query - The query string used to find relevant documents. + * @param runManager - Optional callback manager for tracking retrieval progress. + * @returns A promise that resolves to an array of `DocumentInterface` instances + * representing the most relevant documents to the query. + * @throws {Error} Throws an error if MMR search is requested but not supported + * by the vector store. + * @protected + */ async _getRelevantDocuments( query: string, runManager?: CallbackManagerForRetrieverRun @@ -139,6 +351,18 @@ export class VectorStoreRetriever< ); } + /** + * Adds an array of documents to the vector store, embedding them as part of + * the storage process. + * + * This method delegates document embedding and storage to the `addDocuments` + * method of the underlying vector store. + * + * @param documents - An array of documents to embed and add to the vector store. + * @param options - Optional settings to customize document addition. + * @returns A promise that resolves to an array of document IDs or `void`, + * depending on the vector store's implementation. + */ async addDocuments( documents: DocumentInterface[], options?: AddDocumentOptions @@ -147,33 +371,101 @@ export class VectorStoreRetriever< } } +/** + * Interface defining the structure and operations of a vector store, which + * facilitates the storage, retrieval, and similarity search of document vectors. + * + * `VectorStoreInterface` provides methods for adding, deleting, and searching + * documents based on vector embeddings, including support for similarity + * search with optional filtering and relevance-based retrieval. + * + * @extends Serializable + */ export interface VectorStoreInterface extends Serializable { + /** + * Defines the filter type used in search and delete operations. Can be an + * object for structured conditions or a string for simpler filtering. + */ FilterType: object | string; + /** + * Instance of `EmbeddingsInterface` used to generate vector embeddings for + * documents, enabling vector-based search operations. + */ embeddings: EmbeddingsInterface; + /** + * Returns a string identifying the type of vector store implementation, + * useful for distinguishing between different vector storage backends. + * + * @returns {string} A string indicating the vector store type. + */ _vectorstoreType(): string; + /** + * Adds precomputed vectors and their corresponding documents to the vector store. + * + * @param vectors - An array of vectors, with each vector representing a document. + * @param documents - An array of `DocumentInterface` instances corresponding to each vector. + * @param options - Optional configurations for adding documents, potentially covering indexing or metadata handling. + * @returns A promise that resolves to an array of document IDs or void, depending on implementation. + */ addVectors( vectors: number[][], documents: DocumentInterface[], options?: AddDocumentOptions ): Promise; + /** + * Adds an array of documents to the vector store. + * + * @param documents - An array of documents to be embedded and stored in the vector store. + * @param options - Optional configurations for embedding and storage operations. + * @returns A promise that resolves to an array of document IDs or void, depending on implementation. + */ addDocuments( documents: DocumentInterface[], options?: AddDocumentOptions ): Promise; + /** + * Deletes documents from the vector store based on the specified parameters. + * + * @param _params - A flexible object containing key-value pairs that define + * the conditions for selecting documents to delete. + * @returns A promise that resolves once the deletion operation is complete. + */ // eslint-disable-next-line @typescript-eslint/no-explicit-any delete(_params?: Record): Promise; + /** + * Searches for documents similar to a given vector query and returns them + * with similarity scores. + * + * @param query - A vector representing the query for similarity search. + * @param k - The number of similar documents to return. + * @param filter - Optional filter based on `FilterType` to restrict results. + * @returns A promise that resolves to an array of tuples, each containing a + * `DocumentInterface` and its corresponding similarity score. + */ similaritySearchVectorWithScore( query: number[], k: number, filter?: this["FilterType"] ): Promise<[DocumentInterface, number][]>; + /** + * Searches for documents similar to a text query, embedding the query + * and retrieving documents based on vector similarity. + * + * @param query - The text query to search for. + * @param k - Optional number of similar documents to return. + * @param filter - Optional filter based on `FilterType` to restrict results. + * @param callbacks - Optional callbacks for tracking progress or events + * during the search process. + * @returns A promise that resolves to an array of `DocumentInterface` + * instances representing similar documents. + */ similaritySearch( query: string, k?: number, @@ -181,6 +473,18 @@ export interface VectorStoreInterface extends Serializable { callbacks?: Callbacks ): Promise; + /** + * Searches for documents similar to a text query and includes similarity + * scores in the result. + * + * @param query - The text query to search for. + * @param k - Optional number of similar documents to return. + * @param filter - Optional filter based on `FilterType` to restrict results. + * @param callbacks - Optional callbacks for tracking progress or events + * during the search process. + * @returns A promise that resolves to an array of tuples, each containing + * a `DocumentInterface` and its similarity score. + */ similaritySearchWithScore( query: string, k?: number, @@ -209,6 +513,20 @@ export interface VectorStoreInterface extends Serializable { callbacks: Callbacks | undefined ): Promise; + /** + * Converts the vector store into a retriever, making it suitable for use in + * retrieval-based workflows and allowing additional configuration. + * + * @param kOrFields - Optional parameter for specifying either the number of + * documents to retrieve or partial retriever configurations. + * @param filter - Optional filter based on `FilterType` for retrieval restriction. + * @param callbacks - Optional callbacks for tracking retrieval events or progress. + * @param tags - General-purpose tags to add contextual information to the retriever. + * @param metadata - General-purpose metadata providing additional context + * for retrieval. + * @param verbose - If `true`, enables detailed logging during retrieval. + * @returns An instance of `VectorStoreRetriever` configured with the specified options. + */ asRetriever( kOrFields?: number | Partial>, filter?: this["FilterType"], @@ -220,9 +538,17 @@ export interface VectorStoreInterface extends Serializable { } /** - * Abstract class representing a store of vectors. Provides methods for - * adding vectors and documents, deleting from the store, and searching - * the store. + * Abstract class representing a vector storage system for performing + * similarity searches on embedded documents. + * + * `VectorStore` provides methods for adding precomputed vectors or documents, + * removing documents based on criteria, and performing similarity searches + * with optional scoring. Subclasses are responsible for implementing specific + * storage mechanisms and the exact behavior of certain abstract methods. + * + * @abstract + * @extends Serializable + * @implements VectorStoreInterface */ export abstract class VectorStore extends Serializable @@ -230,41 +556,108 @@ export abstract class VectorStore { declare FilterType: object | string; + /** + * Namespace within LangChain to uniquely identify this vector store's + * location, based on the vector store type. + * + * @internal + */ // Only ever instantiated in main LangChain lc_namespace = ["langchain", "vectorstores", this._vectorstoreType()]; + /** + * Embeddings interface for generating vector embeddings from text queries, + * enabling vector-based similarity searches. + */ embeddings: EmbeddingsInterface; + /** + * Initializes a new vector store with embeddings and database configuration. + * + * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries. + * @param dbConfig - Configuration settings for the database or storage system. + */ // eslint-disable-next-line @typescript-eslint/no-explicit-any constructor(embeddings: EmbeddingsInterface, dbConfig: Record) { super(dbConfig); this.embeddings = embeddings; } + /** + * Returns a string representing the type of vector store, which subclasses + * must implement to identify their specific vector storage type. + * + * @returns {string} A string indicating the vector store type. + * @abstract + */ abstract _vectorstoreType(): string; + /** + * Adds precomputed vectors and corresponding documents to the vector store. + * + * @param vectors - An array of vectors representing each document. + * @param documents - Array of documents associated with each vector. + * @param options - Optional configuration for adding vectors, such as indexing. + * @returns A promise resolving to an array of document IDs or void, based on implementation. + * @abstract + */ abstract addVectors( vectors: number[][], documents: DocumentInterface[], options?: AddDocumentOptions ): Promise; + /** + * Adds documents to the vector store, embedding them first through the + * `embeddings` instance. + * + * @param documents - Array of documents to embed and add. + * @param options - Optional configuration for embedding and storing documents. + * @returns A promise resolving to an array of document IDs or void, based on implementation. + * @abstract + */ abstract addDocuments( documents: DocumentInterface[], options?: AddDocumentOptions ): Promise; + /** + * Deletes documents from the vector store based on the specified parameters. + * + * @param _params - Flexible key-value pairs defining conditions for document deletion. + * @returns A promise that resolves once the deletion is complete. + */ // eslint-disable-next-line @typescript-eslint/no-explicit-any async delete(_params?: Record): Promise { throw new Error("Not implemented."); } + /** + * Performs a similarity search using a vector query and returns results + * along with their similarity scores. + * + * @param query - Vector representing the search query. + * @param k - Number of similar results to return. + * @param filter - Optional filter based on `FilterType` to restrict results. + * @returns A promise resolving to an array of tuples containing documents and their similarity scores. + * @abstract + */ abstract similaritySearchVectorWithScore( query: number[], k: number, filter?: this["FilterType"] ): Promise<[DocumentInterface, number][]>; + /** + * Searches for documents similar to a text query by embedding the query and + * performing a similarity search on the resulting vector. + * + * @param query - Text query for finding similar documents. + * @param k - Number of similar results to return. Defaults to 4. + * @param filter - Optional filter based on `FilterType`. + * @param _callbacks - Optional callbacks for monitoring search progress + * @returns A promise resolving to an array of `DocumentInterface` instances representing similar documents. + */ async similaritySearch( query: string, k = 4, @@ -280,6 +673,17 @@ export abstract class VectorStore return results.map((result) => result[0]); } + /** + * Searches for documents similar to a text query by embedding the query, + * and returns results with similarity scores. + * + * @param query - Text query for finding similar documents. + * @param k - Number of similar results to return. Defaults to 4. + * @param filter - Optional filter based on `FilterType`. + * @param _callbacks - Optional callbacks for monitoring search progress + * @returns A promise resolving to an array of tuples, each containing a + * document and its similarity score. + */ async similaritySearchWithScore( query: string, k = 4, @@ -314,6 +718,21 @@ export abstract class VectorStore _callbacks: Callbacks | undefined // implement passing to embedQuery later ): Promise; + /** + * Creates a `VectorStore` instance from an array of text strings and optional + * metadata, using the specified embeddings and database configuration. + * + * Subclasses must implement this method to define how text and metadata + * are embedded and stored in the vector store. Throws an error if not overridden. + * + * @param _texts - Array of strings representing the text documents to be stored. + * @param _metadatas - Metadata for the texts, either as an array (one for each text) + * or a single object (applied to all texts). + * @param _embeddings - Instance of `EmbeddingsInterface` to embed the texts. + * @param _dbConfig - Database configuration settings. + * @returns A promise that resolves to a new `VectorStore` instance. + * @throws {Error} Throws an error if this method is not overridden by a subclass. + */ static fromTexts( _texts: string[], _metadatas: object[] | object, @@ -326,6 +745,19 @@ export abstract class VectorStore ); } + /** + * Creates a `VectorStore` instance from an array of documents, using the specified + * embeddings and database configuration. + * + * Subclasses must implement this method to define how documents are embedded + * and stored. Throws an error if not overridden. + * + * @param _docs - Array of `DocumentInterface` instances representing the documents to be stored. + * @param _embeddings - Instance of `EmbeddingsInterface` to embed the documents. + * @param _dbConfig - Database configuration settings. + * @returns A promise that resolves to a new `VectorStore` instance. + * @throws {Error} Throws an error if this method is not overridden by a subclass. + */ static fromDocuments( _docs: DocumentInterface[], _embeddings: EmbeddingsInterface, @@ -337,6 +769,44 @@ export abstract class VectorStore ); } + /** + * Creates a `VectorStoreRetriever` instance with flexible configuration options. + * + * @param kOrFields + * - If a number is provided, it sets the `k` parameter (number of items to retrieve). + * - If an object is provided, it should contain various configuration options. + * @param filter + * - Optional filter criteria to limit the items retrieved based on the specified filter type. + * @param callbacks + * - Optional callbacks that may be triggered at specific stages of the retrieval process. + * @param tags + * - Tags to categorize or label the `VectorStoreRetriever`. Defaults to an empty array if not provided. + * @param metadata + * - Additional metadata as key-value pairs to add contextual information for the retrieval process. + * @param verbose + * - If `true`, enables detailed logging for the retrieval process. Defaults to `false`. + * + * @returns + * - A configured `VectorStoreRetriever` instance based on the provided parameters. + * + * @example + * Basic usage with a `k` value: + * ```typescript + * const retriever = myVectorStore.asRetriever(5); + * ``` + * + * Usage with a configuration object: + * ```typescript + * const retriever = myVectorStore.asRetriever({ + * k: 10, + * filter: myFilter, + * tags: ['example', 'test'], + * verbose: true, + * searchType: 'mmr', + * searchKwargs: { alpha: 0.5 }, + * }); + * ``` + */ asRetriever( kOrFields?: number | Partial>, filter?: this["FilterType"], @@ -378,12 +848,50 @@ export abstract class VectorStore } /** - * Abstract class extending VectorStore with functionality for saving and - * loading the vector store. + * Abstract class extending `VectorStore` that defines a contract for saving + * and loading vector store instances. + * + * The `SaveableVectorStore` class allows vector store implementations to + * persist their data and retrieve it when needed.The format for saving and + * loading data is left to the implementing subclass. + * + * Subclasses must implement the `save` method to handle their custom + * serialization logic, while the `load` method enables reconstruction of a + * vector store from saved data, requiring compatible embeddings through the + * `EmbeddingsInterface`. + * + * @abstract + * @extends VectorStore */ export abstract class SaveableVectorStore extends VectorStore { + /** + * Saves the current state of the vector store to the specified directory. + * + * This method must be implemented by subclasses to define their own + * serialization process for persisting vector data. The implementation + * determines the structure and format of the saved data. + * + * @param directory - The directory path where the vector store data + * will be saved. + * @abstract + */ abstract save(directory: string): Promise; + /** + * Loads a vector store instance from the specified directory, using the + * provided embeddings to ensure compatibility. + * + * This static method reconstructs a `SaveableVectorStore` from previously + * saved data. Implementations should interpret the saved data format to + * recreate the vector store instance. + * + * @param _directory - The directory path from which the vector store + * data will be loaded. + * @param _embeddings - An instance of `EmbeddingsInterface` to align + * the embeddings with the loaded vector data. + * @returns A promise that resolves to a `SaveableVectorStore` instance + * constructed from the saved data. + */ static load( _directory: string, _embeddings: EmbeddingsInterface