Skip to content

Commit

Permalink
docs(core): VectorStore and Retriever: types, interfaces, classes (#7141)
Browse files Browse the repository at this point in the history
  • Loading branch information
SkSirius authored Nov 5, 2024
1 parent e54c101 commit 7a9eac7
Show file tree
Hide file tree
Showing 3 changed files with 626 additions and 32 deletions.
34 changes: 17 additions & 17 deletions docs/core_docs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,6 @@ docs/integrations/vectorstores/elasticsearch.md
docs/integrations/vectorstores/elasticsearch.mdx
docs/integrations/vectorstores/chroma.md
docs/integrations/vectorstores/chroma.mdx
docs/integrations/toolkits/vectorstore.md
docs/integrations/toolkits/vectorstore.mdx
docs/integrations/toolkits/sql.md
docs/integrations/toolkits/sql.mdx
docs/integrations/toolkits/openapi.md
docs/integrations/toolkits/openapi.mdx
docs/integrations/tools/tavily_search.md
docs/integrations/tools/tavily_search.mdx
docs/integrations/tools/serpapi.md
Expand All @@ -266,6 +260,12 @@ docs/integrations/tools/exa_search.md
docs/integrations/tools/exa_search.mdx
docs/integrations/tools/duckduckgo_search.md
docs/integrations/tools/duckduckgo_search.mdx
docs/integrations/toolkits/vectorstore.md
docs/integrations/toolkits/vectorstore.mdx
docs/integrations/toolkits/sql.md
docs/integrations/toolkits/sql.mdx
docs/integrations/toolkits/openapi.md
docs/integrations/toolkits/openapi.mdx
docs/integrations/text_embedding/togetherai.md
docs/integrations/text_embedding/togetherai.mdx
docs/integrations/text_embedding/openai.md
Expand Down Expand Up @@ -376,16 +376,6 @@ docs/integrations/retrievers/self_query/hnswlib.md
docs/integrations/retrievers/self_query/hnswlib.mdx
docs/integrations/retrievers/self_query/chroma.md
docs/integrations/retrievers/self_query/chroma.mdx
docs/integrations/document_loaders/file_loaders/unstructured.md
docs/integrations/document_loaders/file_loaders/unstructured.mdx
docs/integrations/document_loaders/file_loaders/text.md
docs/integrations/document_loaders/file_loaders/text.mdx
docs/integrations/document_loaders/file_loaders/pdf.md
docs/integrations/document_loaders/file_loaders/pdf.mdx
docs/integrations/document_loaders/file_loaders/directory.md
docs/integrations/document_loaders/file_loaders/directory.mdx
docs/integrations/document_loaders/file_loaders/csv.md
docs/integrations/document_loaders/file_loaders/csv.mdx
docs/integrations/document_loaders/web_loaders/web_puppeteer.md
docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx
docs/integrations/document_loaders/web_loaders/web_cheerio.md
Expand All @@ -397,4 +387,14 @@ docs/integrations/document_loaders/web_loaders/pdf.mdx
docs/integrations/document_loaders/web_loaders/langsmith.md
docs/integrations/document_loaders/web_loaders/langsmith.mdx
docs/integrations/document_loaders/web_loaders/firecrawl.md
docs/integrations/document_loaders/web_loaders/firecrawl.mdx
docs/integrations/document_loaders/web_loaders/firecrawl.mdx
docs/integrations/document_loaders/file_loaders/unstructured.md
docs/integrations/document_loaders/file_loaders/unstructured.mdx
docs/integrations/document_loaders/file_loaders/text.md
docs/integrations/document_loaders/file_loaders/text.mdx
docs/integrations/document_loaders/file_loaders/pdf.md
docs/integrations/document_loaders/file_loaders/pdf.mdx
docs/integrations/document_loaders/file_loaders/directory.md
docs/integrations/document_loaders/file_loaders/directory.mdx
docs/integrations/document_loaders/file_loaders/csv.md
docs/integrations/document_loaders/file_loaders/csv.mdx
94 changes: 90 additions & 4 deletions langchain-core/src/retrievers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,24 @@ import { Runnable, type RunnableInterface } from "../runnables/base.js";
import { RunnableConfig, ensureConfig } from "../runnables/config.js";

/**
* Input configuration options for initializing a retriever that extends
* the `BaseRetriever` class (the base class all indexes should extend).
* This interface provides base properties common to all retrievers,
* allowing customization of callback functions, tagging, metadata, and
* logging verbosity.
*
* Fields:
* - `callbacks` (optional): An array of callback functions that handle various
* events during retrieval, such as logging, error handling, or progress updates.
*
* - `tags` (optional): An array of strings used to add contextual tags to
* retrieval operations, allowing for easier categorization and tracking.
*
* - `metadata` (optional): A record of key-value pairs to store additional
* contextual information for retrieval operations, which can be useful
* for logging or auditing purposes.
*
* - `verbose` (optional): A boolean flag that, if set to `true`, enables
* detailed logging and output during the retrieval process. Defaults to `false`.
*/
export interface BaseRetrieverInput {
callbacks?: Callbacks;
Expand All @@ -19,20 +36,47 @@ export interface BaseRetrieverInput {
verbose?: boolean;
}

/**
* Contract for a retriever: a runnable that accepts a string query and
* produces an array of documents.
*
* The interface extends `RunnableInterface<string, DocumentInterface<Metadata>[]>`
* and additionally declares `getRelevantDocuments`, which takes the query
* directly together with optional callback configuration.
*
* @template Metadata - The type of metadata associated with each document.
* Must extend `Record<string, any>`, which is also the default.
*/
export interface BaseRetrieverInterface<
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Metadata extends Record<string, any> = Record<string, any>
> extends RunnableInterface<string, DocumentInterface<Metadata>[]> {
/**
* Retrieves documents relevant to a given query.
*
* @param query - The query string to search for relevant documents.
* @param config - (optional) Either a `Callbacks` value or a
* `BaseCallbackConfig` object used to configure the retrieval call.
* @returns A promise that resolves to an array of `DocumentInterface`
* instances whose metadata is typed by the `Metadata` type parameter.
*/
getRelevantDocuments(
query: string,
config?: Callbacks | BaseCallbackConfig
): Promise<DocumentInterface<Metadata>[]>;
}

/**
* Abstract base class for a document retrieval system. A retrieval system
* is defined as something that can take string queries and return the
* most relevant documents from some source.
*
* `BaseRetriever` provides common properties and methods for derived retrievers,
* such as callbacks, tagging, and verbose logging. Custom retrieval systems
* should extend this class and implement `_getRelevantDocuments` to define
* the specific retrieval logic.
*
* @template Metadata - The type of metadata associated with each document,
* defaulting to `Record<string, any>`.
*/
export abstract class BaseRetriever<
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand All @@ -41,14 +85,33 @@ export abstract class BaseRetriever<
extends Runnable<string, DocumentInterface<Metadata>[]>
implements BaseRetrieverInterface
{
/**
* Optional callbacks to handle various events in the retrieval process.
*/
callbacks?: Callbacks;

/**
* Tags to label or categorize the retrieval operation.
*/
tags?: string[];

/**
* Metadata to provide additional context or information about the retrieval
* operation.
*/
metadata?: Record<string, unknown>;

/**
* If set to `true`, enables verbose logging for the retrieval process.
*/
verbose?: boolean;

/**
* Constructs a new `BaseRetriever` instance with optional configuration fields.
*
* @param fields - Optional input configuration that can include `callbacks`,
* `tags`, `metadata`, and `verbose` settings for custom retriever behavior.
*/
constructor(fields?: BaseRetrieverInput) {
super(fields);
this.callbacks = fields?.callbacks;
Expand All @@ -62,13 +125,36 @@ export abstract class BaseRetriever<
* changes to people currently using subclassed custom retrievers.
* Change it on next major release.
*/
/**
* Placeholder for the retrieval logic that subclasses are expected to
* provide.
*
* This base implementation never produces documents: it throws
* unconditionally. Note that the method is not `async`, so the error is
* thrown synchronously when the method is called rather than delivered as
* a rejected promise. It is intended to become an abstract method in the
* next major release.
*
* @param _query - The query string used to search for relevant documents.
* Unused by this base implementation.
* @param _callbacks - (optional) Callback manager for the retriever run.
* Unused by this base implementation.
* @returns In subclasses, a promise resolving to the `DocumentInterface`
* instances relevant to the query; the base implementation never returns.
* @throws {Error} Always, with the message "Not implemented!".
*/
_getRelevantDocuments(
_query: string,
_callbacks?: CallbackManagerForRetrieverRun
): Promise<DocumentInterface<Metadata>[]> {
throw new Error("Not implemented!");
}

/**
* Executes a retrieval operation.
*
* @param input - The query string used to search for relevant documents.
* @param options - (optional) Configuration options for the retrieval run,
* which may include callbacks, tags, and metadata.
* @returns A promise that resolves to an array of `DocumentInterface` instances
* representing the most relevant documents to the query.
*/
async invoke(
input: string,
options?: RunnableConfig
Expand Down
Loading

0 comments on commit 7a9eac7

Please sign in to comment.