From eb7ee1262f60f15a956f35b4f0e1768cb1191300 Mon Sep 17 00:00:00 2001 From: volodymyr-memsql <57520563+volodymyr-memsql@users.noreply.github.com> Date: Fri, 14 Jun 2024 00:23:16 +0300 Subject: [PATCH] community[minor]: Update SingleStore vector store (#5715) * initial work * fix tests * update documentation * fix example * fix spelling * Format * Format --------- Co-authored-by: Volodymyr Tkachuk Co-authored-by: jacoblee93 --- .../integrations/vectorstores/singlestore.mdx | 31 +- .../singlestore_hybrid_search.ts | 86 ++++ .../src/vectorstores/singlestore.ts | 342 ++++++++++++-- .../tests/singlestore.int.test.ts | 436 +++++++++++++++++- 4 files changed, 861 insertions(+), 34 deletions(-) create mode 100644 examples/src/indexes/vector_stores/singlestore_hybrid_search.ts diff --git a/docs/core_docs/docs/integrations/vectorstores/singlestore.mdx b/docs/core_docs/docs/integrations/vectorstores/singlestore.mdx index edfec8a96d28..451456f27623 100644 --- a/docs/core_docs/docs/integrations/vectorstores/singlestore.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/singlestore.mdx @@ -6,7 +6,17 @@ import CodeBlock from "@theme/CodeBlock"; # SingleStore -[SingleStoreDB](https://singlestore.com/) is a high-performance distributed SQL database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premise. It provides vector storage, as well as vector functions like [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), thereby supporting AI applications that require text similarity matching. +[SingleStoreDB](https://singlestore.com/) is a robust, high-performance distributed SQL database solution designed to excel in both [cloud](https://www.singlestore.com/cloud/) and on-premises environments. 
Boasting a versatile feature set, it offers seamless deployment options while delivering unparalleled performance. + +A standout feature of SingleStoreDB is its advanced support for vector storage and operations, making it an ideal choice for applications requiring intricate AI capabilities such as text similarity matching. With built-in vector functions like [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html) and [euclidean_distance](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/euclidean_distance.html), SingleStoreDB empowers developers to implement sophisticated algorithms efficiently. + +For developers keen on leveraging vector data within SingleStoreDB, a comprehensive tutorial is available, guiding them through the intricacies of [working with vector data](https://docs.singlestore.com/managed-service/en/developer-resources/functional-extensions/working-with-vector-data.html). This tutorial delves into the Vector Store within SingleStoreDB, showcasing its ability to facilitate searches based on vector similarity. Leveraging vector indexes, queries can be executed with remarkable speed, enabling swift retrieval of relevant data. + +Moreover, SingleStoreDB's Vector Store seamlessly integrates with [full-text indexing based on Lucene](https://docs.singlestore.com/cloud/developer-resources/functional-extensions/working-with-full-text-search/), enabling powerful text similarity searches. Users can filter search results based on selected fields of document metadata objects, enhancing query precision. + +What sets SingleStoreDB apart is its ability to combine vector and full-text searches in various ways, offering flexibility and versatility. Whether prefiltering by text or vector similarity and selecting the most relevant data, or employing a weighted sum approach to compute a final similarity score, developers have multiple options at their disposal. 
+ +In essence, SingleStoreDB provides a comprehensive solution for managing and querying vector data, offering unparalleled performance and flexibility for AI-driven applications. :::tip Compatibility Only available on Node.js. @@ -50,3 +60,22 @@ import UsageExampleWithMetadata from "@examples/indexes/vector_stores/singlestor If it is needed to filter results based on specific metadata fields, you can pass a filter parameter to narrow down your search to the documents that match all specified fields in the filter object: {UsageExampleWithMetadata} + +### Vector indexes + +Enhance your search efficiency with SingleStoreDB version 8.5 or above by leveraging [ANN vector indexes](https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/). +By setting `useVectorIndex: true` during vector store object creation, you can activate this feature. +Additionally, if your vectors differ in dimensionality from the default OpenAI embedding size of 1536, be sure to set the `vectorSize` parameter accordingly. + +### Hybrid search + +import HybridSearchUsageExample from "@examples/indexes/vector_stores/singlestore_hybrid_search.ts"; + +SingleStoreDB presents a diverse range of search strategies, each meticulously crafted to cater to specific use cases and user preferences. +The default `VECTOR_ONLY` strategy utilizes vector operations such as `DOT_PRODUCT` or `EUCLIDEAN_DISTANCE` to calculate similarity scores directly between vectors, while `TEXT_ONLY` employs Lucene-based full-text search, particularly advantageous for text-centric applications. +For users seeking a balanced approach, `FILTER_BY_TEXT` first refines results based on text similarity before conducting vector comparisons, whereas `FILTER_BY_VECTOR` prioritizes vector similarity, filtering results before assessing text similarity for optimal matches. +Notably, `TEXT_ONLY`, `FILTER_BY_TEXT`, and `FILTER_BY_VECTOR` all require a full-text index (`useFullTextIndex: true`). 
Additionally, `WEIGHTED_SUM` emerges as a sophisticated strategy, calculating the final similarity score by weighing vector and text similarities, albeit exclusively utilizing dot_product distance calculations and also requiring a full-text index. +These versatile strategies empower users to fine-tune searches according to their unique needs, facilitating efficient and precise data retrieval and analysis. +Moreover, SingleStoreDB's hybrid approaches, exemplified by `FILTER_BY_TEXT`, `FILTER_BY_VECTOR`, and `WEIGHTED_SUM` strategies, seamlessly blend vector and text-based searches to maximize efficiency and accuracy, ensuring users can fully leverage the platform's capabilities for a wide range of applications. + +{HybridSearchUsageExample} diff --git a/examples/src/indexes/vector_stores/singlestore_hybrid_search.ts b/examples/src/indexes/vector_stores/singlestore_hybrid_search.ts new file mode 100644 index 000000000000..dd4f918697f0 --- /dev/null +++ b/examples/src/indexes/vector_stores/singlestore_hybrid_search.ts @@ -0,0 +1,86 @@ +import { SingleStoreVectorStore } from "@langchain/community/vectorstores/singlestore"; +import { OpenAIEmbeddings } from "@langchain/openai"; + +export const run = async () => { + const vectorStore = await SingleStoreVectorStore.fromTexts( + [ + "In the parched desert, a sudden rainstorm brought relief, as the droplets danced upon the thirsty earth, rejuvenating the landscape with the sweet scent of petrichor.", + "Amidst the bustling cityscape, the rain fell relentlessly, creating a symphony of pitter-patter on the pavement, while umbrellas bloomed like colorful flowers in a sea of gray.", + "High in the mountains, the rain transformed into a delicate mist, enveloping the peaks in a mystical veil, where each droplet seemed to whisper secrets to the ancient rocks below.", + "Blanketing the countryside in a soft, pristine layer, the snowfall painted a serene tableau, muffling the world in a tranquil hush as delicate flakes settled upon 
the branches of trees like nature's own lacework.", + "In the urban landscape, snow descended, transforming bustling streets into a winter wonderland, where the laughter of children echoed amidst the flurry of snowballs and the twinkle of holiday lights.", + "Atop the rugged peaks, snow fell with an unyielding intensity, sculpting the landscape into a pristine alpine paradise, where the frozen crystals shimmered under the moonlight, casting a spell of enchantment over the wilderness below.", + ], + [ + { category: "rain" }, + { category: "rain" }, + { category: "rain" }, + { category: "snow" }, + { category: "snow" }, + { category: "snow" }, + ], + new OpenAIEmbeddings(), + { + connectionOptions: { + host: process.env.SINGLESTORE_HOST, + port: Number(process.env.SINGLESTORE_PORT), + user: process.env.SINGLESTORE_USERNAME, + password: process.env.SINGLESTORE_PASSWORD, + database: process.env.SINGLESTORE_DATABASE, + }, + distanceMetric: "DOT_PRODUCT", + useVectorIndex: true, + useFullTextIndex: true, + } + ); + + const resultOne = await vectorStore.similaritySearch( + "rainstorm in parched desert, rain", + 1, + { category: "rain" } + ); + console.log(resultOne[0].pageContent); + + await vectorStore.setSearchConfig({ + searchStrategy: "TEXT_ONLY", + }); + const resultTwo = await vectorStore.similaritySearch( + "rainstorm in parched desert, rain", + 1 + ); + console.log(resultTwo[0].pageContent); + + await vectorStore.setSearchConfig({ + searchStrategy: "FILTER_BY_TEXT", + filterThreshold: 0.1, + }); + const resultThree = await vectorStore.similaritySearch( + "rainstorm in parched desert, rain", + 1 + ); + console.log(resultThree[0].pageContent); + + await vectorStore.setSearchConfig({ + searchStrategy: "FILTER_BY_VECTOR", + filterThreshold: 0.1, + }); + const resultFour = await vectorStore.similaritySearch( + "rainstorm in parched desert, rain", + 1 + ); + console.log(resultFour[0].pageContent); + + await vectorStore.setSearchConfig({ + searchStrategy: "WEIGHTED_SUM", 
+ textWeight: 0.2, + vectorWeight: 0.8, + vectorselectCountMultiplier: 10, + }); + const resultFive = await vectorStore.similaritySearch( + "rainstorm in parched desert, rain", + 1 + ); + console.log(resultFive[0].pageContent); + + await vectorStore.end(); +}; diff --git a/libs/langchain-community/src/vectorstores/singlestore.ts b/libs/langchain-community/src/vectorstores/singlestore.ts index 5ca99c7658fa..e1997a7c8645 100644 --- a/libs/langchain-community/src/vectorstores/singlestore.ts +++ b/libs/langchain-community/src/vectorstores/singlestore.ts @@ -10,13 +10,21 @@ import { format } from "mysql2"; import { createPool } from "mysql2/promise"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents"; +import { Document, DocumentInterface } from "@langchain/core/documents"; +import { Callbacks } from "@langchain/core/callbacks/manager"; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type Metadata = Record; export type DistanceMetrics = "DOT_PRODUCT" | "EUCLIDEAN_DISTANCE"; +export type SearchStrategy = + | "VECTOR_ONLY" + | "TEXT_ONLY" + | "FILTER_BY_TEXT" + | "FILTER_BY_VECTOR" + | "WEIGHTED_SUM"; + const OrderingDirective: Record = { DOT_PRODUCT: "DESC", EUCLIDEAN_DISTANCE: "", @@ -36,14 +44,33 @@ type ConnectionWithOptions = { type ConnectionConfig = ConnectionWithUri | ConnectionWithOptions; +type SearchConfig = { + searchStrategy?: SearchStrategy; + filterThreshold?: number; + textWeight?: number; + vectorWeight?: number; + vectorselectCountMultiplier?: number; +}; + export type SingleStoreVectorStoreConfig = ConnectionConfig & { tableName?: string; + idColumnName?: string; contentColumnName?: string; vectorColumnName?: string; metadataColumnName?: string; distanceMetric?: DistanceMetrics; + useVectorIndex?: boolean; + vectorIndexName?: string; + vectorIndexOptions?: Metadata; + vectorSize?: number; + 
useFullTextIndex?: boolean; + searchConfig?: SearchConfig; }; +/** + * Adds the connect attributes to the connection options. + * @param config A SingleStoreVectorStoreConfig object. + */ function withConnectAttributes( config: SingleStoreVectorStoreConfig ): ConnectionOptions { @@ -71,7 +98,7 @@ function withConnectAttributes( result.connectAttributes = { ...result.connectAttributes, _connector_name: "langchain js sdk", - _connector_version: "1.0.0", + _connector_version: "2.0.0", _driver_name: "Node-MySQL-2", }; @@ -88,6 +115,8 @@ export class SingleStoreVectorStore extends VectorStore { tableName: string; + idColumnName: string; + contentColumnName: string; vectorColumnName: string; @@ -96,6 +125,19 @@ export class SingleStoreVectorStore extends VectorStore { distanceMetric: DistanceMetrics; + useVectorIndex: boolean; + + vectorIndexName: string; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + vectorIndexOptions: Metadata; + + vectorSize: number; + + useFullTextIndex: boolean; + + searchConfig: SearchConfig; + _vectorstoreType(): string { return "singlestore"; } @@ -107,10 +149,23 @@ export class SingleStoreVectorStore extends VectorStore { super(embeddings, config); this.connectionPool = createPool(withConnectAttributes(config)); this.tableName = config.tableName ?? "embeddings"; + this.idColumnName = config.idColumnName ?? "id"; this.contentColumnName = config.contentColumnName ?? "content"; this.vectorColumnName = config.vectorColumnName ?? "vector"; this.metadataColumnName = config.metadataColumnName ?? "metadata"; this.distanceMetric = config.distanceMetric ?? "DOT_PRODUCT"; + this.useVectorIndex = config.useVectorIndex ?? false; + this.vectorIndexName = config.vectorIndexName ?? ""; + this.vectorIndexOptions = config.vectorIndexOptions ?? {}; + this.vectorSize = config.vectorSize ?? 1536; + this.useFullTextIndex = config.useFullTextIndex ?? false; + this.searchConfig = config.searchConfig ?? 
{ + searchStrategy: "VECTOR_ONLY", + filterThreshold: 1.0, + textWeight: 0.5, + vectorWeight: 0.5, + vectorselectCountMultiplier: 10, + }; } /** @@ -118,11 +173,34 @@ export class SingleStoreVectorStore extends VectorStore { * already exist. */ async createTableIfNotExists(): Promise { - await this.connectionPool - .execute(`CREATE TABLE IF NOT EXISTS ${this.tableName} ( - ${this.contentColumnName} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, - ${this.vectorColumnName} BLOB, - ${this.metadataColumnName} JSON);`); + let fullTextIndex = ""; + if (this.useFullTextIndex) { + fullTextIndex = `, FULLTEXT(${this.contentColumnName})`; + } + if (this.useVectorIndex) { + let vectorIndexOptions = ""; + if (Object.keys(this.vectorIndexOptions).length > 0) { + vectorIndexOptions = `INDEX_OPTIONS '${JSON.stringify( + this.vectorIndexOptions + )}'`; + } + await this.connectionPool + .execute(`CREATE TABLE IF NOT EXISTS ${this.tableName} ( + ${this.idColumnName} BIGINT AUTO_INCREMENT PRIMARY KEY, + ${this.contentColumnName} LONGTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + ${this.vectorColumnName} VECTOR(${this.vectorSize}, F32) NOT NULL, + ${this.metadataColumnName} JSON, + VECTOR INDEX ${this.vectorIndexName} (${this.vectorColumnName}) ${vectorIndexOptions} + ${fullTextIndex});`); + } else { + await this.connectionPool + .execute(`CREATE TABLE IF NOT EXISTS ${this.tableName} ( + ${this.idColumnName} BIGINT AUTO_INCREMENT PRIMARY KEY, + ${this.contentColumnName} LONGTEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + ${this.vectorColumnName} BLOB, + ${this.metadataColumnName} JSON + ${fullTextIndex});`); + } } /** @@ -132,6 +210,20 @@ export class SingleStoreVectorStore extends VectorStore { return this.connectionPool.end(); } + /** + * Sets the search configuration for the SingleStoreVectorStore instance. + * @param config A SearchConfig object. 
+ */ + async setSearchConfig(config: SearchConfig): Promise { + this.searchConfig = { + searchStrategy: config.searchStrategy ?? "VECTOR_ONLY", + filterThreshold: config.filterThreshold ?? 1.0, + textWeight: config.textWeight ?? 0.5, + vectorWeight: config.vectorWeight ?? 0.5, + vectorselectCountMultiplier: config.vectorselectCountMultiplier ?? 10, + }; + } + /** * Adds new documents to the SingleStoreDB database. * @param documents An array of Document objects. @@ -154,9 +246,13 @@ export class SingleStoreVectorStore extends VectorStore { await Promise.all( vectors.map(async (vector, idx) => { try { - await this.connectionPool.execute( + await this.connectionPool.query( format( - `INSERT INTO ${tableName} VALUES (?, JSON_ARRAY_PACK('[?]'), ?);`, + `INSERT INTO ${tableName}( + ${this.contentColumnName}, + ${this.vectorColumnName}, + ${this.metadataColumnName}) + VALUES (?, JSON_ARRAY_PACK('[?]'), ?);`, [ documents[idx].pageContent, vector, @@ -169,21 +265,69 @@ export class SingleStoreVectorStore extends VectorStore { } }) ); + if (this.useFullTextIndex || this.useVectorIndex) { + await this.connectionPool.query(`OPTIMIZE TABLE ${tableName} FLUSH;`); + } } /** - * Performs a similarity search on the vectors stored in the SingleStoreDB - * database. - * @param query An array of numbers representing the query vector. + * + * Performs a similarity search on the texts stored in the SingleStoreDB + * using the specified search strategy and distance metric. + * @param query A string representing the query text. + * @param vector An array of numbers representing the query vector. * @param k The number of nearest neighbors to return. - * @param filter Optional metadata to filter the vectors by. - * @returns Top matching vectors with score + * @param filter Optional metadata to filter the texts by. 
+ * @returns Top matching documents with score */ - async similaritySearchVectorWithScore( - query: number[], + async similaritySearchTextAndVectorWithScore( + query: string, + vector: number[], k: number, filter?: Metadata ): Promise<[Document, number][]> { + if (!this.searchConfig.searchStrategy) { + throw new Error("Search strategy is required."); + } + if ( + this.searchConfig.searchStrategy !== "VECTOR_ONLY" && + !this.useFullTextIndex + ) { + throw new Error( + "Full text index is required for text-based search strategies." + ); + } + if ( + (this.searchConfig.searchStrategy === "FILTER_BY_TEXT" || + this.searchConfig.searchStrategy === "FILTER_BY_VECTOR") && + !this.searchConfig.filterThreshold && + this.searchConfig.filterThreshold !== 0 + ) { + throw new Error( + "Filter threshold is required for filter-based search strategies." + ); + } + if ( + this.searchConfig.searchStrategy === "WEIGHTED_SUM" && + ((!this.searchConfig.textWeight && this.searchConfig.textWeight !== 0) || + (!this.searchConfig.vectorWeight && + this.searchConfig.vectorWeight !== 0) || + (!this.searchConfig.vectorselectCountMultiplier && + this.searchConfig.vectorselectCountMultiplier !== 0)) + ) { + throw new Error( + "Text and vector weight and vector select count multiplier are required for weighted sum search strategy." + ); + } + if ( + this.searchConfig.searchStrategy === "WEIGHTED_SUM" && + this.distanceMetric !== "DOT_PRODUCT" + ) { + throw new Error( + "Weighted sum search strategy is only available for DOT_PRODUCT distance metric." + ); + } + const filterThreshold = this.searchConfig.filterThreshold ?? 1.0; // build the where clause from filter const whereArgs: string[] = []; const buildWhereClause = (record: Metadata, argList: string[]): string => { @@ -212,10 +356,87 @@ export class SingleStoreVectorStore extends VectorStore { } return whereTokens.join(" AND "); }; - const whereClause = filter - ? 
"WHERE ".concat(buildWhereClause(filter, [])) - : ""; + const filterByTextClause = (): string => { + whereArgs.push(query, filterThreshold.toString()); + return `MATCH (${this.contentColumnName}) AGAINST (?) > ?`; + }; + const filterByVectorClause = (): string => { + whereArgs.push(JSON.stringify(vector), filterThreshold.toString()); + return this.distanceMetric === "DOT_PRODUCT" + ? `${this.distanceMetric}(${this.vectorColumnName}, JSON_ARRAY_PACK(?)) > ?` + : `${this.distanceMetric}(${this.vectorColumnName}, JSON_ARRAY_PACK(?)) < ?`; + }; + const whereClauses: string[] = []; + if (filter) { + whereClauses.push(buildWhereClause(filter, [])); + } + if (this.searchConfig.searchStrategy === "FILTER_BY_TEXT") { + whereClauses.push(filterByTextClause()); + } + if (this.searchConfig.searchStrategy === "FILTER_BY_VECTOR") { + whereClauses.push(filterByVectorClause()); + } + const whereClause = + whereClauses.length > 0 ? `WHERE ${whereClauses.join(" AND ")}` : ""; + let queryText = ""; + switch (this.searchConfig.searchStrategy) { + case "TEXT_ONLY": + case "FILTER_BY_VECTOR": + queryText = format( + `SELECT ${this.contentColumnName}, ${this.metadataColumnName}, + MATCH (${this.contentColumnName}) AGAINST (?) as __score + FROM ${this.tableName} ${whereClause} ORDER BY __score DESC LIMIT ?;`, + [query, ...whereArgs, k] + ); + break; + case "VECTOR_ONLY": + case "FILTER_BY_TEXT": + queryText = format( + `SELECT ${this.contentColumnName}, ${this.metadataColumnName}, + ${this.distanceMetric}(${ + this.vectorColumnName + }, JSON_ARRAY_PACK('[?]')) as __score + FROM ${this.tableName} ${whereClause} ORDER BY __score ${ + OrderingDirective[this.distanceMetric] + } LIMIT ?;`, + [vector, ...whereArgs, k] + ); + break; + case "WEIGHTED_SUM": + queryText = format( + `SELECT ${this.contentColumnName}, ${ + this.metadataColumnName + }, __score1 * ? + __score2 * ? 
as __score + FROM ( + SELECT ${this.idColumnName}, ${this.contentColumnName}, ${ + this.metadataColumnName + }, MATCH (${this.contentColumnName}) AGAINST (?) as __score1 + FROM ${this.tableName} ${whereClause}) r1 FULL OUTER JOIN ( + SELECT ${this.idColumnName}, ${this.distanceMetric}(${ + this.vectorColumnName + }, JSON_ARRAY_PACK('[?]')) as __score2 + FROM ${this.tableName} ${whereClause} ORDER BY __score2 ${ + OrderingDirective[this.distanceMetric] + } LIMIT ? + ) r2 ON r1.${this.idColumnName} = r2.${ + this.idColumnName + } ORDER BY __score ${OrderingDirective[this.distanceMetric]} LIMIT ?`, + [ + this.searchConfig.textWeight, + this.searchConfig.vectorWeight, + query, + ...whereArgs, + vector, + ...whereArgs, + k * (this.searchConfig.vectorselectCountMultiplier ?? 10), + k, + ] + ); + break; + default: + throw new Error("Invalid search strategy."); + } const [rows]: [ ( | RowDataPacket[] @@ -225,19 +446,7 @@ export class SingleStoreVectorStore extends VectorStore { | ResultSetHeader ), FieldPacket[] - ] = await this.connectionPool.query( - format( - `SELECT ${this.contentColumnName}, - ${this.metadataColumnName}, - ${this.distanceMetric}(${ - this.vectorColumnName - }, JSON_ARRAY_PACK('[?]')) as __score FROM ${ - this.tableName - } ${whereClause} - ORDER BY __score ${OrderingDirective[this.distanceMetric]} LIMIT ?;`, - [query, ...whereArgs, k] - ) - ); + ] = await this.connectionPool.query(queryText); const result: [Document, number][] = []; for (const row of rows as RowDataPacket[]) { const rowData = row as unknown as Record; @@ -252,6 +461,75 @@ export class SingleStoreVectorStore extends VectorStore { return result; } + /** + * Performs a similarity search on the texts stored in the SingleStoreDB + * @param query A string representing the query text. + * @param k The number of nearest neighbors to return. By default, it is 4. + * @param filter Optional metadata to filter the texts by. 
+ * @param _callbacks - Callbacks object, not used in this implementation. + * @returns Top matching documents + */ + async similaritySearch( + query: string, + k?: number, + filter?: Metadata, + _callbacks?: Callbacks | undefined + ): Promise[]> { + // @typescript-eslint/no-explicit-any + const queryVector = await this.embeddings.embedQuery(query); + return this.similaritySearchTextAndVectorWithScore( + query, + queryVector, + k ?? 4, + filter + ).then((result) => result.map(([doc]) => doc)); + } + + /** + * Performs a similarity search on the texts stored in the SingleStoreDB + * @param query A string representing the query text. + * @param k The number of nearest neighbors to return. By default, it is 4. + * @param filter Optional metadata to filter the texts by. + * @param _callbacks + * @returns Top matching documents with score + */ + async similaritySearchWithScore( + query: string, + k?: number, + filter?: Metadata, + _callbacks?: Callbacks | undefined + ): Promise<[DocumentInterface, number][]> { + // @typescript-eslint/no-explicit-any + const queryVector = await this.embeddings.embedQuery(query); + return this.similaritySearchTextAndVectorWithScore( + query, + queryVector, + k ?? 4, + filter + ); + } + + /** + * Performs a similarity search on the vectors stored in the SingleStoreDB + * database. + * @param query An array of numbers representing the query vector. + * @param k The number of nearest neighbors to return. + * @param filter Optional metadata to filter the vectors by. + * @returns Top matching vectors with score + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: Metadata + ): Promise<[Document, number][]> { + if (this.searchConfig.searchStrategy !== "VECTOR_ONLY") { + throw new Error( + "similaritySearchVectorWithScore is only available for VECTOR_ONLY search strategy." 
+ ); + } + return this.similaritySearchTextAndVectorWithScore("", query, k, filter); + } + /** * Creates a new instance of the SingleStoreVectorStore class from a list * of texts. diff --git a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts index aacfd5dd71b4..2f6868f51cf1 100644 --- a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts @@ -3,7 +3,50 @@ import { test, expect } from "@jest/globals"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; -import { SingleStoreVectorStore } from "../singlestore.js"; +import { SingleStoreVectorStore, SearchStrategy } from "../singlestore.js"; + +class MockEmbeddings extends OpenAIEmbeddings { + queryIndex: number; + + constructor() { + super(); + this.queryIndex = 0; + } + + async embedDocuments(documents: string[]): Promise { + return documents.map((text: string, _) => this.embed(text)); + } + + embed(_: string): number[] { + this.queryIndex += 1; + return [ + Math.cos((this.queryIndex * Math.PI) / 10.0), + Math.sin((this.queryIndex * Math.PI) / 10.0), + ]; + } + + async embedQuery(document: string): Promise { + return this.embed(document); + } +} + +const weatherTexts: string[] = [ + "In the parched desert, a sudden rainstorm brought relief, as the droplets danced upon the thirsty earth, rejuvenating the landscape with the sweet scent of petrichor.", + "Amidst the bustling cityscape, the rain fell relentlessly, creating a symphony of pitter-patter on the pavement, while umbrellas bloomed like colorful flowers in a sea of gray.", + "High in the mountains, the rain transformed into a delicate mist, enveloping the peaks in a mystical veil, where each droplet seemed to whisper secrets to the ancient rocks below.", + "Blanketing the countryside in a soft, pristine layer, the 
snowfall painted a serene tableau, muffling the world in a tranquil hush as delicate flakes settled upon the branches of trees like nature's own lacework.", + "In the urban landscape, snow descended, transforming bustling streets into a winter wonderland, where the laughter of children echoed amidst the flurry of snowballs and the twinkle of holiday lights.", + "Atop the rugged peaks, snow fell with an unyielding intensity, sculpting the landscape into a pristine alpine paradise, where the frozen crystals shimmered under the moonlight, casting a spell of enchantment over the wilderness below.", +]; + +const weatherMetadata: object[] = [ + { count: "1", category: "rain", group: "a" }, + { count: "2", category: "rain", group: "a" }, + { count: "3", category: "rain", group: "b" }, + { count: "1", category: "snow", group: "b" }, + { count: "2", category: "snow", group: "a" }, + { count: "3", category: "snow", group: "a" }, +]; test.skip("SingleStoreVectorStore", async () => { expect(process.env.SINGLESTORE_HOST).toBeDefined(); @@ -173,3 +216,394 @@ test.skip("SingleStoreVectorStore filtering", async () => { expect(results5).toEqual([]); await vectorStore.end(); }); + +test.skip("SingleStorevectorStore wrong search type", async () => { + expect(process.env.SINGLESTORE_HOST).toBeDefined(); + expect(process.env.SINGLESTORE_PORT).toBeDefined(); + expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); + expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); + expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); + const vectorStore = await SingleStoreVectorStore.fromTexts( + [], + [], + new MockEmbeddings(), + { + connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, + tableName: "wrong_serch_type_test", + useVectorIndex: true, + useFullTextIndex: false, + } + ); + for (const searchType of [ + "TEXT_ONLY", + "FILTER_BY_TEXT", + 
"FILTER_BY_VECTOR", + "WEIGHTED_SUM", + ]) { + await vectorStore.setSearchConfig({ + searchStrategy: searchType as SearchStrategy, + }); + await expect( + vectorStore.similaritySearch("hello world", 1) + ).rejects.toThrow( + "Full text index is required for text-based search strategies." + ); + } + + await vectorStore.end(); +}); + +test.skip("SingleStoreVectorStore no filter threshold type 1", async () => { + expect(process.env.SINGLESTORE_HOST).toBeDefined(); + expect(process.env.SINGLESTORE_PORT).toBeDefined(); + expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); + expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); + expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); + const vectorStore = await SingleStoreVectorStore.fromTexts( + [], + [], + new MockEmbeddings(), + { + connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, + tableName: "no_filter_threshold_type_test", + useVectorIndex: true, + useFullTextIndex: true, + searchConfig: { + searchStrategy: "FILTER_BY_TEXT", + }, + } + ); + await expect( + vectorStore.similaritySearch("hello world", 1, { id: 1 }) + ).rejects.toThrow( + "Filter threshold is required for filter-based search strategies." 
+  );
+  await vectorStore.end();
+});
+
+// FILTER_BY_VECTOR configured without a filterThreshold: the search must reject.
+test.skip("SingleStoreVectorStore no filter threshold type 2", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    [],
+    [],
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "no_filter_threshold_type_test",
+      useVectorIndex: true,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "FILTER_BY_VECTOR",
+      },
+    }
+  );
+  // Release the store even when the assertion fails.
+  try {
+    await expect(
+      vectorStore.similaritySearch("hello world", 1, { id: 1 })
+    ).rejects.toThrow(
+      "Filter threshold is required for filter-based search strategies."
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// WEIGHTED_SUM configured without vectorselectCountMultiplier: the search must reject.
+test.skip("SingleStoreVectorStore no weight coefs 1", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    [],
+    [],
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "no_weighted_sum_params",
+      useVectorIndex: true,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "WEIGHTED_SUM",
+        vectorWeight: 1,
+        textWeight: 1,
+      },
+    }
+  );
+  try {
+    await expect(
+      vectorStore.similaritySearch("hello world", 1, { id: 1 })
+    ).rejects.toThrow(
+      "Text and vector weight and vector select count multiplier are required for weighted sum search strategy."
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// WEIGHTED_SUM configured without vectorWeight: the search must reject.
+test.skip("SingleStoreVectorStore no weight coefs 2", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    [],
+    [],
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "no_weighted_sum_params",
+      useVectorIndex: true,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "WEIGHTED_SUM",
+        textWeight: 1,
+        vectorselectCountMultiplier: 10,
+      },
+    }
+  );
+  try {
+    await expect(
+      vectorStore.similaritySearch("hello world", 1, { id: 1 })
+    ).rejects.toThrow(
+      "Text and vector weight and vector select count multiplier are required for weighted sum search strategy."
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// WEIGHTED_SUM configured without textWeight: the search must reject.
+test.skip("SingleStoreVectorStore no weight coefs 3", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    [],
+    [],
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "no_weighted_sum_params",
+      useVectorIndex: true,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "WEIGHTED_SUM",
+        vectorWeight: 1,
+        vectorselectCountMultiplier: 10,
+      },
+    }
+  );
+  try {
+    await expect(
+      vectorStore.similaritySearch("hello world", 1, { id: 1 })
+    ).rejects.toThrow(
+      "Text and vector weight and vector select count multiplier are required for weighted sum search strategy."
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// TEXT_ONLY strategy with a metadata filter ({ count: "1" }): expects two
+// matches, in the order asserted below.
+test.skip("SingleStoreVectorStore text only search", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    weatherTexts,
+    weatherMetadata,
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "text_only_search",
+      useVectorIndex: false,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "TEXT_ONLY",
+      },
+    }
+  );
+  // Release the store whether the search or the assertions fail.
+  try {
+    const output = await vectorStore.similaritySearch(
+      "rainstorm in parched desert",
+      3,
+      { count: "1" }
+    );
+    expect(output.length).toEqual(2);
+    expect(output[0].pageContent).toContain(
+      "In the parched desert, a sudden rainstorm brought relief,"
+    );
+    expect(output[1].pageContent).toContain(
+      "Blanketing the countryside in a soft, pristine layer"
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// FILTER_BY_TEXT with filterThreshold 0.0001, configured up front via fromTexts.
+test.skip("SingleStoreVectorStore filter by text search", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = await SingleStoreVectorStore.fromTexts(
+    weatherTexts,
+    weatherMetadata,
+    new MockEmbeddings(),
+    {
+      connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+      tableName: "filter_by_text_search",
+      useVectorIndex: false,
+      useFullTextIndex: true,
+      searchConfig: {
+        searchStrategy: "FILTER_BY_TEXT",
+        filterThreshold: 0.0001,
+      },
+    }
+  );
+  try {
+    const output = await vectorStore.similaritySearch(
+      "rainstorm in parched desert",
+      1
+    );
+    expect(output.length).toEqual(1);
+    expect(output[0].pageContent).toContain(
+      "In the parched desert, a sudden rainstorm brought relief,"
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// FILTER_BY_VECTOR: the store is built without a threshold, then the threshold
+// (-0.2) is supplied through setSearchConfig before searching with a metadata
+// filter ({ group: "b" }).
+test.skip("SingleStoreVectorStore filter by vector search", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  // A constructor result is not a promise, so it is not awaited.
+  const vectorStore = new SingleStoreVectorStore(new MockEmbeddings(), {
+    connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+    tableName: "filter_by_vector_search",
+    useVectorIndex: false,
+    vectorSize: 2,
+    useFullTextIndex: true,
+    searchConfig: {
+      searchStrategy: "FILTER_BY_VECTOR",
+    },
+  });
+  try {
+    // Documents are added one call at a time, in weatherTexts order.
+    for (let i = 0; i < weatherTexts.length; i += 1) {
+      await vectorStore.addDocuments([
+        new Document({
+          pageContent: weatherTexts[i],
+          metadata: weatherMetadata[i],
+        }),
+      ]);
+    }
+    await vectorStore.setSearchConfig({
+      searchStrategy: "FILTER_BY_VECTOR",
+      filterThreshold: -0.2,
+    });
+    const output = await vectorStore.similaritySearch(
+      "rainstorm in parched desert, rain",
+      1,
+      { group: "b" }
+    );
+    expect(output.length).toEqual(1);
+    expect(output[0].pageContent).toContain(
+      "High in the mountains, the rain transformed into a delicate"
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// FILTER_BY_TEXT with filterThreshold 0, applied after construction through
+// setSearchConfig. Previously this test reused the title of the fromTexts-based
+// "filter by text search" test above, which made the two indistinguishable in
+// reports.
+// NOTE(review): this test also reuses the "filter_by_text_search" table of the
+// test above; confirm whether the two are meant to share rows.
+test.skip("SingleStoreVectorStore filter by text search with setSearchConfig", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = new SingleStoreVectorStore(new MockEmbeddings(), {
+    connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+    tableName: "filter_by_text_search",
+    useVectorIndex: false,
+    vectorSize: 2,
+    useFullTextIndex: true,
+  });
+  try {
+    for (let i = 0; i < weatherTexts.length; i += 1) {
+      await vectorStore.addDocuments([
+        new Document({
+          pageContent: weatherTexts[i],
+          metadata: weatherMetadata[i],
+        }),
+      ]);
+    }
+    await vectorStore.setSearchConfig({
+      searchStrategy: "FILTER_BY_TEXT",
+      filterThreshold: 0,
+    });
+    const output = await vectorStore.similaritySearch(
+      "rainstorm in parched desert",
+      1
+    );
+    expect(output.length).toEqual(1);
+    expect(output[0].pageContent).toContain(
+      "In the parched desert, a sudden rainstorm brought relief"
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// WEIGHTED_SUM combined with the EUCLIDEAN_DISTANCE metric: the search must
+// reject with the DOT_PRODUCT-only error.
+test.skip("SingleStoreVectorStore weighted sum search unsupported strategy", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = new SingleStoreVectorStore(new MockEmbeddings(), {
+    connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+    tableName: "filter_by_weighted_sum_unsuported",
+    useVectorIndex: true,
+    vectorSize: 2,
+    useFullTextIndex: true,
+    distanceMetric: "EUCLIDEAN_DISTANCE",
+    searchConfig: {
+      searchStrategy: "WEIGHTED_SUM",
+      textWeight: 1,
+      vectorWeight: 1,
+      vectorselectCountMultiplier: 10,
+    },
+  });
+  try {
+    await expect(vectorStore.similaritySearch("some text", 1)).rejects.toThrow(
+      "Weighted sum search strategy is only available for DOT_PRODUCT distance metric."
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});
+
+// WEIGHTED_SUM with DOT_PRODUCT and all three coefficients set, searched with a
+// metadata filter ({ category: "snow" }).
+test.skip("SingleStoreVectorStore weighted sum search", async () => {
+  expect(process.env.SINGLESTORE_HOST).toBeDefined();
+  expect(process.env.SINGLESTORE_PORT).toBeDefined();
+  expect(process.env.SINGLESTORE_USERNAME).toBeDefined();
+  expect(process.env.SINGLESTORE_PASSWORD).toBeDefined();
+  expect(process.env.SINGLESTORE_DATABASE).toBeDefined();
+  const vectorStore = new SingleStoreVectorStore(new MockEmbeddings(), {
+    connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`,
+    tableName: "filter_by_weighted_sum",
+    useVectorIndex: true,
+    vectorSize: 2,
+    useFullTextIndex: true,
+    distanceMetric: "DOT_PRODUCT",
+    searchConfig: {
+      searchStrategy: "WEIGHTED_SUM",
+      textWeight: 1,
+      vectorWeight: 1,
+      vectorselectCountMultiplier: 10,
+    },
+  });
+  try {
+    for (let i = 0; i < weatherTexts.length; i += 1) {
+      await vectorStore.addDocuments([
+        new Document({
+          pageContent: weatherTexts[i],
+          metadata: weatherMetadata[i],
+        }),
+      ]);
+    }
+    const output = await vectorStore.similaritySearch(
+      "rainstorm in parched desert, rain",
+      1,
+      { category: "snow" }
+    );
+    expect(output.length).toEqual(1);
+    expect(output[0].pageContent).toContain(
+      "Atop the rugged peaks, snow fell with an unyielding"
+    );
+  } finally {
+    await vectorStore.end();
+  }
+});