diff --git a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx index 3d1a23eef46c..5aed8ee2c273 100644 --- a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx @@ -35,12 +35,47 @@ import ExampleLoader from "@examples/indexes/vector_stores/hana_vector/fromDocs. {ExampleLoader} +## Creating an HNSW Vector Index + +A vector index can significantly speed up top-k nearest neighbor queries for vectors. Users can create a Hierarchical Navigable Small World (HNSW) vector index using the `createHnswIndex` method. + +For more information about creating an index at the database level, such as parameter requirements, please refer to the [official documentation](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/create-vector-index-statement-data-definition). + +import ExampleIndex from "@examples/indexes/vector_stores/hana_vector/createHnswIndex.ts"; + +{ExampleIndex} + ## Basic Vectorstore Operations import ExampleBasic from "@examples/indexes/vector_stores/hana_vector/basics.ts"; {ExampleBasic} +## Advanced filtering + +import { Table, Tr, Th, Td } from "@mdx-js/react"; + +In addition to the basic value-based filtering capabilities, it is possible to use more advanced filtering. The table below shows the available filter operators. 
+ +| Operator | Semantic | +| ---------- | -------------------------------------------------------------------------- | +| `$eq` | Equality (==) | +| `$ne` | Inequality (!=) | +| `$lt` | Less than (<) | +| `$lte` | Less than or equal (<=) | +| `$gt` | Greater than (>) | +| `$gte` | Greater than or equal (>=) | +| `$in` | Contained in a set of given values (in) | +| `$nin` | Not contained in a set of given values (not in) | +| `$between` | Between the range of two boundary values | +| `$like` | Text equality based on the "LIKE" semantics in SQL (using "%" as wildcard) | +| `$and` | Logical "and", supporting 2 or more operands | +| `$or` | Logical "or", supporting 2 or more operands | + +import ExampleAdvancedFilter from "@examples/indexes/vector_stores/hana_vector/advancedFiltering.ts"; + +{ExampleAdvancedFilter} + ## Using a VectorStore as a retriever in chains for retrieval augmented generation (RAG) import ExampleChain from "@examples/indexes/vector_stores/hana_vector/chains.ts"; diff --git a/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts new file mode 100644 index 000000000000..a3095c29b17e --- /dev/null +++ b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts @@ -0,0 +1,210 @@ +import { OpenAIEmbeddings } from "@langchain/openai"; +import hanaClient from "hdb"; +import { Document } from "@langchain/core/documents"; +import { + HanaDB, + HanaDBArgs, +} from "@langchain/community/vectorstores/hanavector"; + +const connectionParams = { + host: process.env.HANA_HOST, + port: process.env.HANA_PORT, + user: process.env.HANA_UID, + password: process.env.HANA_PWD, +}; +const client = hanaClient.createClient(connectionParams); + +// Connect to SAP HANA +await new Promise((resolve, reject) => { + client.connect((err: Error) => { + if (err) { + reject(err); + } else { + console.log("Connected to SAP HANA successfully."); + resolve(); + } + }); +}); + +const docs: 
Document[] = [ + { + pageContent: "First", + metadata: { name: "adam", is_active: true, id: 1, height: 10.0 }, + }, + { + pageContent: "Second", + metadata: { name: "bob", is_active: false, id: 2, height: 5.7 }, + }, + { + pageContent: "Third", + metadata: { name: "jane", is_active: true, id: 3, height: 2.4 }, + }, +]; + +// Initialize embeddings +const embeddings = new OpenAIEmbeddings(); + +const args: HanaDBArgs = { + connection: client, + tableName: "testAdvancedFilters", +}; + +// Create a LangChain VectorStore interface for the HANA database and specify the table (collection) to use in args. +const vectorStore = new HanaDB(embeddings, args); +// need to initialize once an instance is created. +await vectorStore.initialize(); +// Delete already existing documents from the table +await vectorStore.delete({ filter: {} }); +await vectorStore.addDocuments(docs); + +// Helper function to print filter results +function printFilterResult(result: Document[]) { + if (result.length === 0) { + console.log(""); + } else { + result.forEach((doc) => console.log(doc.metadata)); + } +} + +let advancedFilter; + +// Not equal +advancedFilter = { id: { $ne: 1 } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$ne":1}} +{ name: 'bob', is_active: false, id: 2, height: 5.7 } +{ name: 'jane', is_active: true, id: 3, height: 2.4 } +*/ + +// Between range +advancedFilter = { id: { $between: [1, 2] } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$between":[1,2]}} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ + +// In list +advancedFilter = { name: { $in: ["adam", "bob"] } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await 
vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$in":["adam","bob"]}} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ + +// Not in list +advancedFilter = { name: { $nin: ["adam", "bob"] } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$nin":["adam","bob"]}} +{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ + +// Greater than +advancedFilter = { id: { $gt: 1 } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$gt":1}} +{ name: 'bob', is_active: false, id: 2, height: 5.7 } +{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ + +// Greater than or equal to +advancedFilter = { id: { $gte: 1 } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$gte":1}} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } +{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ + +// Less than +advancedFilter = { id: { $lt: 1 } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$lt":1}} + */ + +// Less than or equal to +advancedFilter = { id: { $lte: 1 } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"id":{"$lte":1}} +{ name: 'adam', is_active: true, id: 1, height: 10 } */ + +// Text filtering with $like +advancedFilter = { name: { $like: "a%" } }; +console.log(`Filter: 
${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$like":"a%"}} +{ name: 'adam', is_active: true, id: 1, height: 10 } */ + +advancedFilter = { name: { $like: "%a%" } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$like":"%a%"}} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ + +// Combined filtering with $or +advancedFilter = { $or: [{ id: 1 }, { name: "bob" }] }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"$or":[{"id":1},{"name":"bob"}]} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ + +// Combined filtering with $and +advancedFilter = { $and: [{ id: 1 }, { id: 2 }] }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"$and":[{"id":1},{"id":2}]} + */ + +advancedFilter = { $or: [{ id: 1 }, { id: 2 }, { id: 3 }] }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"$or":[{"id":1},{"id":2},{"id":3}]} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } +{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ + +// You can also define a nested filter with $and and $or. 
+advancedFilter = { + $and: [{ $or: [{ id: 1 }, { id: 2 }] }, { height: { $gte: 5.0 } }], +}; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"$and":[{"$or":[{"id":1},{"id":2}]},{"height":{"$gte":5}}]} +{ name: 'adam', is_active: true, id: 1, height: 10 } +{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ + +// Disconnect from SAP HANA after the operations +client.disconnect(); diff --git a/examples/src/indexes/vector_stores/hana_vector/createHnswIndex.ts b/examples/src/indexes/vector_stores/hana_vector/createHnswIndex.ts new file mode 100644 index 000000000000..206ef9a402cf --- /dev/null +++ b/examples/src/indexes/vector_stores/hana_vector/createHnswIndex.ts @@ -0,0 +1,98 @@ +import hanaClient from "hdb"; +import { + HanaDB, + HanaDBArgs, +} from "@langchain/community/vectorstores/hanavector"; +import { OpenAIEmbeddings } from "@langchain/openai"; + +// table "test_fromDocs" is already created with the previous example. +// Now, we will use this existing table to create indexes and perform similarity search. 
+ +const connectionParams = { + host: process.env.HANA_HOST, + port: process.env.HANA_PORT, + user: process.env.HANA_UID, + password: process.env.HANA_PWD, +}; +const client = hanaClient.createClient(connectionParams); + +// Connect to SAP HANA +await new Promise((resolve, reject) => { + client.connect((err: Error) => { + if (err) { + reject(err); + } else { + console.log("Connected to SAP HANA successfully."); + resolve(); + } + }); +}); + +// Initialize embeddings +const embeddings = new OpenAIEmbeddings(); + +// First instance using the existing table "test_fromDocs" (default: Cosine similarity) +const argsCosine: HanaDBArgs = { + connection: client, + tableName: "test_fromDocs", +}; + +// Second instance using the existing table "test_fromDocs" but with L2 Euclidean distance +const argsL2: HanaDBArgs = { + connection: client, + tableName: "test_fromDocs", + distanceStrategy: "euclidean", // Use Euclidean distance for this instance +}; + +// Initialize both HanaDB instances +const vectorStoreCosine = new HanaDB(embeddings, argsCosine); +const vectorStoreL2 = new HanaDB(embeddings, argsL2); + +// Create HNSW index with Cosine similarity (default) +await vectorStoreCosine.createHnswIndex({ + indexName: "hnsw_cosine_index", + efSearch: 400, + m: 50, + efConstruction: 150, +}); + +// Create HNSW index with Euclidean (L2) distance +await vectorStoreL2.createHnswIndex({ + indexName: "hnsw_l2_index", + efSearch: 400, + m: 50, + efConstruction: 150, +}); + +// Query text for similarity search +const query = "What did the president say about Ketanji Brown Jackson"; + +// Perform similarity search using the default Cosine index +const docsCosine = await vectorStoreCosine.similaritySearch(query, 2); +console.log("Cosine Similarity Results:"); +docsCosine.forEach((doc) => { + console.log("-".repeat(80)); + console.log(doc.pageContent); +}); +/* +Cosine Similarity Results: +---------------------------------------------------------------------- +One of the most serious 
constitutional ... + +And I did that 4 days ago, when I ... +---------------------------------------------------------------------- +As I said last year, especially ... + +While it often appears that we never agree, that isn’t true... +*/ +// Perform similarity search using Euclidean distance (L2 index) +const docsL2 = await vectorStoreL2.similaritySearch(query, 2); +console.log("Euclidean (L2) Distance Results:"); +docsL2.forEach((doc) => { + console.log("-".repeat(80)); + console.log(doc.pageContent); +}); +// The L2 distance results should be the same as cosine search results. + +// Disconnect from SAP HANA after the operations +client.disconnect(); diff --git a/libs/langchain-community/src/vectorstores/hanavector.ts b/libs/langchain-community/src/vectorstores/hanavector.ts index 8f55568adb27..48e40b8ee48c 100644 --- a/libs/langchain-community/src/vectorstores/hanavector.ts +++ b/libs/langchain-community/src/vectorstores/hanavector.ts @@ -8,6 +8,73 @@ import { maximalMarginalRelevance } from "@langchain/core/utils/math"; export type DistanceStrategy = "euclidean" | "cosine"; +const COMPARISONS_TO_SQL: Record = { + $eq: "=", + $ne: "<>", + $lt: "<", + $lte: "<=", + $gt: ">", + $gte: ">=", +}; + +// Base value types that can be used in comparisons +type ComparisonRValue = + | string + | number + | boolean + | Date + | Array; +// Available comparison operators for filtering +type Comparator = + | "$eq" + | "$ne" + | "$lt" + | "$lte" + | "$gt" + | "$gte" + | "$in" + | "$nin" + | "$between" + | "$like"; +// Filter using comparison operators +// Defines the relationship between a comparison operator and its value +type ComparatorFilter = { + [K in Comparator]?: ComparisonRValue; +}; + +type LogicalOperator = "$and" | "$or"; +type LogicalFilter = { + [K in LogicalOperator]?: Filter[]; +}; +type PropertyFilter = { + [property: string]: string | number | boolean | Date | ComparatorFilter; +}; + +type Filter = PropertyFilter | LogicalFilter; + +interface DateValue { + 
type: "date"; + date: string | Date; +} + +const IN_OPERATORS_TO_SQL: Record = { + $in: "IN", + $nin: "NOT IN", +}; + +const BETWEEN_OPERATOR_TO_SQL: Record = { + $between: "BETWEEN", +}; + +const LIKE_OPERATOR_TO_SQL: Record = { + $like: "LIKE", +}; + +const LOGICAL_OPERATORS_TO_SQL: Record = { + $and: "AND", + $or: "OR", +}; + const HANA_DISTANCE_FUNCTION: Record = { cosine: ["COSINE_SIMILARITY", "DESC"], euclidean: ["L2DISTANCE", "ASC"], @@ -20,10 +87,6 @@ const defaultMetadataColumn = "VEC_META"; const defaultVectorColumn = "VEC_VECTOR"; const defaultVectorColumnLength = -1; // -1 means dynamic length -interface Filter { - [key: string]: boolean | string | number; -} - /** * Interface defining the arguments required to create an instance of * `HanaDB`. @@ -37,6 +100,7 @@ export interface HanaDBArgs { metadataColumn?: string; vectorColumn?: string; vectorColumnLength?: number; + specificMetadataColumns?: string[]; } export class HanaDB extends VectorStore { @@ -60,6 +124,8 @@ export class HanaDB extends VectorStore { declare FilterType: Filter; + private specificMetadataColumns: string[]; + _vectorstoreType(): string { return "hanadb"; } @@ -78,9 +144,12 @@ export class HanaDB extends VectorStore { args.vectorColumn || defaultVectorColumn ); this.vectorColumnLength = HanaDB.sanitizeInt( - args.vectorColumnLength || defaultVectorColumnLength - ); // Using '??' to allow 0 as a valid value - + args.vectorColumnLength || defaultVectorColumnLength, + -1 + ); + this.specificMetadataColumns = HanaDB.sanitizeSpecificMetadataColumns( + args.specificMetadataColumns || [] + ); this.connection = args.connection; } @@ -166,15 +235,16 @@ export class HanaDB extends VectorStore { } /** - * Sanitizes the input to integer. Throws an error if the value is less than -1. + * Sanitizes the input to integer. Throws an error if the value is less than lower bound. * @param inputInt The input to be sanitized. * @returns The sanitized integer. 
*/ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - public static sanitizeInt(inputInt: any): number { + public static sanitizeInt(inputInt: number | string, lowerBound = 0): number { const value = parseInt(inputInt.toString(), 10); - if (Number.isNaN(value) || value < -1) { - throw new Error(`Value (${value}) must not be smaller than -1`); + if (Number.isNaN(value) || value < lowerBound) { + throw new Error( + `Value (${value}) must not be smaller than ${lowerBound}` + ); } return value; } @@ -221,6 +291,10 @@ export class HanaDB extends VectorStore { return metadata; } + static sanitizeSpecificMetadataColumns(columns: string[]): string[] { + return columns.map((column) => this.sanitizeName(column)); + } + /** * Parses a string representation of a float array and returns an array of numbers. * @param {string} arrayAsString - The string representation of the array. @@ -287,11 +361,13 @@ export class HanaDB extends VectorStore { `"${this.contentColumn}" NCLOB, ` + `"${this.metadataColumn}" NCLOB, ` + `"${this.vectorColumn}" REAL_VECTOR`; + // Length can either be -1 (QRC01+02-24) or 0 (QRC03-24 onwards) + if (this.vectorColumnLength === -1 || this.vectorColumnLength === 0) { + sqlStr += ");"; + } else { + sqlStr += `(${this.vectorColumnLength}));`; + } - sqlStr += - this.vectorColumnLength === -1 - ? ");" - : `(${this.vectorColumnLength}));`; const client = this.connection; await this.executeQuery(client, sqlStr); } @@ -316,40 +392,257 @@ export class HanaDB extends VectorStore { * @returns A tuple containing the WHERE clause string and an array of query parameters. */ private createWhereByFilter( - filter?: Filter - ): [string, Array] { - const queryTuple: Array = []; + filter?: this["FilterType"] + ): [string, Array] { let whereStr = ""; - if (filter) { - Object.keys(filter).forEach((key, i) => { - whereStr += i === 0 ? 
" WHERE " : " AND "; - whereStr += ` JSON_VALUE(${this.metadataColumn}, '$.${key}') = ?`; - - const value = filter[key]; - if (typeof value === "number") { - if (Number.isInteger(value)) { - // hdb requires string while sap/hana-client doesn't - queryTuple.push(value.toString()); + let queryTuple: Array = []; + + if (filter && Object.keys(filter).length > 0) { + const [where, params] = this.processFilterObject(filter); + whereStr = ` WHERE ${where}`; + queryTuple = params; + } + + return [whereStr, queryTuple]; + } + + /** + * Processes a filter object to generate SQL WHERE clause components. + * @param filter - A filter object with keys as metadata fields and values as filter values. + * @returns A tuple containing the WHERE clause string and an array of query parameters. + */ + private processFilterObject( + filter: this["FilterType"] + ): [string, Array] { + let whereStr = ""; + const queryTuple: Array = []; + + Object.keys(filter).forEach((key, i) => { + const filterValue = filter[key as keyof Filter] as + | ComparisonRValue + | ComparatorFilter + | Filter[]; + if (i !== 0) { + whereStr += " AND "; + } + + // Handling logical operators ($and, $or) + if (key in LOGICAL_OPERATORS_TO_SQL) { + const logicalOperator = LOGICAL_OPERATORS_TO_SQL[key]; + const logicalOperands = filterValue as Filter[]; + logicalOperands.forEach((operand: Filter, j: number) => { + if (j !== 0) { + whereStr += ` ${logicalOperator} `; + } + const [whereLogical, paramsLogical] = + this.processFilterObject(operand); + whereStr += "(" + whereLogical + ")"; + queryTuple.push(...paramsLogical); + }); + + return; + } + + // Handle special comparison operators and simple types + let operator = "="; + let sqlParam = "?"; + if (typeof filterValue === "number") { + if (Number.isInteger(filterValue)) { + // hdb requires string while sap/hana-client doesn't + queryTuple.push(filterValue.toString()); + } else { + throw new Error( + `Unsupported filter data-type: wrong number type for key ${key}` + ); + 
} + } else if (typeof filterValue === "string") { + queryTuple.push(filterValue); + } else if (typeof filterValue === "boolean") { + queryTuple.push(filterValue.toString()); + } else if (typeof filterValue === "object" && filterValue !== null) { + // Get the special operator key, like $eq, $ne, $in, $between, etc. + const specialOp = Object.keys(filterValue)[0] as Comparator; + const specialVal = (filterValue as ComparatorFilter)[specialOp]; + // Handling of 'special' operators starting with "$" + if (specialOp in COMPARISONS_TO_SQL) { + operator = COMPARISONS_TO_SQL[specialOp]; + if (specialVal === undefined) { + throw new Error( + `Operator '${specialOp}' expects a non-undefined value.` + ); + } + if (typeof specialVal === "boolean") { + queryTuple.push(specialVal.toString()); + } else if (typeof specialVal === "number") { + sqlParam = "CAST(? as float)"; + queryTuple.push(specialVal); + } else if ( + typeof specialVal === "object" && + specialVal !== null && + "type" in specialVal && + specialVal.type === "date" && + "date" in specialVal + ) { + sqlParam = "CAST(? as DATE)"; + queryTuple.push((specialVal as DateValue).date); + } else { + queryTuple.push(specialVal); + } + } else if (specialOp in BETWEEN_OPERATOR_TO_SQL) { + // ensure the value is an array with exact length of 2 + if (!Array.isArray(specialVal) || specialVal.length !== 2) { + throw new Error(`Operator '${specialOp}' expects two values.`); + } + const [betweenFrom, betweenTo] = specialVal as [ + ComparisonRValue, + ComparisonRValue + ]; + operator = BETWEEN_OPERATOR_TO_SQL[specialOp]; + sqlParam = "? 
AND ?"; + queryTuple.push(betweenFrom.toString(), betweenTo.toString()); + } else if (specialOp in LIKE_OPERATOR_TO_SQL) { + operator = LIKE_OPERATOR_TO_SQL[specialOp]; + if (specialVal !== undefined) { + queryTuple.push(specialVal.toString()); } else { throw new Error( - `Unsupported filter data-type: wrong number type for key ${key}` + `Operator '${specialOp}' expects a non-undefined value.` ); } - } else if (typeof value === "string") { - queryTuple.push(value); - } else if (typeof value === "boolean") { - queryTuple.push(value.toString()); + } else if (specialOp in IN_OPERATORS_TO_SQL) { + operator = IN_OPERATORS_TO_SQL[specialOp]; + if (Array.isArray(specialVal)) { + const placeholders = Array(specialVal.length).fill("?").join(","); + sqlParam = `(${placeholders})`; + queryTuple.push( + ...specialVal.map((listEntry) => listEntry.toString()) + ); + } else { + throw new Error(`Unsupported value for ${operator}: ${specialVal}`); + } } else { - throw new Error( - `Unsupported filter data-type: ${typeof value} for key ${key}` - ); + throw new Error(`Unsupported operator: ${specialOp}`); } - }); - } + } else { + throw new Error(`Unsupported filter data-type: ${typeof filterValue}`); + } + // Metadata column handling + const selector = this.specificMetadataColumns.includes(key) + ? `"${key}"` + : `JSON_VALUE(${this.metadataColumn}, '$.${key}')`; + whereStr += `${selector} ${operator} ${sqlParam}`; + }); return [whereStr, queryTuple]; } + /** + * Creates an HNSW vector index on a specified table and vector column with + * optional build and search configurations. If no configurations are provided, + * default parameters from the database are used. If provided values exceed the + * valid ranges, an error will be raised. + * The index is always created in ONLINE mode. 
+ * + * @param {object} options Object containing configuration options for the index + * @param {number} [options.m] (Optional) Maximum number of neighbors per graph node (Valid Range: [4, 1000]) + * @param {number} [options.efConstruction] (Optional) Maximal candidates to consider when building the graph + * (Valid Range: [1, 100000]) + * @param {number} [options.efSearch] (Optional) Minimum candidates for top-k-nearest neighbor queries + * (Valid Range: [1, 100000]) + * @param {string} [options.indexName] (Optional) Custom index name. Defaults to "{tableName}_{distanceFunctionName}_idx", built from the store's table name and the SQL distance function of its distance strategy + * @returns {Promise} Promise that resolves when index is added. + */ + public async createHnswIndex( + options: { + m?: number; + efConstruction?: number; + efSearch?: number; + indexName?: string; + } = {} + ): Promise { + const { m, efConstruction, efSearch, indexName } = options; + + // Determine the distance function based on the configured strategy + const distanceFuncName = HANA_DISTANCE_FUNCTION[this.distanceStrategy][0]; + const defaultIndexName = `${this.tableName}_${distanceFuncName}_idx`; + + // Use provided indexName or fallback to default + const finalIndexName = HanaDB.sanitizeName(indexName || defaultIndexName); + // Initialize buildConfig and searchConfig objects + const buildConfig: Record = {}; + const searchConfig: Record = {}; + + // Validate and add m parameter to buildConfig if provided + if (m !== undefined) { + const minimumHnswM = 4; + const maximumHnswM = 1000; + const sanitizedM = HanaDB.sanitizeInt(m, minimumHnswM); + if (sanitizedM < minimumHnswM || sanitizedM > maximumHnswM) { + throw new Error("M must be in the range [4, 1000]"); + } + buildConfig.M = sanitizedM; + } + + // Validate and add efConstruction to buildConfig if provided + if (efConstruction !== undefined) { + const minimumEfConstruction = 1; + const maximumEfConstruction = 100000; + const sanitizedEfConstruction = HanaDB.sanitizeInt( + efConstruction, + minimumEfConstruction + ); + if ( + sanitizedEfConstruction < 
minimumEfConstruction || + sanitizedEfConstruction > maximumEfConstruction + ) { + throw new Error("efConstruction must be in the range [1, 100000]"); + } + buildConfig.efConstruction = sanitizedEfConstruction; + } + + // Validate and add efSearch to searchConfig if provided + if (efSearch !== undefined) { + const minimumEfSearch = 1; + const maximumEfSearch = 100000; + const sanitizedEfSearch = HanaDB.sanitizeInt(efSearch, minimumEfSearch); + if ( + sanitizedEfSearch < minimumEfSearch || + sanitizedEfSearch > maximumEfSearch + ) { + throw new Error("efSearch must be in the range [1, 100000]"); + } + searchConfig.efSearch = sanitizedEfSearch; + } + + // Convert buildConfig and searchConfig to JSON strings if they contain values + const buildConfigStr = Object.keys(buildConfig).length + ? JSON.stringify(buildConfig) + : ""; + const searchConfigStr = Object.keys(searchConfig).length + ? JSON.stringify(searchConfig) + : ""; + + // Create the base SQL string for index creation + let sqlStr = `CREATE HNSW VECTOR INDEX ${finalIndexName} ON "${this.tableName}" ("${this.vectorColumn}") + SIMILARITY FUNCTION ${distanceFuncName} `; + + // Append buildConfig to the SQL string if provided + if (buildConfigStr) { + sqlStr += `BUILD CONFIGURATION '${buildConfigStr}' `; + } + + // Append searchConfig to the SQL string if provided + if (searchConfigStr) { + sqlStr += `SEARCH CONFIGURATION '${searchConfigStr}' `; + } + + // Add the ONLINE option + sqlStr += "ONLINE;"; + + const client = this.connection; + await this.executeQuery(client, sqlStr); + } + /** * Deletes entries from the table based on the provided filter. * @param ids - Optional. Deletion by ids is not supported and will throw an error. 
@@ -482,7 +775,7 @@ export class HanaDB extends VectorStore { async similaritySearch( query: string, k: number, - filter?: Filter + filter?: this["FilterType"] ): Promise { const results = await this.similaritySearchWithScore(query, k, filter); return results.map((result) => result[0]); @@ -499,7 +792,7 @@ export class HanaDB extends VectorStore { async similaritySearchWithScore( query: string, k: number, - filter?: Filter + filter?: this["FilterType"] ): Promise<[Document, number][]> { const queryEmbedding = await this.embeddings.embedQuery(query); return this.similaritySearchVectorWithScore(queryEmbedding, k, filter); @@ -516,7 +809,7 @@ export class HanaDB extends VectorStore { async similaritySearchVectorWithScore( queryEmbedding: number[], k: number, - filter?: Filter + filter?: this["FilterType"] ): Promise<[Document, number][]> { const wholeResult = await this.similaritySearchWithScoreAndVectorByVector( queryEmbedding, @@ -537,9 +830,8 @@ export class HanaDB extends VectorStore { async similaritySearchWithScoreAndVectorByVector( embedding: number[], k: number, - filter?: Filter + filter?: this["FilterType"] ): Promise> { - // const result: Array<[Document, number, number[]]> = []; // Sanitize inputs const sanitizedK = HanaDB.sanitizeInt(k); const sanitizedEmbedding = HanaDB.sanitizeListFloat(embedding); @@ -600,7 +892,6 @@ export class HanaDB extends VectorStore { options: MaxMarginalRelevanceSearchOptions ): Promise { const { k, fetchK = 20, lambda = 0.5 } = options; - // console.log(options) const queryEmbedding = await this.embeddings.embedQuery(query); const docs = await this.similaritySearchWithScoreAndVectorByVector( diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts new file mode 100644 index 000000000000..9634adaa05b4 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts @@ -0,0 +1,142 @@ +import { Document } 
from "@langchain/core/documents"; + +interface Metadata { + name: string; + date: string; + count: number; + is_active: boolean; + tags: string[]; + location: number[]; + id: number; + height: number | null; + happiness: number | null; + sadness?: number; +} + +const metadatas: Metadata[] = [ + { + name: "adam", + date: "2021-01-01", + count: 1, + is_active: true, + tags: ["a", "b"], + location: [1.0, 2.0], + id: 1, + height: 10.0, + happiness: 0.9, + sadness: 0.1, + }, + { + name: "bob", + date: "2021-01-02", + count: 2, + is_active: false, + tags: ["b", "c"], + location: [2.0, 3.0], + id: 2, + height: 5.7, + happiness: 0.8, + sadness: 0.1, + }, + { + name: "jane", + date: "2021-01-01", + count: 3, + is_active: true, + tags: ["b", "d"], + location: [3.0, 4.0], + id: 3, + height: 2.4, + happiness: null, + }, +]; + +const texts: string[] = metadatas.map((metadata) => `id ${metadata.id} `); + +export const DOCUMENTS: Document[] = texts.map( + (text, index) => + new Document({ pageContent: text, metadata: metadatas[index] }) +); + +interface TestCase { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + filter: Record; + expected: number[]; +} + +export const TYPE_1_FILTERING_TEST_CASES: TestCase[] = [ + { filter: { id: 1 }, expected: [1] }, + { filter: { name: "adam" }, expected: [1] }, + { filter: { is_active: true }, expected: [1, 3] }, + { filter: { is_active: false }, expected: [2] }, + { filter: { id: 1, is_active: true }, expected: [1] }, + { filter: { id: 1, is_active: false }, expected: [] }, +]; + +export const TYPE_2_FILTERING_TEST_CASES: TestCase[] = [ + { filter: { id: 1 }, expected: [1] }, + { filter: { id: { $ne: 1 } }, expected: [2, 3] }, + { filter: { id: { $gt: 1 } }, expected: [2, 3] }, + { filter: { id: { $gte: 1 } }, expected: [1, 2, 3] }, + { filter: { id: { $lt: 1 } }, expected: [] }, + { filter: { id: { $lte: 1 } }, expected: [1] }, + { filter: { name: "adam" }, expected: [1] }, + { filter: { name: "bob" }, expected: [2] }, + { 
filter: { name: { $eq: "adam" } }, expected: [1] }, + { filter: { name: { $ne: "adam" } }, expected: [2, 3] }, + { filter: { name: { $gt: "jane" } }, expected: [] }, + { filter: { name: { $gte: "jane" } }, expected: [3] }, + { filter: { name: { $lt: "jane" } }, expected: [1, 2] }, + { filter: { name: { $lte: "jane" } }, expected: [1, 2, 3] }, + { filter: { is_active: { $eq: true } }, expected: [1, 3] }, + { filter: { is_active: { $ne: true } }, expected: [2] }, + { filter: { height: { $gt: 5.0 } }, expected: [1, 2] }, + { filter: { height: { $gte: 5.0 } }, expected: [1, 2] }, + { filter: { height: { $lt: 5.0 } }, expected: [3] }, + { filter: { height: { $lte: 5.8 } }, expected: [2, 3] }, + // Date-related test cases: the $eq case passes a typed date operand, the remaining cases pass plain date strings + { + filter: { date: { $eq: { type: "date", date: "2021-01-01" } } }, + expected: [1, 3], + }, + { filter: { date: { $ne: "2021-01-01" } }, expected: [2] }, + { filter: { date: { $gt: "2021-01-01" } }, expected: [2] }, + { filter: { date: { $gte: "2021-01-01" } }, expected: [1, 2, 3] }, + { filter: { date: { $lt: "2021-01-02" } }, expected: [1, 3] }, + { filter: { date: { $lte: "2021-01-02" } }, expected: [1, 2, 3] }, +]; + +export const TYPE_3_FILTERING_TEST_CASES: TestCase[] = [ + { filter: { $or: [{ id: 1 }, { id: 2 }] }, expected: [1, 2] }, + { filter: { $or: [{ id: 1 }, { name: "bob" }] }, expected: [1, 2] }, + { filter: { $and: [{ id: 1 }, { id: 2 }] }, expected: [] }, + { filter: { $or: [{ id: 1 }, { id: 2 }, { id: 3 }] }, expected: [1, 2, 3] }, +]; + +export const TYPE_4_FILTERING_TEST_CASES: TestCase[] = [ + { filter: { id: { $between: [1, 2] } }, expected: [1, 2] }, + { filter: { id: { $between: [1, 1] } }, expected: [1] }, + { filter: { name: { $in: ["adam", "bob"] } }, expected: [1, 2] }, + { filter: { name: { $nin: ["adam", "bob"] } }, expected: [3] }, +]; + +export const TYPE_5_FILTERING_TEST_CASES: TestCase[] = [ + { filter: { name: { $like: "a%" } }, expected: [1] }, + { filter: { name: { $like: "%a%" } }, expected: [1, 3] }, +]; + 
+export const TYPE_6_FILTERING_TEST_CASES: TestCase[] = [ + { + filter: { + $and: [ + { + $or: [{ id: { $eq: 1 } }, { id: { $in: [2, 3] } }], + }, + { height: { $gte: 5.0 } }, + ], + }, + expected: [1, 2], + }, + { filter: { id: 3, height: { $gte: 5.0 } }, expected: [] }, + { filter: { $and: [{ id: 1 }, { height: { $gte: 5.0 } }] }, expected: [1] }, +]; diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts index 42a1fa945576..dc8b4a534e81 100644 --- a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts @@ -5,7 +5,15 @@ import { Document } from "@langchain/core/documents"; import { FakeEmbeddings } from "@langchain/core/utils/testing"; import { test, expect } from "@jest/globals"; import { HanaDB, HanaDBArgs } from "../hanavector.js"; - +import { + DOCUMENTS, + TYPE_1_FILTERING_TEST_CASES, + TYPE_2_FILTERING_TEST_CASES, + TYPE_3_FILTERING_TEST_CASES, + TYPE_4_FILTERING_TEST_CASES, + TYPE_5_FILTERING_TEST_CASES, + TYPE_6_FILTERING_TEST_CASES, +} from "./hanavector.fixtures.js"; // Connection parameters const connectionParams = { host: process.env.HANA_HOST, @@ -269,12 +277,10 @@ describe("add documents and similarity search tests", () => { }, }, ]); - const results: Document[] = await vectorStore.similaritySearch( "Sandwiches taste good.", 1 ); - // console.log(results); expect(results.length).toEqual(1); expect(results).toMatchObject([ { @@ -868,3 +874,458 @@ describe("Tests on HANA side", () => { expect(exceptionOccurred).toBe(true); }); }); + +describe("HNSW Index Creation Tests", () => { + test("test HNSW index creation with default values", async () => { + /** + * Description: + * This test verifies that the HNSW index can be successfully created with default values + * when no parameters are passed to the createHnswIndex function. 
+ */ + const tableNameTest = "TEST_TABLE_HNSW_DEFAULT"; + const args = { + connection: client, + tableName: tableNameTest, + }; + + // Cleanup: Drop table if exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vector = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + let exceptionOccurred = false; + try { + // Call the createHnswIndex function with no parameters (default values) + await vector.createHnswIndex(); + } catch (error) { + console.log(error); + exceptionOccurred = true; + } + + // Assert that no exception occurred + expect(exceptionOccurred).toBe(false); + }); + + test("test HNSW index creation with specific values", async () => { + /** + * Description: + * This test verifies that the HNSW index can be created with specific values for m, efConstruction, + * efSearch, and a custom indexName. + */ + const tableNameTest = "TEST_TABLE_HNSW_DEFINED"; + const args = { + connection: client, + tableName: tableNameTest, + }; + + // Cleanup: Drop table if exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vector = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + let exceptionOccurred = false; + try { + // Call the createHnswIndex function with specific values + await vector.createHnswIndex({ + m: 50, + efConstruction: 150, + efSearch: 300, + indexName: "custom_index", + }); + } catch (error) { + console.log(error); + exceptionOccurred = true; + } + + // Assert that no exception occurred + expect(exceptionOccurred).toBe(false); + }); + + test("test HNSW index creation after initialization", async () => { + const tableNameTest = "TEST_TABLE_HNSW_INDEX_AFTER_INIT"; + + // Clean up: drop the table if it exists + await dropTable(client, tableNameTest); + const args = { + connection: client, + tableName: tableNameTest, + }; + // Initialize HanaDB without adding documents yet + const vectorDB = new 
HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + // Create HNSW index before adding any documents + await vectorDB.createHnswIndex({ + indexName: "index_pre_add", + efSearch: 400, + m: 50, + efConstruction: 150, + }); + + // Add documents after index creation + await vectorDB.addDocuments([ + { + pageContent: "Bye bye", + metadata: { id: 2, name: "2" }, + }, + { + pageContent: "Hello world", + metadata: { id: 1, name: "1" }, + }, + { + pageContent: "hello nice world", + metadata: { id: 3, name: "3" }, + }, + ]); + + const results = await vectorDB.similaritySearch("Hello world", 1); + expect(results).toHaveLength(1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 1, name: "1" }, + }), + ]); + }); + + test("test duplicate HNSW index creation", async () => { + const tableNameTest = "TEST_TABLE_HNSW_DUPLICATE_INDEX"; + const args = { + connection: client, + tableName: tableNameTest, + }; + // Clean up: drop the table if it exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vectorDB = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + // Create HNSW index for the first time + await vectorDB.createHnswIndex({ + indexName: "index_cosine", + efSearch: 300, + m: 80, + efConstruction: 100, + }); + + // Creating a second HNSW index on the same store (this call omits indexName) is expected to reject + await expect( + vectorDB.createHnswIndex({ + efSearch: 300, + m: 80, + efConstruction: 100, + }) + ).rejects.toThrow(); + }); + + test("test HNSW index creation with invalid m value", async () => { + /** + * Description: + * This test ensures that the HNSW index creation throws an error when an invalid value for m is passed + * (e.g., m < 4 or m > 1000). 
+ */ + const tableNameTest = "TEST_TABLE_HNSW_INVALID_M"; + const args = { + connection: client, + tableName: tableNameTest, + }; + + // Cleanup: Drop table if exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vector = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + let exceptionOccurred = false; + try { + // Call the createHnswIndex function with invalid m value + await vector.createHnswIndex({ + m: 2, // Invalid value for m (should be >= 4) + }); + } catch (error) { + exceptionOccurred = true; + } + + // Assert that exception occurred + expect(exceptionOccurred).toBe(true); + }); + + test("test HNSW index creation with invalid efConstruction value", async () => { + /** + * Description: + * This test ensures that the HNSW index creation throws an error when an invalid efConstruction value is passed + * (e.g., efConstruction > 100000). + */ + const tableNameTest = "TEST_TABLE_HNSW_INVALID_EF_CONSTRUCTION"; + const args = { + connection: client, + tableName: tableNameTest, + }; + + // Cleanup: Drop table if exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vector = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + let exceptionOccurred = false; + try { + // Call the createHnswIndex function with invalid efConstruction value + await vector.createHnswIndex({ + efConstruction: 100001, // Invalid value for efConstruction (should be <= 100000) + }); + } catch (error) { + exceptionOccurred = true; + } + + // Assert that exception occurred + expect(exceptionOccurred).toBe(true); + }); + + test("test HNSW index creation with invalid efSearch value", async () => { + /** + * Description: + * This test ensures that the HNSW index creation throws an error when an invalid efSearch value is passed + * (e.g., efSearch < 1 or efSearch > 100000). 
+ */ + const tableNameTest = "TEST_TABLE_HNSW_INVALID_EF_SEARCH"; + const args = { + connection: client, + tableName: tableNameTest, + }; + + // Cleanup: Drop table if exists + await dropTable(client, tableNameTest); + + // Create HanaDB instance and add data + const vector = await HanaDB.fromTexts( + ["foo", "bar", "baz"], + {}, + embeddings, + args + ); + + let exceptionOccurred = false; + try { + // Call the createHnswIndex function with invalid efSearch value + await vector.createHnswIndex({ + efSearch: 0, // Invalid value for efSearch (should be >= 1) + }); + } catch (error) { + exceptionOccurred = true; + } + + // Assert that exception occurred + expect(exceptionOccurred).toBe(true); + }); +}); + +describe("Filter Tests", () => { + // Filter Test 1: Applying various filters from TYPE_1_FILTERING_TEST_CASES + it.each(TYPE_1_FILTERING_TEST_CASES)( + "should apply type 1 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = "TEST_TABLE_ENHANCED_FILTER_1"; + const args = { + connection: client, + tableName: tableNameTest, + }; + await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); + + // Filter Test 2: Testing TYPE_2_FILTERING_TEST_CASES + it.each(TYPE_2_FILTERING_TEST_CASES)( + "should apply type 2 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = 
"TEST_TABLE_ENHANCED_FILTER_2"; + const args = { + connection: client, + tableName: tableNameTest, + }; + await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); + + // Filter Test 3: Testing TYPE_3_FILTERING_TEST_CASES + it.each(TYPE_3_FILTERING_TEST_CASES)( + "should apply type 3 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = "TEST_TABLE_ENHANCED_FILTER_3"; + const args = { + connection: client, + tableName: tableNameTest, + }; + await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); + + // Filter Test 4: Testing TYPE_4_FILTERING_TEST_CASES + it.each(TYPE_4_FILTERING_TEST_CASES)( + "should apply type 4 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = "TEST_TABLE_ENHANCED_FILTER_4"; + const args = { + 
connection: client, + tableName: tableNameTest, + }; + await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); + + // Filter Test 5: Testing TYPE_5_FILTERING_TEST_CASES + it.each(TYPE_5_FILTERING_TEST_CASES)( + "should apply type 5 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = "TEST_TABLE_ENHANCED_FILTER_5"; + const args = { + connection: client, + tableName: tableNameTest, + }; + await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); + + // Filter Test 6: Testing TYPE_6_FILTERING_TEST_CASES + it.each(TYPE_6_FILTERING_TEST_CASES)( + "should apply type 6 filtering correctly with filter %j", + async (testCase) => { + const { filter, expected } = testCase; + const tableNameTest = "TEST_TABLE_ENHANCED_FILTER_6"; + const args = { + 
await dropTable(client, tableNameTest); + + // Initialize the HanaDB instance + const vectorDB = new HanaDB(embeddings, args); + await vectorDB.initialize(); + expect(vectorDB).toBeDefined(); + + // Add documents to the database + await vectorDB.addDocuments(DOCUMENTS); + + // Perform a similarity search with the filter. NOTE(review): the console.log that follows looks like leftover debug output - confirm and remove + const docs = await vectorDB.similaritySearch("Foo", 5, filter); + console.log(docs); + const ids = docs.map((doc) => doc.metadata.id); + + // Check if the returned document IDs match the expected IDs + expect(ids.length).toBe(expected.length); + expect(ids.every((id) => expected.includes(id))).toBe(true); + } + ); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.test.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.test.ts index 67221de543b3..9fda0b6e5434 100644 --- a/libs/langchain-community/src/vectorstores/tests/hanavector.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/hanavector.test.ts @@ -7,7 +7,7 @@ describe("Sanity check tests", () => { HanaDB.sanitizeInt("HUGO"); // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (error: any) { - expect(error.message).toContain("must not be smaller than -1"); + expect(error.message).toContain("must not be smaller than 0"); } }); @@ -17,13 +17,13 @@ describe("Sanity check tests", () => { }); it("should sanitize int with negative values", () => { - expect(HanaDB.sanitizeInt(-1)).toBe(-1); - expect(HanaDB.sanitizeInt("-1")).toBe(-1); + expect(HanaDB.sanitizeInt(-1, -1)).toBe(-1); + expect(HanaDB.sanitizeInt("-1", -1)).toBe(-1); }); it("should sanitize int with illegal negative value", () => { try { - HanaDB.sanitizeInt(-2); + HanaDB.sanitizeInt(-2, -1); // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (error: any) { expect(error.message).toContain("must not be smaller than -1");