-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(community): Add support for SAP HANA Vector hnsw index creation …
…and advanced filtering (#7238)
- Loading branch information
Showing
7 changed files
with
1,289 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
210 changes: 210 additions & 0 deletions
210
examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
import { OpenAIEmbeddings } from "@langchain/openai"; | ||
import hanaClient from "hdb"; | ||
import { Document } from "@langchain/core/documents"; | ||
import { | ||
HanaDB, | ||
HanaDBArgs, | ||
} from "@langchain/community/vectorstores/hanavector"; | ||
|
||
// Connection settings are read from environment variables.
const connectionParams = {
  host: process.env.HANA_HOST,
  port: process.env.HANA_PORT,
  user: process.env.HANA_UID,
  password: process.env.HANA_PWD,
};
const client = hanaClient.createClient(connectionParams);

// Connect to SAP HANA.
// `client.connect` reports its outcome through a callback, so it is wrapped
// in a Promise that the script awaits before doing anything else.
await new Promise<void>((resolve, reject) => {
  client.connect((err: Error) => {
    if (err) {
      reject(err);
      return;
    }
    console.log("Connected to SAP HANA successfully.");
    resolve();
  });
});
|
||
// Sample documents. The metadata fields (name, is_active, id, height) are the
// values the filter examples in this script match against.
const docs: Document[] = [
  {
    pageContent: "First",
    metadata: { name: "adam", is_active: true, id: 1, height: 10.0 },
  },
  {
    pageContent: "Second",
    metadata: { name: "bob", is_active: false, id: 2, height: 5.7 },
  },
  {
    pageContent: "Third",
    metadata: { name: "jane", is_active: true, id: 3, height: 2.4 },
  },
];

// Initialize embeddings
const embeddings = new OpenAIEmbeddings();

const args: HanaDBArgs = {
  connection: client,
  tableName: "testAdvancedFilters",
};

// Create a LangChain VectorStore interface for the HANA database and specify
// the table (collection) to use in args.
const vectorStore = new HanaDB(embeddings, args);
// The instance needs to be initialized once after it is created.
await vectorStore.initialize();
// Delete any already existing documents from the table, then insert the
// sample documents so every run starts from the same data.
await vectorStore.delete({ filter: {} });
await vectorStore.addDocuments(docs);
|
||
/**
 * Helper function to print filter results.
 *
 * Logs the metadata of every document in `result`, one console line per
 * document. Logs the literal text "<empty result>" when `result` is empty.
 *
 * @param result - Documents to print; the array is only read, never modified.
 */
function printFilterResult(result: readonly Document[]): void {
  if (result.length === 0) {
    console.log("<empty result>");
    return;
  }
  for (const doc of result) {
    console.log(doc.metadata);
  }
}
|
||
// Each example below assigns a new filter, logs it as JSON, runs a similarity
// search restricted by that filter, and prints the metadata of the matches.
// The block comment after each example shows the expected console output.
let advancedFilter;

// Not equal
advancedFilter = { id: { $ne: 1 } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$ne":1}}
{ name: 'bob', is_active: false, id: 2, height: 5.7 }
{ name: 'jane', is_active: true, id: 3, height: 2.4 }
*/

// Between range
advancedFilter = { id: { $between: [1, 2] } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$between":[1,2]}}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 } */

// In list
advancedFilter = { name: { $in: ["adam", "bob"] } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$in":["adam","bob"]}}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 } */

// Not in list
advancedFilter = { name: { $nin: ["adam", "bob"] } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$nin":["adam","bob"]}}
{ name: 'jane', is_active: true, id: 3, height: 2.4 } */

// Greater than
advancedFilter = { id: { $gt: 1 } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$gt":1}}
{ name: 'bob', is_active: false, id: 2, height: 5.7 }
{ name: 'jane', is_active: true, id: 3, height: 2.4 } */

// Greater than or equal to
advancedFilter = { id: { $gte: 1 } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$gte":1}}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 }
{ name: 'jane', is_active: true, id: 3, height: 2.4 } */

// Less than
advancedFilter = { id: { $lt: 1 } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$lt":1}}
<empty result> */

// Less than or equal to
advancedFilter = { id: { $lte: 1 } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$lte":1}}
{ name: 'adam', is_active: true, id: 1, height: 10 } */

// Text filtering with $like
advancedFilter = { name: { $like: "a%" } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$like":"a%"}}
{ name: 'adam', is_active: true, id: 1, height: 10 } */

// Text filtering with $like, using a wildcard on both sides of the pattern
advancedFilter = { name: { $like: "%a%" } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$like":"%a%"}}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'jane', is_active: true, id: 3, height: 2.4 } */

// Combined filtering with $or
advancedFilter = { $or: [{ id: 1 }, { name: "bob" }] };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"$or":[{"id":1},{"name":"bob"}]}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 } */

// Combined filtering with $and
advancedFilter = { $and: [{ id: 1 }, { id: 2 }] };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"$and":[{"id":1},{"id":2}]}
<empty result> */

// Combined filtering with $or over three alternatives
advancedFilter = { $or: [{ id: 1 }, { id: 2 }, { id: 3 }] };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"$or":[{"id":1},{"id":2},{"id":3}]}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 }
{ name: 'jane', is_active: true, id: 3, height: 2.4 } */

// You can also define a nested filter with $and and $or.
advancedFilter = {
  $and: [{ $or: [{ id: 1 }, { id: 2 }] }, { height: { $gte: 5.0 } }],
};
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
  await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
// Note: JSON.stringify serializes the number 5.0 as 5.
/* Filter: {"$and":[{"$or":[{"id":1},{"id":2}]},{"height":{"$gte":5}}]}
{ name: 'adam', is_active: true, id: 1, height: 10 }
{ name: 'bob', is_active: false, id: 2, height: 5.7 } */

// Disconnect from SAP HANA after the operations
client.disconnect();
98 changes: 98 additions & 0 deletions
98
examples/src/indexes/vector_stores/hana_vector/createHnswIndex.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import hanaClient from "hdb"; | ||
import { | ||
HanaDB, | ||
HanaDBArgs, | ||
} from "@langchain/community/vectorstores/hanavector"; | ||
import { OpenAIEmbeddings } from "@langchain/openai"; | ||
|
||
// table "test_fromDocs" is already created with the previous example. | ||
// Now, we will use this existing table to create indexes and perform similarity search. | ||
|
||
// Connection settings are read from environment variables.
const connectionParams = {
  host: process.env.HANA_HOST,
  port: process.env.HANA_PORT,
  user: process.env.HANA_UID,
  password: process.env.HANA_PWD,
};
const client = hanaClient.createClient(connectionParams);

// Connect to SAP HANA.
// `client.connect` reports its outcome through a callback, so it is wrapped
// in a Promise that the script awaits before doing anything else.
await new Promise<void>((resolve, reject) => {
  client.connect((err: Error) => {
    if (err) {
      reject(err);
      return;
    }
    console.log("Connected to SAP HANA successfully.");
    resolve();
  });
});
|
||
// Initialize embeddings
const embeddings = new OpenAIEmbeddings();

// First instance using the existing table "test_fromDocs" (default: Cosine similarity)
const argsCosine: HanaDBArgs = {
  connection: client,
  tableName: "test_fromDocs",
};

// Second instance using the existing table "test_fromDocs" but with L2 Euclidean distance
const argsL2: HanaDBArgs = {
  connection: client,
  tableName: "test_fromDocs",
  distanceStrategy: "euclidean", // Use Euclidean distance for this instance
};

// Initialize both HanaDB instances. They share the same connection and table
// and differ only in the configured distance strategy.
const vectorStoreCosine = new HanaDB(embeddings, argsCosine);
const vectorStoreL2 = new HanaDB(embeddings, argsL2);

// Create HNSW index with Cosine similarity (default)
await vectorStoreCosine.createHnswIndex({
  indexName: "hnsw_cosine_index",
  efSearch: 400,
  m: 50,
  efConstruction: 150,
});

// Create HNSW index with Euclidean (L2) distance
await vectorStoreL2.createHnswIndex({
  indexName: "hnsw_l2_index",
  efSearch: 400,
  m: 50,
  efConstruction: 150,
});
|
||
// Query text for similarity search
const query = "What did the president say about Ketanji Brown Jackson";

// Perform similarity search using the default Cosine index
const docsCosine = await vectorStoreCosine.similaritySearch(query, 2);
console.log("Cosine Similarity Results:");
// Print a separator line before the content of each returned document.
for (const doc of docsCosine) {
  console.log("-".repeat(80));
  console.log(doc.pageContent);
}
/*
Cosine Similarity Results:
----------------------------------------------------------------------
One of the most serious constitutional ...
And I did that 4 days ago, when I ...
----------------------------------------------------------------------
As I said last year, especially ...
While it often appears that we never agree, that isn’t true...
*/

// Perform similarity search using Euclidean distance (L2 index)
const docsL2 = await vectorStoreL2.similaritySearch(query, 2);
console.log("Euclidean (L2) Distance Results:");
for (const doc of docsL2) {
  console.log("-".repeat(80));
  console.log(doc.pageContent);
}
// The L2 distance results should be the same as cosine search results.

// Disconnect from SAP HANA after the operations
client.disconnect();
Oops, something went wrong.