diff --git a/docs/core_docs/docs/integrations/vectorstores/pgvector.mdx b/docs/core_docs/docs/integrations/vectorstores/pgvector.mdx
index 16ba9c29045b..64e578acff98 100644
--- a/docs/core_docs/docs/integrations/vectorstores/pgvector.mdx
+++ b/docs/core_docs/docs/integrations/vectorstores/pgvector.mdx
@@ -74,3 +74,24 @@ before using the constructor.
import ConnectionReuseExample from "@examples/indexes/vector_stores/pgvector_vectorstore/pgvector_pool.ts";
{ConnectionReuseExample}
+
+### Create HNSW Index
+
+By default, the extension performs a sequential scan search, with 100% recall. You might consider creating an HNSW index for approximate nearest neighbor (ANN) search to speed up `similaritySearchVectorWithScore` execution time. To create the HNSW index on your vector column, use the `createHnswIndex()` method.
+
+The method parameters include:
+
+**dimensions**: Defines the number of dimensions in your vector data type, up to 2000. For example, use 1536 for OpenAI's `text-embedding-ada-002` and Amazon's `amazon.titan-embed-text-v1` models.
+
+**m?**: The max number of connections per layer (16 by default). Index build time improves with smaller values, while higher values can speed up search queries.
+
+**efConstruction?**: The size of the dynamic candidate list for constructing the graph (64 by default). A higher value can potentially improve the index quality at the cost of index build time.
+
+**distanceFunction?**: The name of the distance function to use for the index; it is selected automatically based on the store's `distanceStrategy`.
+
+More info is available at the [`Pgvector GitHub project`](https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw) and in the HNSW paper by Yu. A. Malkov and D. A. Yashunin (2020): [`Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs`](https://arxiv.org/pdf/1603.09320).
+
+import HnswExample from "@examples/indexes/vector_stores/pgvector_vectorstore/pgvector_hnsw.ts";
+
+{HnswExample}
+
diff --git a/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector_hnsw.ts b/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector_hnsw.ts
new file mode 100644
index 000000000000..d3c66f3fdd9d
--- /dev/null
+++ b/examples/src/indexes/vector_stores/pgvector_vectorstore/pgvector_hnsw.ts
@@ -0,0 +1,54 @@
+import { OpenAIEmbeddings } from "@langchain/openai";
+import {
+ DistanceStrategy,
+ PGVectorStore,
+} from "@langchain/community/vectorstores/pgvector";
+import { PoolConfig } from "pg";
+
+// First, follow set-up instructions at
+// https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/pgvector
+
+// Share one embeddings client so stored and query vectors use the same model.
+const embeddings = new OpenAIEmbeddings();
+
+const config = {
+  postgresConnectionOptions: {
+    type: "postgres",
+    host: "127.0.0.1",
+    port: 5433,
+    user: "myuser",
+    password: "ChangeMe",
+    database: "api",
+  } as PoolConfig,
+  tableName: "testlangchain",
+  columns: {
+    idColumnName: "id",
+    vectorColumnName: "vector",
+    contentColumnName: "content",
+    metadataColumnName: "metadata",
+  },
+  // supported distance strategies: cosine (default), innerProduct, or euclidean
+  distanceStrategy: "cosine" as DistanceStrategy,
+};
+
+const pgvectorStore = await PGVectorStore.initialize(embeddings, config);
+
+try {
+  // Create the HNSW index before inserting any documents.
+  await pgvectorStore.createHnswIndex({
+    dimensions: 1536,
+    efConstruction: 64,
+    m: 16,
+  });
+
+  await pgvectorStore.addDocuments([
+    { pageContent: "what's this", metadata: { a: 2, b: ["tag1", "tag2"] } },
+    { pageContent: "Cat drinks milk", metadata: { a: 1, b: ["tag2"] } },
+  ]);
+
+  const query = await embeddings.embedQuery("water");
+  console.log(await pgvectorStore.similaritySearchVectorWithScore(query, 1));
+} finally {
+  // Close the connection pool even when one of the steps above throws.
+  await pgvectorStore.end();
+}
diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts
index af5ac76bc047..6a58c4ba5cd0 100644
--- a/libs/langchain-community/src/vectorstores/pgvector.ts
+++ b/libs/langchain-community/src/vectorstores/pgvector.ts
@@ -677,4 +677,55 @@ export class PGVectorStore extends VectorStore {
this.client?.release();
return this.pool.end();
}
+
+  /**
+   * Creates an HNSW index on the vector column unless one already exists.
+   * A failing `CREATE INDEX` query is logged via `console.error`, not rethrown.
+   *
+   * @param config.dimensions - Defines the number of dimensions in your vector data type, up to 2000. For example, use 1536 for OpenAI's text-embedding-ada-002 and Amazon's amazon.titan-embed-text-v1 models.
+   * @param config.m - The max number of connections per layer (16 by default).
+   * @param config.efConstruction - The size of the dynamic candidate list for constructing the graph (64 by default).
+   * @param config.distanceFunction - Operator class for the index. Defaults to the one matching this store's distanceStrategy.
+   * @returns Promise that resolves once the index creation attempt has finished.
+   * @throws Error on an unknown distance strategy or an option that is unsafe to place in SQL.
+   */
+  async createHnswIndex(config: {
+    dimensions: number;
+    m?: number;
+    efConstruction?: number;
+    distanceFunction?: string;
+  }): Promise<void> {
+    const strategyOpClass = new Map<string, string>([
+      ["cosine", "vector_cosine_ops"],
+      ["innerProduct", "vector_ip_ops"],
+      ["euclidean", "vector_l2_ops"],
+    ]).get(String(this.distanceStrategy));
+    if (!strategyOpClass) {
+      throw new Error(`Unknown distance strategy: ${this.distanceStrategy}`);
+    }
+
+    // An explicit operator class wins over the strategy default.
+    const opClass = config.distanceFunction || strategyOpClass;
+    const { dimensions } = config;
+    const m = config.m || 16;
+    const efConstruction = config.efConstruction || 64;
+
+    // DDL cannot use bind parameters, so validate every interpolated option.
+    const sizes = [dimensions, m, efConstruction];
+    const sizesOk = sizes.every((n) => Number.isInteger(n) && n > 0);
+    if (!sizesOk || !/^\w+$/.test(opClass)) {
+      throw new Error(`Invalid HNSW index options: ${JSON.stringify(config)}`);
+    }
+
+    const createIndexQuery = `CREATE INDEX IF NOT EXISTS ${this.vectorColumnName}_embedding_hnsw_idx
+      ON ${this.computedTableName} USING hnsw ((${this.vectorColumnName}::vector(${dimensions})) ${opClass})
+      WITH (m=${m}, ef_construction=${efConstruction});`;
+    try {
+      await this.pool.query(createIndexQuery);
+    } catch (e) {
+      console.error(
+        `Failed to create HNSW index on table ${this.computedTableName}, error: ${e}`
+      );
+    }
+  }
}
diff --git a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts
index c22627443912..a1a241c80e8a 100644
--- a/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts
+++ b/libs/langchain-community/src/vectorstores/tests/pgvector/pgvector.int.test.ts
@@ -2,21 +2,30 @@ import { expect, test } from "@jest/globals";
import pg, { PoolConfig } from "pg";
import { OpenAIEmbeddings } from "@langchain/openai";
import { PGVectorStore, PGVectorStoreArgs } from "../../pgvector.js";
+// import { BedrockEmbeddings } from "../../../embeddings/bedrock.js";
+// Embeddings client shared by the suites below.
+const embeddingsEngine = new OpenAIEmbeddings();
+// Alternative engine, kept commented out together with its import above:
+// const embeddingsEngine = new BedrockEmbeddings({
+// region: "us-east-1",
+// });
+// Connection settings reused by the suites below (store config and pg.Pool).
+const postgresConnectionOptions = {
+ type: "postgres",
+ host: "127.0.0.1",
+ port: 5432,
+ user: "myuser",
+ password: "ChangeMe",
+ database: "api",
+} as PoolConfig;
describe("PGVectorStore", () => {
let pgvectorVectorStore: PGVectorStore;
const tableName = "testlangchain";
beforeAll(async () => {
- const config = {
- postgresConnectionOptions: {
- type: "postgres",
- host: "127.0.0.1",
- port: 5432,
- user: "myuser",
- password: "ChangeMe",
- database: "api",
- } as PoolConfig,
+ const config: PGVectorStoreArgs = {
+ postgresConnectionOptions,
tableName: "testlangchain",
// collectionTableName: "langchain_pg_collection",
// collectionName: "langchain",
@@ -29,7 +38,7 @@ describe("PGVectorStore", () => {
};
pgvectorVectorStore = await PGVectorStore.initialize(
- new OpenAIEmbeddings(),
+ embeddingsEngine,
config
);
});
@@ -297,14 +306,7 @@ describe("PGVectorStore with collection", () => {
beforeAll(async () => {
const config = {
- postgresConnectionOptions: {
- type: "postgres",
- host: "127.0.0.1",
- port: 5432,
- user: "myuser",
- password: "ChangeMe",
- database: "api",
- } as PoolConfig,
+ postgresConnectionOptions,
tableName,
collectionTableName,
collectionName: "langchain",
@@ -317,7 +319,7 @@ describe("PGVectorStore with collection", () => {
};
pgvectorVectorStore = await PGVectorStore.initialize(
- new OpenAIEmbeddings(),
+ embeddingsEngine,
config
);
});
@@ -535,13 +537,7 @@ describe("PGVectorStore with schema", () => {
let pool: pg.Pool;
beforeAll(async () => {
- pool = new pg.Pool({
- host: "127.0.0.1",
- port: 5432,
- user: "myuser",
- password: "ChangeMe",
- database: "api",
- });
+ pool = new pg.Pool(postgresConnectionOptions);
const config: PGVectorStoreArgs = {
pool,
@@ -560,7 +556,7 @@ describe("PGVectorStore with schema", () => {
await pool.query(`CREATE SCHEMA IF NOT EXISTS ${schema}`);
pgvectorVectorStore = await PGVectorStore.initialize(
- new OpenAIEmbeddings(),
+ embeddingsEngine,
config
);
computedTableName = pgvectorVectorStore.computedTableName;
@@ -773,3 +769,74 @@ describe("PGVectorStore with schema", () => {
}
});
});
+
+// Integration suite for createHnswIndex: checks that the index exists and that
+// similarity search returns the expected document while it is in place.
+describe("PGVectorStore with HNSW index", () => {
+  let pgvectorVectorStore: PGVectorStore;
+  const tableName = "testlangchain";
+  // Vector size passed to createHnswIndex in every setup step below.
+  const dimensions = 1536;
+
+  beforeAll(async () => {
+    const config: PGVectorStoreArgs = {
+      postgresConnectionOptions,
+      tableName,
+      columns: {
+        idColumnName: "id",
+        vectorColumnName: "vector",
+        contentColumnName: "content",
+        metadataColumnName: "metadata",
+      },
+      distanceStrategy: "cosine",
+    };
+
+    pgvectorVectorStore = await PGVectorStore.initialize(
+      embeddingsEngine,
+      config
+    );
+
+    // Create the index
+    await pgvectorVectorStore.createHnswIndex({ dimensions });
+  });
+
+  afterEach(async () => {
+    // Drop table, then recreate it (and its index) for the next test.
+    await pgvectorVectorStore.pool.query(`DROP TABLE "${tableName}"`);
+    await pgvectorVectorStore.ensureTableInDatabase();
+    await pgvectorVectorStore.createHnswIndex({ dimensions });
+  });
+
+  afterAll(async () => {
+    await pgvectorVectorStore.end();
+  });
+
+  test("Ensure table has HNSW index", async () => {
+    const result = await pgvectorVectorStore.pool.query(
+      `SELECT indexname, tablename, indexdef FROM pg_indexes where indexname='vector_embedding_hnsw_idx';`
+    );
+    // Check the row count first: destructuring rows[0] on an empty result would
+    // throw a TypeError instead of reporting a failed expectation.
+    expect(result.rowCount).toBe(1);
+    const { indexdef } = result.rows[0];
+    expect(indexdef.includes("USING hnsw")).toBe(true);
+  });
+
+  test("Test embeddings creation", async () => {
+    const documents = [
+      { pageContent: "hello", metadata: { a: 1 } },
+      { pageContent: "Cat drinks milk", metadata: { a: 2 } },
+      { pageContent: "hi", metadata: { a: 1 } },
+    ];
+    await pgvectorVectorStore.addDocuments(documents);
+
+    const query = await embeddingsEngine.embedQuery("milk");
+    const results = await pgvectorVectorStore.similaritySearchVectorWithScore(
+      query,
+      1
+    );
+
+    expect(results).toHaveLength(1);
+    expect(results[0][0].pageContent).toEqual("Cat drinks milk");
+  });
+});