From 4320ad192db215f9764cbb18f263b357cf913f79 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 13 Nov 2024 10:57:02 -0800 Subject: [PATCH 1/3] migrate to lancedb --- .../integrations/vectorstores/lancedb.mdx | 4 +- examples/package.json | 2 +- .../src/indexes/vector_stores/lancedb/load.ts | 2 +- libs/langchain-community/package.json | 10 +- .../src/vectorstores/lancedb.ts | 12 +- .../vectorstores/tests/lancedb.int.test.ts | 2 +- yarn.lock | 109 ++++++++---------- 7 files changed, 66 insertions(+), 75 deletions(-) diff --git a/docs/core_docs/docs/integrations/vectorstores/lancedb.mdx b/docs/core_docs/docs/integrations/vectorstores/lancedb.mdx index aa3511ceac26..8ee73c78e360 100644 --- a/docs/core_docs/docs/integrations/vectorstores/lancedb.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/lancedb.mdx @@ -12,10 +12,10 @@ LanceDB datasets are persisted to disk and can be shared between Node.js and Pyt ## Setup -Install the [LanceDB](https://github.com/lancedb/lancedb) [Node.js bindings](https://www.npmjs.com/package/vectordb): +Install the [LanceDB](https://github.com/lancedb/lancedb) [Node.js bindings](https://www.npmjs.com/package/@lancedb/lancedb): ```bash npm2yarn -npm install -S vectordb +npm install -S @lancedb/lancedb ``` import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; diff --git a/examples/package.json b/examples/package.json index 3b3c3340186f..2c5f9949b0d7 100644 --- a/examples/package.json +++ b/examples/package.json @@ -33,6 +33,7 @@ "@getzep/zep-js": "^0.9.0", "@gomomento/sdk": "^1.51.1", "@google/generative-ai": "^0.7.0", + "@lancedb/lancedb": "^0.12.0", "@langchain/anthropic": "workspace:*", "@langchain/aws": "workspace:*", "@langchain/azure-cosmosdb": "workspace:*", @@ -102,7 +103,6 @@ "typeorm": "^0.3.20", "typesense": "^1.5.3", "uuid": "^10.0.0", - "vectordb": "^0.9.0", "voy-search": "0.6.2", "weaviate-ts-client": "^2.0.0", "zod": "^3.22.4", diff --git a/examples/src/indexes/vector_stores/lancedb/load.ts b/examples/src/indexes/vector_stores/lancedb/load.ts index afa7d6c5524a..3592c64df650 100644 --- a/examples/src/indexes/vector_stores/lancedb/load.ts +++ b/examples/src/indexes/vector_stores/lancedb/load.ts @@ -1,6 +1,6 @@ import { LanceDB } from "@langchain/community/vectorstores/lancedb"; import { OpenAIEmbeddings } from "@langchain/openai"; -import { connect } from "vectordb"; +import { connect } from "@lancedb/lancedb"; import * as fs from "node:fs/promises"; import * as path from "node:path"; import os from "node:os"; diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 8ca1c034abd9..a6e3ebb1fa20 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -79,6 +79,7 @@ "@huggingface/inference": "^2.6.4", "@ibm-cloud/watsonx-ai": "^1.1.0", "@jest/globals": "^29.5.0", + "@lancedb/lancedb": "^0.12.0", "@langchain/core": "workspace:*", "@langchain/scripts": ">=0.1.0 <0.2.0", "@langchain/standard-tests": "0.0.0", @@ -210,7 +211,6 @@ "typescript": "~5.1.6", "typesense": "^1.5.3", "usearch": "^1.1.1", - "vectordb": "^0.9.0", "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", "web-auth-library": "^1.0.3", @@ -246,6 +246,7 @@ "@gradientai/nodejs-sdk": "^1.2.0", "@huggingface/inference": "^2.6.4", "@ibm-cloud/watsonx-ai": "*", + "@lancedb/lancedb": "^0.12.0", "@langchain/core": ">=0.2.21 <0.4.0", "@layerup/layerup-security": "^1.5.12", "@libsql/client": "^0.14.0", @@ -334,7 +335,6 @@ "typeorm": "^0.3.20", "typesense": "^1.5.3", "usearch": "^1.1.1", - "vectordb": "^0.1.4", "voy-search": "0.6.2", "weaviate-ts-client": "*", "web-auth-library": "^1.0.3", @@ -424,6 +424,9 @@ "@huggingface/inference": { "optional": true }, + "@lancedb/lancedb": { + "optional": true + }, "@layerup/layerup-security": { "optional": true }, @@ -682,9 +685,6 @@ "usearch": { "optional": true }, - "vectordb": { - "optional": true - }, "voy-search": { "optional": true }, diff --git a/libs/langchain-community/src/vectorstores/lancedb.ts b/libs/langchain-community/src/vectorstores/lancedb.ts index 7df73aac93e7..20bd586c8ddc 100644 --- a/libs/langchain-community/src/vectorstores/lancedb.ts +++ b/libs/langchain-community/src/vectorstores/lancedb.ts @@ -1,4 +1,4 @@ -import { connect, Table, Connection, WriteMode } from "vectordb"; +import { connect, Table, Connection } from "@lancedb/lancedb"; import type { EmbeddingsInterface } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; @@ -12,7 +12,7 @@ export type LanceDBArgs = { textKey?: string; uri?: string; tableName?: string; - mode?: WriteMode; + mode?: "create" | "overwrite"; }; /** @@ -29,7 +29,7 @@ export class LanceDB extends VectorStore { private tableName: string; - private mode?: WriteMode; + private mode?: "create" | "overwrite"; constructor(embeddings: EmbeddingsInterface, args?: LanceDBArgs) { super(embeddings, args || {}); @@ -38,7 +38,7 @@ export class LanceDB extends VectorStore { this.textKey = args?.textKey || "text"; this.uri = args?.uri || "~/lancedb"; this.tableName = args?.tableName || "langchain"; - this.mode = args?.mode || WriteMode.Overwrite; + this.mode = args?.mode || "overwrite"; } /** @@ -86,7 +86,7 @@ export class LanceDB extends VectorStore { if (!this.table) { const db: Connection = await connect(this.uri); this.table = await db.createTable(this.tableName, data, { - writeMode: this.mode, + mode: this.mode, }); return; @@ -110,7 +110,7 @@ export class LanceDB extends VectorStore { "Table not found. Please add vectors to the table first." ); } - const results = await this.table.search(query).limit(k).execute(); + const results = await this.table.query().nearestTo(query).limit(k).toArray(); const docsAndScore: [Document, number][] = []; results.forEach((item) => { diff --git a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts index 3d561c903440..c8af6e384889 100644 --- a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts @@ -2,7 +2,7 @@ import { beforeEach, describe, expect, test } from "@jest/globals"; import * as fs from "node:fs/promises"; import * as path from "node:path"; import * as os from "node:os"; -import { connect, Table } from "vectordb"; +import { connect, Table } from "@lancedb/lancedb"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; diff --git a/yarn.lock b/yarn.lock index 16cc8eb38d04..5ca737c8d845 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11126,41 +11126,68 @@ __metadata: languageName: node linkType: hard -"@lancedb/vectordb-darwin-arm64@npm:0.4.20": - version: 0.4.20 - resolution: "@lancedb/vectordb-darwin-arm64@npm:0.4.20" +"@lancedb/lancedb-darwin-arm64@npm:0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb-darwin-arm64@npm:0.12.0" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@lancedb/vectordb-darwin-x64@npm:0.4.20": - version: 0.4.20 - resolution: "@lancedb/vectordb-darwin-x64@npm:0.4.20" +"@lancedb/lancedb-darwin-x64@npm:0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb-darwin-x64@npm:0.12.0" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20": - version: 0.4.20 - resolution: "@lancedb/vectordb-linux-arm64-gnu@npm:0.4.20" - conditions: os=linux & cpu=arm64 +"@lancedb/lancedb-linux-arm64-gnu@npm:0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb-linux-arm64-gnu@npm:0.12.0" + conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@lancedb/vectordb-linux-x64-gnu@npm:0.4.20": - version: 0.4.20 - resolution: "@lancedb/vectordb-linux-x64-gnu@npm:0.4.20" - conditions: os=linux & cpu=x64 +"@lancedb/lancedb-linux-x64-gnu@npm:0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb-linux-x64-gnu@npm:0.12.0" + conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@lancedb/vectordb-win32-x64-msvc@npm:0.4.20": - version: 0.4.20 - resolution: "@lancedb/vectordb-win32-x64-msvc@npm:0.4.20" +"@lancedb/lancedb-win32-x64-msvc@npm:0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb-win32-x64-msvc@npm:0.12.0" conditions: os=win32 & cpu=x64 languageName: node linkType: hard +"@lancedb/lancedb@npm:^0.12.0": + version: 0.12.0 + resolution: "@lancedb/lancedb@npm:0.12.0" + dependencies: + "@lancedb/lancedb-darwin-arm64": 0.12.0 + "@lancedb/lancedb-darwin-x64": 0.12.0 + "@lancedb/lancedb-linux-arm64-gnu": 0.12.0 + "@lancedb/lancedb-linux-x64-gnu": 0.12.0 + "@lancedb/lancedb-win32-x64-msvc": 0.12.0 + reflect-metadata: ^0.2.2 + peerDependencies: + apache-arrow: ">=13.0.0 <=17.0.0" + dependenciesMeta: + "@lancedb/lancedb-darwin-arm64": + optional: true + "@lancedb/lancedb-darwin-x64": + optional: true + "@lancedb/lancedb-linux-arm64-gnu": + optional: true + "@lancedb/lancedb-linux-x64-gnu": + optional: true + "@lancedb/lancedb-win32-x64-msvc": + optional: true + conditions: (os=darwin | os=linux | os=win32) & (cpu=x64 | cpu=arm64) + languageName: node + linkType: hard + "@langchain/anthropic@*, @langchain/anthropic@workspace:*, @langchain/anthropic@workspace:libs/langchain-anthropic": version: 0.0.0-use.local resolution: "@langchain/anthropic@workspace:libs/langchain-anthropic" @@ -11490,6 +11517,7 @@ __metadata: "@huggingface/inference": ^2.6.4 "@ibm-cloud/watsonx-ai": ^1.1.0 "@jest/globals": ^29.5.0 + "@lancedb/lancedb": ^0.12.0 "@langchain/core": "workspace:*" "@langchain/openai": ">=0.2.0 <0.4.0" "@langchain/scripts": ">=0.1.0 <0.2.0" @@ -11629,7 +11657,6 @@ __metadata: typesense: ^1.5.3 usearch: ^1.1.1 uuid: ^10.0.0 - vectordb: ^0.9.0 voy-search: 0.6.2 weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 @@ -11666,6 +11693,7 @@ __metadata: "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 "@ibm-cloud/watsonx-ai": "*" + "@lancedb/lancedb": ^0.12.0 "@langchain/core": ">=0.2.21 <0.4.0" "@layerup/layerup-security": ^1.5.12 "@libsql/client": ^0.14.0 @@ -11754,7 +11782,6 @@ __metadata: typeorm: ^0.3.20 typesense: ^1.5.3 usearch: ^1.1.1 - vectordb: ^0.1.4 voy-search: 0.6.2 weaviate-ts-client: "*" web-auth-library: ^1.0.3 @@ -11816,6 +11843,8 @@ __metadata: optional: true "@huggingface/inference": optional: true + "@lancedb/lancedb": + optional: true "@layerup/layerup-security": optional: true "@libsql/client": @@ -11988,8 +12017,6 @@ __metadata: optional: true usearch: optional: true - vectordb: - optional: true voy-search: optional: true weaviate-ts-client: @@ -13234,13 +13261,6 @@ __metadata: languageName: node linkType: hard -"@neon-rs/load@npm:^0.0.74": - version: 0.0.74 - resolution: "@neon-rs/load@npm:0.0.74" - checksum: d26ec9b08cdf1a7c5aeefe98f77112d205d11b4005a7934b21fe8fd27528847e08e4749e7e6c3fc05ae9f701175a58c11a095ae6af449634df3991a2c82e1dfa - languageName: node - linkType: hard - "@neondatabase/serverless@npm:0.6.0": version: 0.6.0 resolution: "@neondatabase/serverless@npm:0.6.0" @@ -27373,6 +27393,7 @@ __metadata: "@getzep/zep-js": ^0.9.0 "@gomomento/sdk": ^1.51.1 "@google/generative-ai": ^0.7.0 + "@lancedb/lancedb": ^0.12.0 "@langchain/anthropic": "workspace:*" "@langchain/aws": "workspace:*" "@langchain/azure-cosmosdb": "workspace:*" @@ -27457,7 +27478,6 @@ __metadata: typescript: ~5.1.6 typesense: ^1.5.3 uuid: ^10.0.0 - vectordb: ^0.9.0 voy-search: 0.6.2 weaviate-ts-client: ^2.0.0 zod: ^3.22.4 @@ -38701,7 +38721,7 @@ __metadata: languageName: node linkType: hard -"reflect-metadata@npm:^0.2.1": +"reflect-metadata@npm:^0.2.1, reflect-metadata@npm:^0.2.2": version: 0.2.2 resolution: "reflect-metadata@npm:0.2.2" checksum: a66c7b583e4efdd8f3c3124fbff33da2d0c86d8280617516308b32b2159af7a3698c961db3246387f56f6316b1d33a608f39bb2b49d813316dfc58f6d3bf3210 @@ -43229,35 +43249,6 @@ __metadata: languageName: node linkType: hard -"vectordb@npm:^0.9.0": - version: 0.9.0 - resolution: "vectordb@npm:0.9.0" - dependencies: - "@lancedb/vectordb-darwin-arm64": 0.4.20 - "@lancedb/vectordb-darwin-x64": 0.4.20 - "@lancedb/vectordb-linux-arm64-gnu": 0.4.20 - "@lancedb/vectordb-linux-x64-gnu": 0.4.20 - "@lancedb/vectordb-win32-x64-msvc": 0.4.20 - "@neon-rs/load": ^0.0.74 - axios: ^1.4.0 - peerDependencies: - "@apache-arrow/ts": ^14.0.2 - apache-arrow: ^14.0.2 - dependenciesMeta: - "@lancedb/vectordb-darwin-arm64": - optional: true - "@lancedb/vectordb-darwin-x64": - optional: true - "@lancedb/vectordb-linux-arm64-gnu": - optional: true - "@lancedb/vectordb-linux-x64-gnu": - optional: true - "@lancedb/vectordb-win32-x64-msvc": - optional: true - conditions: (os=darwin | os=linux | os=win32) & (cpu=x64 | cpu=arm64) - languageName: node - linkType: hard - "vfile-location@npm:^3.0.0, vfile-location@npm:^3.2.0": version: 3.2.0 resolution: "vfile-location@npm:3.2.0" From 8777bb7362c679f8b623673becd057315205f482 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 15 Nov 2024 14:09:36 -0800 Subject: [PATCH 2/3] upgrade to 0.13.0 --- examples/package.json | 2 +- libs/langchain-community/package.json | 2 +- yarn.lock | 50 +++++++++++++-------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/examples/package.json b/examples/package.json index 2c5f9949b0d7..75ab6e9eb4d6 100644 --- a/examples/package.json +++ b/examples/package.json @@ -33,7 +33,7 @@ "@getzep/zep-js": "^0.9.0", "@gomomento/sdk": "^1.51.1", "@google/generative-ai": "^0.7.0", - "@lancedb/lancedb": "^0.12.0", + "@lancedb/lancedb": "^0.13.0", "@langchain/anthropic": "workspace:*", "@langchain/aws": "workspace:*", "@langchain/azure-cosmosdb": "workspace:*", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index a6e3ebb1fa20..d2ff44e78a16 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -79,7 +79,7 @@ "@huggingface/inference": "^2.6.4", "@ibm-cloud/watsonx-ai": "^1.1.0", "@jest/globals": "^29.5.0", - "@lancedb/lancedb": "^0.12.0", + "@lancedb/lancedb": "^0.13.0", "@langchain/core": "workspace:*", "@langchain/scripts": ">=0.1.0 <0.2.0", "@langchain/standard-tests": "0.0.0", diff --git a/yarn.lock b/yarn.lock index 5ca737c8d845..42dd445ba3fe 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11126,50 +11126,50 @@ __metadata: languageName: node linkType: hard -"@lancedb/lancedb-darwin-arm64@npm:0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb-darwin-arm64@npm:0.12.0" +"@lancedb/lancedb-darwin-arm64@npm:0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb-darwin-arm64@npm:0.13.0" conditions: os=darwin & cpu=arm64 languageName: node linkType: hard -"@lancedb/lancedb-darwin-x64@npm:0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb-darwin-x64@npm:0.12.0" +"@lancedb/lancedb-darwin-x64@npm:0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb-darwin-x64@npm:0.13.0" conditions: os=darwin & cpu=x64 languageName: node linkType: hard -"@lancedb/lancedb-linux-arm64-gnu@npm:0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb-linux-arm64-gnu@npm:0.12.0" +"@lancedb/lancedb-linux-arm64-gnu@npm:0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb-linux-arm64-gnu@npm:0.13.0" conditions: os=linux & cpu=arm64 & libc=glibc languageName: node linkType: hard -"@lancedb/lancedb-linux-x64-gnu@npm:0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb-linux-x64-gnu@npm:0.12.0" +"@lancedb/lancedb-linux-x64-gnu@npm:0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb-linux-x64-gnu@npm:0.13.0" conditions: os=linux & cpu=x64 & libc=glibc languageName: node linkType: hard -"@lancedb/lancedb-win32-x64-msvc@npm:0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb-win32-x64-msvc@npm:0.12.0" +"@lancedb/lancedb-win32-x64-msvc@npm:0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb-win32-x64-msvc@npm:0.13.0" conditions: os=win32 & cpu=x64 languageName: node linkType: hard -"@lancedb/lancedb@npm:^0.12.0": - version: 0.12.0 - resolution: "@lancedb/lancedb@npm:0.12.0" +"@lancedb/lancedb@npm:^0.13.0": + version: 0.13.0 + resolution: "@lancedb/lancedb@npm:0.13.0" dependencies: - "@lancedb/lancedb-darwin-arm64": 0.12.0 - "@lancedb/lancedb-darwin-x64": 0.12.0 - "@lancedb/lancedb-linux-arm64-gnu": 0.12.0 - "@lancedb/lancedb-linux-x64-gnu": 0.12.0 - "@lancedb/lancedb-win32-x64-msvc": 0.12.0 + "@lancedb/lancedb-darwin-arm64": 0.13.0 + "@lancedb/lancedb-darwin-x64": 0.13.0 + "@lancedb/lancedb-linux-arm64-gnu": 0.13.0 + "@lancedb/lancedb-linux-x64-gnu": 0.13.0 + "@lancedb/lancedb-win32-x64-msvc": 0.13.0 reflect-metadata: ^0.2.2 peerDependencies: apache-arrow: ">=13.0.0 <=17.0.0" @@ -11517,7 +11517,7 @@ __metadata: "@huggingface/inference": ^2.6.4 "@ibm-cloud/watsonx-ai": ^1.1.0 "@jest/globals": ^29.5.0 - "@lancedb/lancedb": ^0.12.0 + "@lancedb/lancedb": ^0.13.0 "@langchain/core": "workspace:*" "@langchain/openai": ">=0.2.0 <0.4.0" "@langchain/scripts": ">=0.1.0 <0.2.0" @@ -27393,7 +27393,7 @@ __metadata: "@getzep/zep-js": ^0.9.0 "@gomomento/sdk": ^1.51.1 "@google/generative-ai": ^0.7.0 - "@lancedb/lancedb": ^0.12.0 + "@lancedb/lancedb": ^0.13.0 "@langchain/anthropic": "workspace:*" "@langchain/aws": "workspace:*" "@langchain/azure-cosmosdb": "workspace:*" From d0ae3e5432275ad9e31fd47abf3a843156cb6748 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Sat, 16 Nov 2024 17:15:28 -0800 Subject: [PATCH 3/3] Format --- libs/langchain-community/src/vectorstores/lancedb.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/vectorstores/lancedb.ts b/libs/langchain-community/src/vectorstores/lancedb.ts index 20bd586c8ddc..f482b32c2148 100644 --- a/libs/langchain-community/src/vectorstores/lancedb.ts +++ b/libs/langchain-community/src/vectorstores/lancedb.ts @@ -110,7 +110,11 @@ export class LanceDB extends VectorStore { "Table not found. Please add vectors to the table first." ); } - const results = await this.table.query().nearestTo(query).limit(k).toArray(); + const results = await this.table + .query() + .nearestTo(query) + .limit(k) + .toArray(); const docsAndScore: [Document, number][] = []; results.forEach((item) => {