Skip to content

Commit

Permalink
Add Vercel KV base store (#2912)
Browse files Browse the repository at this point in the history
* Allow ParentDocumentRetriever to subclass MultiVectorRetriever

* Initial vercel/kv wrapper

* Fixups

* Add entrypoint

* Remove subclassing

* Update ParentDocumentRetriever

* Update entrypoints
  • Loading branch information
jacoblee93 authored Oct 13, 2023
1 parent 379e7fb commit 40cc2b5
Show file tree
Hide file tree
Showing 21 changed files with 317 additions and 6 deletions.
1 change: 1 addition & 0 deletions environment_tests/test-exports-bun/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/storage/encoder_backed";
export * from "langchain/storage/in_memory";
export * from "langchain/util/math";
export * from "langchain/util/time";
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-cf/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/storage/encoder_backed";
export * from "langchain/storage/in_memory";
export * from "langchain/util/math";
export * from "langchain/util/time";
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-cjs/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ const cache = require("langchain/cache");
const stores_doc_in_memory = require("langchain/stores/doc/in_memory");
const stores_file_in_memory = require("langchain/stores/file/in_memory");
const stores_message_in_memory = require("langchain/stores/message/in_memory");
const storage_encoder_backed = require("langchain/storage/encoder_backed");
const storage_in_memory = require("langchain/storage/in_memory");
const util_math = require("langchain/util/math");
const util_time = require("langchain/util/time");
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-esbuild/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import * as cache from "langchain/cache";
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
import * as storage_encoder_backed from "langchain/storage/encoder_backed";
import * as storage_in_memory from "langchain/storage/in_memory";
import * as util_math from "langchain/util/math";
import * as util_time from "langchain/util/time";
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-esm/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import * as cache from "langchain/cache";
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
import * as storage_encoder_backed from "langchain/storage/encoder_backed";
import * as storage_in_memory from "langchain/storage/in_memory";
import * as util_math from "langchain/util/math";
import * as util_time from "langchain/util/time";
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-vercel/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/storage/encoder_backed";
export * from "langchain/storage/in_memory";
export * from "langchain/util/math";
export * from "langchain/util/time";
Expand Down
1 change: 1 addition & 0 deletions environment_tests/test-exports-vite/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/storage/encoder_backed";
export * from "langchain/storage/in_memory";
export * from "langchain/util/math";
export * from "langchain/util/time";
Expand Down
6 changes: 6 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -619,12 +619,18 @@ stores/message/planetscale.d.ts
stores/message/xata.cjs
stores/message/xata.js
stores/message/xata.d.ts
storage/encoder_backed.cjs
storage/encoder_backed.js
storage/encoder_backed.d.ts
storage/in_memory.cjs
storage/in_memory.js
storage/in_memory.d.ts
storage/ioredis.cjs
storage/ioredis.js
storage/ioredis.d.ts
storage/vercel_kv.cjs
storage/vercel_kv.js
storage/vercel_kv.d.ts
graphs/neo4j_graph.cjs
graphs/neo4j_graph.js
graphs/neo4j_graph.d.ts
Expand Down
21 changes: 21 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -631,12 +631,18 @@
"stores/message/xata.cjs",
"stores/message/xata.js",
"stores/message/xata.d.ts",
"storage/encoder_backed.cjs",
"storage/encoder_backed.js",
"storage/encoder_backed.d.ts",
"storage/in_memory.cjs",
"storage/in_memory.js",
"storage/in_memory.d.ts",
"storage/ioredis.cjs",
"storage/ioredis.js",
"storage/ioredis.d.ts",
"storage/vercel_kv.cjs",
"storage/vercel_kv.js",
"storage/vercel_kv.d.ts",
"graphs/neo4j_graph.cjs",
"graphs/neo4j_graph.js",
"graphs/neo4j_graph.d.ts",
Expand Down Expand Up @@ -771,6 +777,7 @@
"@typescript-eslint/eslint-plugin": "^5.58.0",
"@typescript-eslint/parser": "^5.58.0",
"@upstash/redis": "^1.20.6",
"@vercel/kv": "^0.2.3",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.25.1",
Expand Down Expand Up @@ -882,6 +889,7 @@
"@tensorflow/tfjs-converter": "*",
"@tensorflow/tfjs-core": "*",
"@upstash/redis": "^1.20.6",
"@vercel/kv": "^0.2.3",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.25.1",
Expand Down Expand Up @@ -1050,6 +1058,9 @@
"@upstash/redis": {
"optional": true
},
"@vercel/kv": {
"optional": true
},
"@vercel/postgres": {
"optional": true
},
Expand Down Expand Up @@ -2284,6 +2295,11 @@
"import": "./stores/message/xata.js",
"require": "./stores/message/xata.cjs"
},
"./storage/encoder_backed": {
"types": "./storage/encoder_backed.d.ts",
"import": "./storage/encoder_backed.js",
"require": "./storage/encoder_backed.cjs"
},
"./storage/in_memory": {
"types": "./storage/in_memory.d.ts",
"import": "./storage/in_memory.js",
Expand All @@ -2294,6 +2310,11 @@
"import": "./storage/ioredis.js",
"require": "./storage/ioredis.cjs"
},
"./storage/vercel_kv": {
"types": "./storage/vercel_kv.d.ts",
"import": "./storage/vercel_kv.js",
"require": "./storage/vercel_kv.cjs"
},
"./graphs/neo4j_graph": {
"types": "./graphs/neo4j_graph.d.ts",
"import": "./graphs/neo4j_graph.js",
Expand Down
3 changes: 3 additions & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,10 @@ const entrypoints = {
"stores/message/planetscale": "stores/message/planetscale",
"stores/message/xata": "stores/message/xata",
// storage
"storage/encoder_backed": "storage/encoder_backed",
"storage/in_memory": "storage/in_memory",
"storage/ioredis": "storage/ioredis",
"storage/vercel_kv": "storage/vercel_kv",
"graphs/neo4j_graph": "graphs/neo4j_graph",
// hub
hub: "hub",
Expand Down Expand Up @@ -426,6 +428,7 @@ const requiresOptionalDependency = [
"stores/message/planetscale",
"stores/message/xata",
"storage/ioredis",
"storage/vercel_kv",
"graphs/neo4j_graph",
// Prevent export due to circular dependency with "load" entrypoint
"hub",
Expand Down
1 change: 1 addition & 0 deletions langchain/src/load/import_constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ export const optionalImportEntrypoints = [
"langchain/stores/message/planetscale",
"langchain/stores/message/xata",
"langchain/storage/ioredis",
"langchain/storage/vercel_kv",
"langchain/graphs/neo4j_graph",
"langchain/hub",
"langchain/experimental/multimodal_embeddings/googlevertexai",
Expand Down
1 change: 1 addition & 0 deletions langchain/src/load/import_map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export * as cache from "../cache/index.js";
export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
export * as stores__file__in_memory from "../stores/file/in_memory.js";
export * as stores__message__in_memory from "../stores/message/in_memory.js";
export * as storage__encoder_backed from "../storage/encoder_backed.js";
export * as storage__in_memory from "../storage/in_memory.js";
export * as util__math from "../util/math.js";
export * as util__time from "../util/time.js";
Expand Down
3 changes: 3 additions & 0 deletions langchain/src/load/import_type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ export interface OptionalImportMap {
"langchain/storage/ioredis"?:
| typeof import("../storage/ioredis.js")
| Promise<typeof import("../storage/ioredis.js")>;
"langchain/storage/vercel_kv"?:
| typeof import("../storage/vercel_kv.js")
| Promise<typeof import("../storage/vercel_kv.js")>;
"langchain/graphs/neo4j_graph"?:
| typeof import("../graphs/neo4j_graph.js")
| Promise<typeof import("../graphs/neo4j_graph.js")>;
Expand Down
30 changes: 30 additions & 0 deletions langchain/src/retrievers/tests/parent_document.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,33 @@ test("Should work with a backwards compatible docstore too", async () => {
expect(retrievedDocs.length).toEqual(1);
expect(retrievedDocs[0].pageContent.length).toBeGreaterThan(1000);
});

// Integration test: configures a ParentDocumentRetriever with both a parent
// splitter (chunkSize 500) and a child splitter (chunkSize 50), indexes the
// example text file, and checks that more than one document comes back and
// that the first one has more than 100 characters of page content.
test("Should return a part of a document if a parent splitter is passed", async () => {
  const vectorstore = new MemoryVectorStore(new OpenAIEmbeddings());
  const docstore = new InMemoryStore();
  const parentSplitter = new RecursiveCharacterTextSplitter({
    chunkOverlap: 0,
    chunkSize: 500,
  });
  const childSplitter = new RecursiveCharacterTextSplitter({
    chunkOverlap: 0,
    chunkSize: 50,
  });
  const retriever = new ParentDocumentRetriever({
    vectorstore,
    docstore,
    parentSplitter,
    childSplitter,
  });

  const loader = new TextLoader("../examples/state_of_the_union.txt");
  const docs = await loader.load();
  await retriever.addDocuments(docs);

  const query = "justice breyer";
  const retrievedDocs = await retriever.getRelevantDocuments(query);
  // Run the same query directly against the vector store; the result is only
  // logged for manual comparison and is not asserted on.
  const vectorstoreRetrievedDocs = await vectorstore.similaritySearch(query);
  console.log(vectorstoreRetrievedDocs, vectorstoreRetrievedDocs.length);
  console.log(retrievedDocs);

  expect(retrievedDocs.length).toBeGreaterThan(1);
  const [firstDoc] = retrievedDocs;
  expect(firstDoc.pageContent.length).toBeGreaterThan(100);
});
20 changes: 19 additions & 1 deletion langchain/src/storage/encoder_backed.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BaseStore } from "../schema/storage.js";
import { Document } from "../document.js";

/**
* Class that provides a layer of abstraction over the base storage,
Expand All @@ -10,7 +11,7 @@ export class EncoderBackedStore<K, V, SerializedType = any> extends BaseStore<
K,
V
> {
lc_namespace = ["langchain", "storage", "encoder_backed"];
lc_namespace = ["langchain", "storage"];

store: BaseStore<string, SerializedType>;

Expand Down Expand Up @@ -82,3 +83,20 @@ export class EncoderBackedStore<K, V, SerializedType = any> extends BaseStore<
yield* this.store.yieldKeys(prefix);
}
}

/**
 * Wraps a byte store in an EncoderBackedStore whose values are Documents.
 *
 * Keys are passed to the underlying store unchanged. On write, only the
 * document's `pageContent` and `metadata` fields are serialized: they are
 * JSON-stringified and then encoded to bytes with a TextEncoder. On read, the
 * bytes are decoded with a TextDecoder, parsed as JSON, and handed to the
 * Document constructor.
 *
 * @param store Underlying store mapping string keys to Uint8Array values.
 * @returns An EncoderBackedStore built on top of `store`.
 */
export function createDocumentStoreFromByteStore(
  store: BaseStore<string, Uint8Array>
) {
  const textEncoder = new TextEncoder();
  const textDecoder = new TextDecoder();

  // Document -> JSON text -> bytes.
  const toBytes = (doc: Document) => {
    const json = JSON.stringify({
      pageContent: doc.pageContent,
      metadata: doc.metadata,
    });
    return textEncoder.encode(json);
  };

  // bytes -> JSON text -> Document.
  const fromBytes = (bytes: Uint8Array) => {
    const json = textDecoder.decode(bytes);
    return new Document(JSON.parse(json));
  };

  return new EncoderBackedStore({
    store,
    keyEncoder: (key: string) => key,
    valueSerializer: toBytes,
    valueDeserializer: fromBytes,
  });
}
2 changes: 1 addition & 1 deletion langchain/src/storage/in_memory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { BaseStore } from "../schema/storage.js";
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export class InMemoryStore<T = any> extends BaseStore<string, T> {
lc_namespace = ["langchain", "storage", "in_memory"];
lc_namespace = ["langchain", "storage"];

protected store: Record<string, T> = {};

Expand Down
8 changes: 4 additions & 4 deletions langchain/src/storage/ioredis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { BaseStore } from "../schema/storage.js";
* as well as yielding keys from the database.
*/
export class RedisByteStore extends BaseStore<string, Uint8Array> {
lc_namespace = ["langchain", "storage", "ioredis"];
lc_namespace = ["langchain", "storage"];

protected client: Redis;

Expand Down Expand Up @@ -56,11 +56,11 @@ export class RedisByteStore extends BaseStore<string, Uint8Array> {
async mget(keys: string[]) {
const prefixedKeys = keys.map(this._getPrefixedKey.bind(this));
const retrievedValues = await this.client.mgetBuffer(prefixedKeys);
return retrievedValues.map((key) => {
if (!key) {
return retrievedValues.map((value) => {
if (!value) {
return undefined;
} else {
return key;
return value;
}
});
}
Expand Down
64 changes: 64 additions & 0 deletions langchain/src/storage/tests/vercel_kv.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* eslint-disable @typescript-eslint/no-non-null-assertion */
/* eslint-disable no-process-env */

import { test } from "@jest/globals";
import { createClient } from "@vercel/kv";
import { VercelKVStore } from "../vercel_kv.js";
import { createDocumentStoreFromByteStore } from "../encoder_backed.js";
import { Document } from "../../document.js";

// Integration test: round-trips two byte values through a VercelKVStore
// (set, get, list keys, delete) using credentials read from the
// VERCEL_KV_API_URL and VERCEL_KV_API_TOKEN environment variables.
test("VercelKVStore", async () => {
  const client = createClient({
    url: process.env.VERCEL_KV_API_URL!,
    token: process.env.VERCEL_KV_API_TOKEN!,
  });
  const store = new VercelKVStore({ client });

  const firstValue = new Date().toISOString();
  const secondValue = `${new Date().toISOString()}${new Date().toISOString()}`;
  const textEncoder = new TextEncoder();
  const toBytes = (text: string) => textEncoder.encode(text);

  await store.mset([
    ["key1", toBytes(firstValue)],
    ["key2", toBytes(secondValue)],
  ]);

  const storedValues = await store.mget(["key1", "key2"]);
  expect(storedValues).toEqual([toBytes(firstValue), toBytes(secondValue)]);

  // Keys are only logged; nothing is asserted about the listing.
  for await (const storedKey of store.yieldKeys()) {
    console.log(storedKey);
  }

  await store.mdelete(["key1", "key2"]);
  const valuesAfterDelete = await store.mget(["key1", "key2"]);
  expect(valuesAfterDelete).toEqual([undefined, undefined]);
});

// Integration test: wraps a VercelKVStore with
// createDocumentStoreFromByteStore and round-trips two Documents through it
// (set, get, list keys, delete).
test("Encoder-backed", async () => {
  const client = createClient({
    url: process.env.VERCEL_KV_API_URL!,
    token: process.env.VERCEL_KV_API_TOKEN!,
  });
  const byteStore = new VercelKVStore({ client });
  const store = createDocumentStoreFromByteStore(byteStore);

  const firstValue = new Date().toISOString();
  const secondValue = `${new Date().toISOString()}${new Date().toISOString()}`;
  const firstDoc = new Document({ pageContent: firstValue });
  const secondDoc = new Document({ pageContent: secondValue });

  await store.mset([
    ["key1", firstDoc],
    ["key2", secondDoc],
  ]);

  const storedDocs = await store.mget(["key1", "key2"]);
  expect(storedDocs).toEqual([firstDoc, secondDoc]);

  // Keys are only logged; nothing is asserted about the listing.
  for await (const storedKey of store.yieldKeys()) {
    console.log(storedKey);
  }

  await store.mdelete(["key1", "key2"]);
  const docsAfterDelete = await store.mget(["key1", "key2"]);
  expect(docsAfterDelete).toEqual([undefined, undefined]);
});
Loading

0 comments on commit 40cc2b5

Please sign in to comment.