From b9087e765d73900f1184a41900aa6820b1936f5b Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Wed, 6 Dec 2023 09:44:54 -0800 Subject: [PATCH 1/2] docs[patch]: Fix broken link 'tip' in docs (#14349) --- .../cookbook/prompt_size.ipynb | 10 ++++++-- .../how_to/message_history.ipynb | 10 ++++++-- .../agent_types/openai_assistants.ipynb | 5 +++- .../chains/foundational/llm_chain.ipynb | 6 ++++- .../foundational/sequential_chains.ipynb | 6 ++++- .../chains/foundational/transformation.ipynb | 6 ++++- .../data_connection/retrievers/index.ipynb | 2 ++ .../retrievers/self_query.ipynb | 2 ++ docs/docs/modules/model_io/chat/index.ipynb | 2 ++ docs/docs/modules/model_io/llms/index.ipynb | 2 ++ .../use_cases/question_answering/index.ipynb | 25 +++++++++++++++---- 11 files changed, 63 insertions(+), 13 deletions(-) diff --git a/docs/docs/expression_language/cookbook/prompt_size.ipynb b/docs/docs/expression_language/cookbook/prompt_size.ipynb index b03a5a93e555d..2ee6945c88aa4 100644 --- a/docs/docs/expression_language/cookbook/prompt_size.ipynb +++ b/docs/docs/expression_language/cookbook/prompt_size.ipynb @@ -209,7 +209,10 @@ "id": "637f994a-5134-402a-bcf0-4de3911eaf49", "metadata": {}, "source": [ - ":::tip [LangSmith trace](https://smith.langchain.com/public/60909eae-f4f1-43eb-9f96-354f5176f66f/r)\n", + ":::tip\n", + "\n", + "[LangSmith trace](https://smith.langchain.com/public/60909eae-f4f1-43eb-9f96-354f5176f66f/r)\n", + "\n", ":::" ] }, @@ -374,7 +377,10 @@ "id": "5a7e498b-dc68-4267-a35c-90ceffa91c46", "metadata": {}, "source": [ - ":::tip [LangSmith trace](https://smith.langchain.com/public/3b27d47f-e4df-4afb-81b1-0f88b80ca97e/r)\n", + ":::tip\n", + "\n", + "[LangSmith trace](https://smith.langchain.com/public/3b27d47f-e4df-4afb-81b1-0f88b80ca97e/r)\n", + "\n", ":::" ] } diff --git a/docs/docs/expression_language/how_to/message_history.ipynb b/docs/docs/expression_language/how_to/message_history.ipynb index 4a9218b328aea..77991cb66bd77 100644 --- 
a/docs/docs/expression_language/how_to/message_history.ipynb +++ b/docs/docs/expression_language/how_to/message_history.ipynb @@ -251,7 +251,10 @@ "id": "da3d1feb-b4bb-4624-961c-7db2e1180df7", "metadata": {}, "source": [ - ":::tip [Langsmith trace](https://smith.langchain.com/public/863a003b-7ca8-4b24-be9e-d63ec13c106e/r)\n", + ":::tip\n", + "\n", + "[Langsmith trace](https://smith.langchain.com/public/863a003b-7ca8-4b24-be9e-d63ec13c106e/r)\n", + "\n", ":::" ] }, @@ -334,7 +337,10 @@ "id": "b898d1b1-11e6-4d30-a8dd-cc5e45533611", "metadata": {}, "source": [ - ":::tip [LangSmith trace](https://smith.langchain.com/public/f6c3e1d1-a49d-4955-a9fa-c6519df74fa7/r)\n", + ":::tip\n", + "\n", + "[LangSmith trace](https://smith.langchain.com/public/f6c3e1d1-a49d-4955-a9fa-c6519df74fa7/r)\n", + "\n", ":::" ] }, diff --git a/docs/docs/modules/agents/agent_types/openai_assistants.ipynb b/docs/docs/modules/agents/agent_types/openai_assistants.ipynb index 152d41f90aea5..0fb83ff6b02cd 100644 --- a/docs/docs/modules/agents/agent_types/openai_assistants.ipynb +++ b/docs/docs/modules/agents/agent_types/openai_assistants.ipynb @@ -153,7 +153,10 @@ "id": "db6b9cbf-dd54-4346-be6c-842e08756ccc", "metadata": {}, "source": [ - ":::tip [LangSmith trace](https://smith.langchain.com/public/6750972b-0849-4beb-a8bb-353d424ffade/r)\n", + ":::tip\n", + "\n", + "[LangSmith trace](https://smith.langchain.com/public/6750972b-0849-4beb-a8bb-353d424ffade/r)\n", + "\n", ":::" ] }, diff --git a/docs/docs/modules/chains/foundational/llm_chain.ipynb b/docs/docs/modules/chains/foundational/llm_chain.ipynb index 94a335aa920c3..8eeddacff4f2d 100644 --- a/docs/docs/modules/chains/foundational/llm_chain.ipynb +++ b/docs/docs/modules/chains/foundational/llm_chain.ipynb @@ -66,7 +66,11 @@ "source": [ "## [Legacy] LLMChain\n", "\n", - ":::note This is a legacy class, using LCEL as shown above is preffered.\n", + ":::note\n", + "\n", + "This is a legacy class, using LCEL as shown above is preferred.\n", + "\n", + 
":::\n", "\n", "An `LLMChain` is a simple chain that adds some functionality around language models. It is used widely throughout LangChain, including in other chains and agents.\n", "\n", diff --git a/docs/docs/modules/chains/foundational/sequential_chains.ipynb b/docs/docs/modules/chains/foundational/sequential_chains.ipynb index 304f73c215986..09ff7956158c4 100644 --- a/docs/docs/modules/chains/foundational/sequential_chains.ipynb +++ b/docs/docs/modules/chains/foundational/sequential_chains.ipynb @@ -130,7 +130,11 @@ "source": [ "## [Legacy] SequentialChain\n", "\n", - ":::note This is a legacy class, using LCEL as shown above is preffered.\n", + ":::note\n", + "\n", + "This is a legacy class, using LCEL as shown above is preferred.\n", + "\n", + ":::\n", "\n", "Sequential chains allow you to connect multiple chains and compose them into pipelines that execute some specific scenario. There are two types of sequential chains:\n", "\n", diff --git a/docs/docs/modules/chains/foundational/transformation.ipynb b/docs/docs/modules/chains/foundational/transformation.ipynb index 5435cfb6ac3bc..10d36a53d548f 100644 --- a/docs/docs/modules/chains/foundational/transformation.ipynb +++ b/docs/docs/modules/chains/foundational/transformation.ipynb @@ -88,7 +88,11 @@ "source": [ "## [Legacy] TransformationChain\n", "\n", - ":::note This is a legacy class, using LCEL as shown above is preffered.\n", + ":::note\n", + "\n", + "This is a legacy class, using LCEL as shown above is preferred.\n", + "\n", + ":::\n", "\n", "This notebook showcases using a generic transformation chain." 
] diff --git a/docs/docs/modules/data_connection/retrievers/index.ipynb b/docs/docs/modules/data_connection/retrievers/index.ipynb index 0d40ea7290c80..1f407fa9cecf2 100644 --- a/docs/docs/modules/data_connection/retrievers/index.ipynb +++ b/docs/docs/modules/data_connection/retrievers/index.ipynb @@ -17,7 +17,9 @@ "metadata": {}, "source": [ ":::info\n", + "\n", "Head to [Integrations](/docs/integrations/retrievers/) for documentation on built-in retriever integrations with 3rd-party tools.\n", + "\n", ":::\n", "\n", "A retriever is an interface that returns documents given an unstructured query. It is more general than a vector store.\n", diff --git a/docs/docs/modules/data_connection/retrievers/self_query.ipynb b/docs/docs/modules/data_connection/retrievers/self_query.ipynb index fcee4a9784e3c..1b9828436b780 100644 --- a/docs/docs/modules/data_connection/retrievers/self_query.ipynb +++ b/docs/docs/modules/data_connection/retrievers/self_query.ipynb @@ -8,7 +8,9 @@ "# Self-querying\n", "\n", ":::info\n", + "\n", "Head to [Integrations](/docs/integrations/retrievers/self_query) for documentation on vector stores with built-in support for self-querying.\n", + "\n", ":::\n", "\n", "A self-querying retriever is one that, as the name suggests, has the ability to query itself. Specifically, given any natural language query, the retriever uses a query-constructing LLM chain to write a structured query and then applies that structured query to its underlying VectorStore. 
This allows the retriever to not only use the user-input query for semantic similarity comparison with the contents of stored documents but to also extract filters from the user query on the metadata of stored documents and to execute those filters.\n", diff --git a/docs/docs/modules/model_io/chat/index.ipynb b/docs/docs/modules/model_io/chat/index.ipynb index 74e826ae9191f..2d0f1b617e6dc 100644 --- a/docs/docs/modules/model_io/chat/index.ipynb +++ b/docs/docs/modules/model_io/chat/index.ipynb @@ -17,7 +17,9 @@ "metadata": {}, "source": [ ":::info\n", + "\n", "Head to [Integrations](/docs/integrations/chat/) for documentation on built-in integrations with chat model providers.\n", + "\n", ":::\n", "\n", "Chat models are a variation on language models.\n", diff --git a/docs/docs/modules/model_io/llms/index.ipynb b/docs/docs/modules/model_io/llms/index.ipynb index 64c9449e3e6d1..3fcfb182e4e04 100644 --- a/docs/docs/modules/model_io/llms/index.ipynb +++ b/docs/docs/modules/model_io/llms/index.ipynb @@ -17,7 +17,9 @@ "metadata": {}, "source": [ ":::info\n", + "\n", "Head to [Integrations](/docs/integrations/llms/) for documentation on built-in integrations with LLM providers.\n", + "\n", ":::\n", "\n", "Large Language Models (LLMs) are a core component of LangChain.\n", diff --git a/docs/docs/use_cases/question_answering/index.ipynb b/docs/docs/use_cases/question_answering/index.ipynb index 8e5fbe2a2c14d..4bfc6b91c3a29 100644 --- a/docs/docs/use_cases/question_answering/index.ipynb +++ b/docs/docs/use_cases/question_answering/index.ipynb @@ -243,7 +243,10 @@ "id": "639dc31a-7f16-40f6-ba2a-20e7c2ecfe60", "metadata": {}, "source": [ - ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/1c6ca97e-445b-4d00-84b4-c7befcbc59fe/r) \n", + ":::tip\n", + "\n", + "Check out the [LangSmith trace](https://smith.langchain.com/public/1c6ca97e-445b-4d00-84b4-c7befcbc59fe/r) \n", + "\n", ":::" ] }, @@ -723,7 +726,10 @@ "id": "2c000e5f-2b7f-4eb9-8876-9f4b186b4a08", 
"metadata": {}, "source": [ - ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r) \n", + ":::tip\n", + "\n", + "Check out the [LangSmith trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r) \n", + "\n", ":::" ] }, @@ -800,7 +806,10 @@ "id": "94b952e6-dc4b-415b-9cf3-1ad333e48366", "metadata": {}, "source": [ - ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/da23c4d8-3b33-47fd-84df-a3a582eedf84/r) \n", + ":::tip\n", + "\n", + "Check out the [LangSmith trace](https://smith.langchain.com/public/da23c4d8-3b33-47fd-84df-a3a582eedf84/r) \n", + "\n", ":::" ] }, @@ -872,7 +881,10 @@ "id": "b437da5d-ca09-4d15-9be2-c35e5a1ace77", "metadata": {}, "source": [ - ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/007d7e01-cb62-4a84-8b71-b24767f953ee/r)\n", + ":::tip\n", + "\n", + "Check out the [LangSmith trace](https://smith.langchain.com/public/007d7e01-cb62-4a84-8b71-b24767f953ee/r)\n", + "\n", ":::" ] }, @@ -1050,7 +1062,10 @@ "id": "53263a65-4de2-4dd8-9291-6a8169ab6f1d", "metadata": {}, "source": [ - ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/b3001782-bb30-476a-886b-12da17ec258f/r) \n", + ":::tip\n", + "\n", + "Check out the [LangSmith trace](https://smith.langchain.com/public/b3001782-bb30-476a-886b-12da17ec258f/r) \n", + "\n", ":::" ] }, From 7bdfc43766e72e4b67512bd85119b1c797035b86 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Wed, 6 Dec 2023 10:05:43 -0800 Subject: [PATCH 2/2] core[patch], langchain[patch]: ByteStore (#14312) --- docs/.local_build.sh | 2 +- docs/docs/integrations/providers/redis.mdx | 18 + .../integrations/stores/file_system.ipynb | 100 ++++ docs/docs/integrations/stores/in_memory.ipynb | 73 +++ docs/docs/integrations/stores/index.mdx | 29 + docs/docs/integrations/stores/redis.ipynb | 83 +++ .../integrations/stores/upstash_redis.ipynb | 90 +++ 
.../text_embedding/caching_embeddings.ipynb | 517 ++---------------- docs/sidebars.js | 1 + libs/core/langchain_core/stores.py | 3 + libs/langchain/langchain/embeddings/cache.py | 4 +- .../langchain/retrievers/multi_vector.py | 4 +- libs/langchain/langchain/storage/__init__.py | 6 +- libs/langchain/langchain/storage/_lc_store.py | 6 +- .../langchain/storage/file_system.py | 4 +- libs/langchain/langchain/storage/in_memory.py | 29 +- libs/langchain/langchain/storage/redis.py | 4 +- .../langchain/storage/upstash_redis.py | 4 +- .../tests/unit_tests/storage/test_imports.py | 2 + 19 files changed, 497 insertions(+), 482 deletions(-) create mode 100644 docs/docs/integrations/stores/file_system.ipynb create mode 100644 docs/docs/integrations/stores/in_memory.ipynb create mode 100644 docs/docs/integrations/stores/index.mdx create mode 100644 docs/docs/integrations/stores/redis.ipynb create mode 100644 docs/docs/integrations/stores/upstash_redis.ipynb diff --git a/docs/.local_build.sh b/docs/.local_build.sh index 21d3fb3d49bc6..9a3f9a79f82d6 100755 --- a/docs/.local_build.sh +++ b/docs/.local_build.sh @@ -9,7 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)" cd "${SCRIPT_DIR}" mkdir -p ../_dist -rsync -ruv --exclude node_modules . ../_dist +rsync -ruv --exclude node_modules --exclude api_reference --exclude .venv --exclude .docusaurus . 
../_dist cd ../_dist poetry run python scripts/model_feat_table.py cp ../cookbook/README.md src/pages/cookbook.mdx diff --git a/docs/docs/integrations/providers/redis.mdx b/docs/docs/integrations/providers/redis.mdx index b9e4c67532112..bc1277d6d22ca 100644 --- a/docs/docs/integrations/providers/redis.mdx +++ b/docs/docs/integrations/providers/redis.mdx @@ -17,6 +17,24 @@ Install the Python SDK: pip install redis ``` +To run Redis locally, you can use Docker: + +```bash +docker run --name langchain-redis -d -p 6379:6379 redis redis-server --save 60 1 --loglevel warning +``` + +To stop the container: + +```bash +docker stop langchain-redis +``` + +And to start it again: + +```bash +docker start langchain-redis +``` + ## Wrappers All wrappers need a redis url connection string to connect to the database support either a stand alone Redis server diff --git a/docs/docs/integrations/stores/file_system.ipynb b/docs/docs/integrations/stores/file_system.ipynb new file mode 100644 index 0000000000000..b16e5f4051526 --- /dev/null +++ b/docs/docs/integrations/stores/file_system.ipynb @@ -0,0 +1,100 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Local Filesystem\n", + "sidebar_position: 3\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LocalFileStore\n", + "\n", + "The `LocalFileStore` is a persistent implementation of `ByteStore` that stores everything in a folder of your choosing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[b'v1', b'v2']\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "from langchain.storage import LocalFileStore\n", + "\n", + "root_path = Path.cwd() / \"data\" # can also be a path set by a string\n", + "store = LocalFileStore(root_path)\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's see which files exist in our `data` folder:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "k1 k2\n" + ] + } + ], + "source": [ + "!ls {root_path}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/stores/in_memory.ipynb b/docs/docs/integrations/stores/in_memory.ipynb new file mode 100644 index 0000000000000..03e2f2c5b638e --- /dev/null +++ b/docs/docs/integrations/stores/in_memory.ipynb @@ -0,0 +1,73 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: In Memory\n", + "sidebar_position: 2\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# InMemoryByteStore\n", + "\n", + "The `InMemoryByteStore` is a non-persistent implementation of `ByteStore` that 
stores everything in a Python dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[b'v1', b'v2']\n" + ] + } + ], + "source": [ + "from langchain.storage import InMemoryByteStore\n", + "\n", + "store = InMemoryByteStore()\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/stores/index.mdx b/docs/docs/integrations/stores/index.mdx new file mode 100644 index 0000000000000..5aa9abf1f3a6a --- /dev/null +++ b/docs/docs/integrations/stores/index.mdx @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_class_name: hidden +--- + +# Stores + +In many different applications, having some sort of key-value storage is helpful. +In this section, we will look at a few different ways to store key-value pairs +using implementations of the `ByteStore` interface. 
+ +## Features (natively supported) + +All `ByteStore`s support the following functions, which are used for modifying +**m**ultiple key-value pairs at once: + +- `mget(keys: Sequence[str]) -> List[Optional[bytes]]`: get the contents of multiple keys, returning `None` if the key does not exist +- `mset(key_value_pairs: Sequence[Tuple[str, bytes]]) -> None`: set the contents of multiple keys +- `mdelete(keys: Sequence[str]) -> None`: delete multiple keys +- `yield_keys(prefix: Optional[str] = None) -> Iterator[str]`: yield all keys in the store, optionally filtering by a prefix + +## How to pick one + +`ByteStore`s are designed to be interchangeable. By default, most dependent integrations +use the `InMemoryByteStore`, which is a simple in-memory key-value store. + +However, if you start having other requirements, like massive scalability or persistence, +you can swap out the `ByteStore` implementation with one of the other ones documented +in this section. diff --git a/docs/docs/integrations/stores/redis.ipynb b/docs/docs/integrations/stores/redis.ipynb new file mode 100644 index 0000000000000..251454b5e2bb3 --- /dev/null +++ b/docs/docs/integrations/stores/redis.ipynb @@ -0,0 +1,83 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Redis\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RedisStore\n", + "\n", + "The `RedisStore` is an implementation of `ByteStore` that stores everything in your Redis instance.\n", + "\n", + "To configure Redis, follow our [Redis guide](/docs/integrations/providers/redis)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install redis" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[b'v1', b'v2']\n" + ] + } + ], + "source": [ + "from langchain.storage import RedisStore\n", + "\n", + "store = RedisStore(redis_url=\"redis://localhost:6379\")\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/stores/upstash_redis.ipynb b/docs/docs/integrations/stores/upstash_redis.ipynb new file mode 100644 index 0000000000000..b070728907f4b --- /dev/null +++ b/docs/docs/integrations/stores/upstash_redis.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Upstash Redis\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# UpstashRedisByteStore\n", + "\n", + "The `UpstashRedisByteStore` is an implementation of `ByteStore` that stores everything in your Upstash-hosted Redis instance.\n", + "\n", + "To use the base `RedisStore` instead, see [this guide](./redis).\n", + "\n", + "To configure Upstash Redis, follow our [Upstash guide](/docs/integrations/providers/upstash)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install upstash-redis" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[b'v1', b'v2']\n" + ] + } + ], + "source": [ + "from langchain.storage import UpstashRedisByteStore\n", + "from upstash_redis import Redis\n", + "\n", + "URL = \"\"\n", + "TOKEN = \"\"\n", + "\n", + "redis_client = Redis(url=URL, token=TOKEN)\n", + "store = UpstashRedisByteStore(client=redis_client, ttl=None, namespace=\"test-ns\")\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb b/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb index 3b2327165013d..b3c4fd6340e3b 100644 --- a/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb +++ b/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb @@ -1,11 +1,21 @@ { "cells": [ + { + "cell_type": "raw", + "id": "8baf0f21", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Caching\n", + "---" + ] + }, { "cell_type": "markdown", "id": "bf4061ce", "metadata": {}, "source": [ - "# Caching\n", + "# CacheBackedEmbeddings\n", "\n", "Embeddings can be stored or temporarily cached to avoid
needing to recompute them.\n", "\n", @@ -15,7 +25,7 @@ "The main supported way to initialized a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n", "\n", "- underlying_embedder: The embedder to use for embedding.\n", - "- document_embedding_cache: The cache to use for storing document embeddings.\n", + "- document_embedding_cache: Any [`ByteStore`](/docs/integrations/stores/) for caching document embeddings.\n", "- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n", "\n", "**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models." @@ -23,20 +33,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "id": "a463c3c2-749b-40d1-a433-84f68a1cd1c7", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings\n", - "from langchain.storage import (\n", - " InMemoryStore,\n", - " LocalFileStore,\n", - " RedisStore,\n", - " UpstashRedisStore,\n", - ")" + "from langchain.embeddings import CacheBackedEmbeddings" ] }, { @@ -44,7 +48,7 @@ "id": "9ddf07dd-3e72-41de-99d4-78e9521e272f", "metadata": {}, "source": [ - "## Using with a vector store\n", + "## Using with a Vector Store\n", "\n", "First, let's see an example that uses the local file system for storing embeddings and uses FAISS vector store for retrieval." 
] @@ -52,36 +56,32 @@ { "cell_type": "code", "execution_count": null, - "id": "9e4314d8-88ef-4f52-81ae-0be771168bb6", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import TextLoader\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", - "from langchain.text_splitter import CharacterTextSplitter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e751f26-9b5b-4c10-843a-d784b5ea8538", + "id": "50183825", "metadata": {}, "outputs": [], "source": [ - "underlying_embeddings = OpenAIEmbeddings()" + "!pip install openai faiss-cpu" ] }, { "cell_type": "code", - "execution_count": null, - "id": "30743664-38f5-425d-8216-772b64e7f348", + "execution_count": 3, + "id": "9e4314d8-88ef-4f52-81ae-0be771168bb6", "metadata": {}, "outputs": [], "source": [ - "fs = LocalFileStore(\"./cache/\")\n", + "from langchain.document_loaders import TextLoader\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.storage import LocalFileStore\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores import FAISS\n", + "\n", + "underlying_embeddings = OpenAIEmbeddings()\n", + "\n", + "store = LocalFileStore(\"./cache/\")\n", "\n", "cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n", - " underlying_embeddings, fs, namespace=underlying_embeddings.model\n", + " underlying_embeddings, store, namespace=underlying_embeddings.model\n", ")" ] }, @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "f9ad627f-ced2-4277-b336-2434f22f2c8a", "metadata": {}, "outputs": [ @@ -105,13 +105,13 @@ "[]" ] }, - "execution_count": 9, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "list(fs.yield_keys())" + "list(store.yield_keys())" ] }, { @@ -124,12 +124,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": 
"cf958ac2-e60e-4668-b32c-8bb2d78b3c61", "metadata": {}, "outputs": [], "source": [ - "raw_documents = TextLoader(\"../state_of_the_union.txt\").load()\n", + "raw_documents = TextLoader(\"../../state_of_the_union.txt\").load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "documents = text_splitter.split_documents(raw_documents)" ] @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "3a1d7bb8-3b72-4bb5-9013-cf7729caca61", "metadata": {}, "outputs": [ @@ -152,8 +152,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 608 ms, sys: 58.9 ms, total: 667 ms\n", - "Wall time: 1.3 s\n" + "CPU times: user 218 ms, sys: 29.7 ms, total: 248 ms\n", + "Wall time: 1.02 s\n" ] } ], @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "714cb2e2-77ba-41a8-bb83-84e75342af2d", "metadata": {}, "outputs": [ @@ -180,8 +180,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 33.6 ms, sys: 3.96 ms, total: 37.6 ms\n", - "Wall time: 36.8 ms\n" + "CPU times: user 15.7 ms, sys: 2.22 ms, total: 18 ms\n", + "Wall time: 17.2 ms\n" ] } ], @@ -200,458 +200,55 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "f2ca32dd-3712-4093-942b-4122f3dc8a8e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['text-embedding-ada-002614d7cf6-46f1-52fa-9d3a-740c39e7a20e',\n", - " 'text-embedding-ada-0020fc1ede2-407a-5e14-8f8f-5642214263f5',\n", + "['text-embedding-ada-00217a6727d-8916-54eb-b196-ec9c9d6ca472',\n", + " 'text-embedding-ada-0025fc0d904-bd80-52da-95c9-441015bfb438',\n", " 'text-embedding-ada-002e4ad20ef-dfaa-5916-9459-f90c6d8e8159',\n", - " 'text-embedding-ada-002a5ef11e4-0474-5725-8d80-81c91943b37f',\n", - " 'text-embedding-ada-00281426526-23fe-58be-9e84-6c7c72c8ca9a']" + " 'text-embedding-ada-002ed199159-c1cd-5597-9757-f80498e8f17b',\n", + " 
'text-embedding-ada-0021297d37a-2bc1-5e19-bf13-6c950f075062']" ] }, - "execution_count": 13, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "list(fs.yield_keys())[:5]" + "list(store.yield_keys())[:5]" ] }, { "cell_type": "markdown", - "id": "564c9801-29f0-4452-aeac-527382e2c0e8", + "id": "c1a7fafd", "metadata": {}, "source": [ - "## In Memory\n", + "## Swapping the `ByteStore`\n", "\n", - "This section shows how to set up an in memory cache for embeddings. This type of cache is primarily \n", - "useful for unit tests or prototyping. Do **not** use this cache if you need to actually store the embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13bd1c5b-b7ba-4394-957c-7d5b5a841972", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "store = InMemoryStore()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d99885f-99e1-498c-904d-6db539ac9466", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "underlying_embeddings = OpenAIEmbeddings()\n", - "embedder = CacheBackedEmbeddings.from_bytes_store(\n", - " underlying_embeddings, store, namespace=underlying_embeddings.model\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "682eb5d4-0b7a-4dac-b8fb-3de4ca6e421c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 10.9 ms, sys: 916 µs, total: 11.8 ms\n", - "Wall time: 159 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])" - ] - }, - { - "cell_type": "markdown", - "id": "95233026-147f-49d1-bd87-e1e8b88ebdbc", - "metadata": {}, - "source": [ - "The second time we try to embed the embedding time is only 2 ms because the embeddings are looked up in the cache."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f819c3ff-a212-4d06-a5f7-5eb1435c1feb", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1.67 ms, sys: 342 µs, total: 2.01 ms\n", - "Wall time: 2.01 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings_from_cache = embedder.embed_documents([\"hello\", \"goodbye\"])" + "In order to use a different `ByteStore`, just use it when creating your `CacheBackedEmbeddings`. Below, we create an equivalent cached embeddings object, except using the non-persistent `InMemoryByteStore` instead:" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ec38fb72-90a9-4687-a483-c62c87d1f4dd", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "embeddings == embeddings_from_cache" - ] - }, - { - "cell_type": "markdown", - "id": "f6cbe100-8587-4830-b207-fb8b524a9854", + "execution_count": 9, + "id": "336a0538", "metadata": {}, - "source": [ - "## File system\n", - "\n", - "This section covers how to use a file system store." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0070271-0809-4528-97e0-2a88216846f3", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "fs = LocalFileStore(\"./test_cache/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b20e9fe-f57f-4d7c-9f81-105c5f8726f4", - "metadata": { - "tags": [] - }, "outputs": [], "source": [ - "embedder2 = CacheBackedEmbeddings.from_bytes_store(\n", - " underlying_embeddings, fs, namespace=underlying_embeddings.model\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "630515fd-bf5c-4d9c-a404-9705308f3a2c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 6.89 ms, sys: 4.89 ms, total: 11.8 ms\n", - "Wall time: 184 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30e6bb87-42c9-4d08-88ac-0d22c9c449a1", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 0 ns, sys: 3.24 ms, total: 3.24 ms\n", - "Wall time: 2.84 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])" - ] - }, - { - "cell_type": "markdown", - "id": "12ed5a45-8352-4e0f-8583-5537397f53c0", - "metadata": {}, - "source": [ - "Here are the embeddings that have been persisted to the directory `./test_cache`. \n", - "\n", - "Notice that the embedder takes a namespace parameter." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "658e2914-05e9-44a3-a8fe-3fe17ca84039", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n", - " 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(fs.yield_keys())" - ] - }, - { - "cell_type": "markdown", - "id": "904c1d47", - "metadata": {}, - "source": [ - "## Upstash Redis Store" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0f9f212", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.storage.upstash_redis import UpstashRedisStore" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45bf62e4", - "metadata": {}, - "outputs": [], - "source": [ - "from upstash_redis import Redis\n", + "from langchain.embeddings import CacheBackedEmbeddings\n", + "from langchain.storage import InMemoryByteStore\n", "\n", - "URL = \"\"\n", - "TOKEN = \"\"\n", + "store = InMemoryByteStore()\n", "\n", - "redis_client = Redis(url=URL, token=TOKEN)\n", - "store = UpstashRedisStore(client=redis_client, ttl=None, namespace=\"test-ns\")\n", - "\n", - "underlying_embeddings = OpenAIEmbeddings()\n", - "embedder = CacheBackedEmbeddings.from_bytes_store(\n", - " underlying_embeddings, store, namespace=underlying_embeddings.model\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3eac3504", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "embeddings = embedder.embed_documents([\"welcome\", \"goodbye\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "085dcd30", - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "embeddings = embedder.embed_documents([\"welcome\", \"goodbye\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3570e83f", - 
"metadata": {}, - "outputs": [], - "source": [ - "list(store.yield_keys())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d7dc8e51", - "metadata": {}, - "outputs": [], - "source": [ - "list(store.client.scan(0))" - ] - }, - { - "cell_type": "markdown", - "id": "cd5f5a96-6ffa-429d-aa82-00b3f6532871", - "metadata": {}, - "source": [ - "## Redis Store\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4879c134-141f-48a0-acfe-7d6f30253af0", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.storage import RedisStore" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b2bb9a0-6549-4487-8532-29ab4ab7336f", - "metadata": {}, - "outputs": [], - "source": [ - "# For cache isolation can use a separate DB\n", - "# Or additional namepace\n", - "store = RedisStore(\n", - " redis_url=\"redis://localhost:6379\",\n", - " client_kwargs={\"db\": 2},\n", - " namespace=\"embedding_caches\",\n", - ")\n", - "\n", - "underlying_embeddings = OpenAIEmbeddings()\n", - "embedder = CacheBackedEmbeddings.from_bytes_store(\n", + "cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n", " underlying_embeddings, store, namespace=underlying_embeddings.model\n", ")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eca3cb99-2bb3-49d5-81f9-1dee03da4b8c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3.99 ms, sys: 0 ns, total: 3.99 ms\n", - "Wall time: 3.5 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "317ba5d8-89f9-462c-b807-ad4ef26e518b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 2.47 ms, sys: 767 µs, total: 3.24 ms\n", - "Wall time: 2.75 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "embeddings = 
embedder.embed_documents([\"hello\", \"goodbye\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a540317-5142-4491-9062-a097932b56e3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n", - " 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(store.yield_keys())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd9b0d4a-f816-4dce-9dde-cde1ad9a65fb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[b'embedding_caches/text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n", - " b'embedding_caches/text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(store.client.scan_iter())" - ] } ], "metadata": { @@ -670,7 +267,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/sidebars.js b/docs/sidebars.js index f5062b100f5c6..8468b3216c6ac 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -111,6 +111,7 @@ module.exports = { { type: "category", label: "Callbacks", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/callbacks" }], link: {type: "generated-index", slug: "integrations/callbacks" }}, { type: "category", label: "Chat loaders", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/chat_loaders" }], link: {type: "generated-index", slug: "integrations/chat_loaders" }}, { type: "category", label: "Adapters", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/adapters" }], link: {type: "generated-index", slug: "integrations/adapters" }}, + { type: "category", label: "Stores", 
collapsed: true, items: [{type: "autogenerated", dirName: "integrations/stores" }], link: {type: "doc", id: "integrations/stores/index" }}, ], link: { type: 'generated-index', diff --git a/libs/core/langchain_core/stores.py b/libs/core/langchain_core/stores.py index bae5adc2b8ef4..8363fca3891f9 100644 --- a/libs/core/langchain_core/stores.py +++ b/libs/core/langchain_core/stores.py @@ -51,3 +51,6 @@ def yield_keys( This method is allowed to return an iterator over either K or str depending on what makes more sense for the given store. """ + + +ByteStore = BaseStore[str, bytes] diff --git a/libs/langchain/langchain/embeddings/cache.py b/libs/langchain/langchain/embeddings/cache.py index 75f1992e867d9..e578129618117 100644 --- a/libs/langchain/langchain/embeddings/cache.py +++ b/libs/langchain/langchain/embeddings/cache.py @@ -15,7 +15,7 @@ from typing import Callable, List, Sequence, Union, cast from langchain_core.embeddings import Embeddings -from langchain_core.stores import BaseStore +from langchain_core.stores import BaseStore, ByteStore from langchain.storage.encoder_backed import EncoderBackedStore @@ -151,7 +151,7 @@ def embed_query(self, text: str) -> List[float]: def from_bytes_store( cls, underlying_embeddings: Embeddings, - document_embedding_cache: BaseStore[str, bytes], + document_embedding_cache: ByteStore, *, namespace: str = "", ) -> CacheBackedEmbeddings: diff --git a/libs/langchain/langchain/retrievers/multi_vector.py b/libs/langchain/langchain/retrievers/multi_vector.py index dcc81b554c363..267095f821c50 100644 --- a/libs/langchain/langchain/retrievers/multi_vector.py +++ b/libs/langchain/langchain/retrievers/multi_vector.py @@ -3,7 +3,7 @@ from langchain_core.documents import Document from langchain_core.retrievers import BaseRetriever -from langchain_core.stores import BaseStore +from langchain_core.stores import BaseStore, ByteStore from langchain_core.vectorstores import VectorStore from langchain.callbacks.manager import 
CallbackManagerForRetrieverRun @@ -38,7 +38,7 @@ def __init__( *, vectorstore: VectorStore, docstore: Optional[BaseStore[str, Document]] = None, - base_store: Optional[BaseStore[str, bytes]] = None, + base_store: Optional[ByteStore] = None, id_key: str = "doc_id", search_kwargs: Optional[dict] = None, search_type: SearchType = SearchType.similarity, diff --git a/libs/langchain/langchain/storage/__init__.py b/libs/langchain/langchain/storage/__init__.py index bf95c8b9d3989..5722213f93db9 100644 --- a/libs/langchain/langchain/storage/__init__.py +++ b/libs/langchain/langchain/storage/__init__.py @@ -9,16 +9,18 @@ from langchain.storage._lc_store import create_kv_docstore, create_lc_store from langchain.storage.encoder_backed import EncoderBackedStore from langchain.storage.file_system import LocalFileStore -from langchain.storage.in_memory import InMemoryStore +from langchain.storage.in_memory import InMemoryByteStore, InMemoryStore from langchain.storage.redis import RedisStore -from langchain.storage.upstash_redis import UpstashRedisStore +from langchain.storage.upstash_redis import UpstashRedisByteStore, UpstashRedisStore __all__ = [ "EncoderBackedStore", "InMemoryStore", + "InMemoryByteStore", "LocalFileStore", "RedisStore", "create_lc_store", "create_kv_docstore", + "UpstashRedisByteStore", "UpstashRedisStore", ] diff --git a/libs/langchain/langchain/storage/_lc_store.py b/libs/langchain/langchain/storage/_lc_store.py index c38f66cb72e6b..3574749e7590b 100644 --- a/libs/langchain/langchain/storage/_lc_store.py +++ b/libs/langchain/langchain/storage/_lc_store.py @@ -3,7 +3,7 @@ from langchain_core.documents import Document from langchain_core.load import Serializable, dumps, loads -from langchain_core.stores import BaseStore +from langchain_core.stores import BaseStore, ByteStore from langchain.storage.encoder_backed import EncoderBackedStore @@ -42,7 +42,7 @@ def _identity(x: str) -> str: def create_lc_store( - store: BaseStore[str, bytes], + store: ByteStore, 
*, key_encoder: Optional[Callable[[str], str]] = None, ) -> BaseStore[str, Serializable]: @@ -64,7 +64,7 @@ def create_lc_store( def create_kv_docstore( - store: BaseStore[str, bytes], + store: ByteStore, *, key_encoder: Optional[Callable[[str], str]] = None, ) -> BaseStore[str, Document]: diff --git a/libs/langchain/langchain/storage/file_system.py b/libs/langchain/langchain/storage/file_system.py index dadd3720bda0b..720acf085a133 100644 --- a/libs/langchain/langchain/storage/file_system.py +++ b/libs/langchain/langchain/storage/file_system.py @@ -2,12 +2,12 @@ from pathlib import Path from typing import Iterator, List, Optional, Sequence, Tuple, Union -from langchain_core.stores import BaseStore +from langchain_core.stores import ByteStore from langchain.storage.exceptions import InvalidKeyException -class LocalFileStore(BaseStore[str, bytes]): +class LocalFileStore(ByteStore): """BaseStore interface that works on the local file system. Examples: diff --git a/libs/langchain/langchain/storage/in_memory.py b/libs/langchain/langchain/storage/in_memory.py index 60d8ad5516c3e..03679a34909d9 100644 --- a/libs/langchain/langchain/storage/in_memory.py +++ b/libs/langchain/langchain/storage/in_memory.py @@ -3,12 +3,24 @@ This is a simple implementation of the BaseStore using a dictionary that is useful primarily for unit testing purposes. """ -from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple +from typing import ( + Any, + Dict, + Generic, + Iterator, + List, + Optional, + Sequence, + Tuple, + TypeVar, +) from langchain_core.stores import BaseStore +V = TypeVar("V") -class InMemoryStore(BaseStore[str, Any]): + +class InMemoryBaseStore(BaseStore[str, V], Generic[V]): """In-memory implementation of the BaseStore using a dictionary. 
Attributes: @@ -34,9 +46,9 @@ class InMemoryStore(BaseStore[str, Any]): def __init__(self) -> None: """Initialize an empty store.""" - self.store: Dict[str, Any] = {} + self.store: Dict[str, V] = {} - def mget(self, keys: Sequence[str]) -> List[Optional[Any]]: + def mget(self, keys: Sequence[str]) -> List[Optional[V]]: """Get the values associated with the given keys. Args: @@ -48,7 +60,7 @@ def mget(self, keys: Sequence[str]) -> List[Optional[Any]]: """ return [self.store.get(key) for key in keys] - def mset(self, key_value_pairs: Sequence[Tuple[str, Any]]) -> None: + def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: """Set the values for the given keys. Args: @@ -67,7 +79,8 @@ def mdelete(self, keys: Sequence[str]) -> None: keys (Sequence[str]): A sequence of keys to delete. """ for key in keys: - self.store.pop(key, None) + if key in self.store: + del self.store[key] def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: """Get an iterator over keys that match the given prefix. @@ -84,3 +97,7 @@ def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: for key in self.store.keys(): if key.startswith(prefix): yield key + + +InMemoryStore = InMemoryBaseStore[Any] +InMemoryByteStore = InMemoryBaseStore[bytes] diff --git a/libs/langchain/langchain/storage/redis.py b/libs/langchain/langchain/storage/redis.py index d213f8cc051cb..3196ca8dfd7f8 100644 --- a/libs/langchain/langchain/storage/redis.py +++ b/libs/langchain/langchain/storage/redis.py @@ -1,11 +1,11 @@ from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast -from langchain_core.stores import BaseStore +from langchain_core.stores import ByteStore from langchain.utilities.redis import get_client -class RedisStore(BaseStore[str, bytes]): +class RedisStore(ByteStore): """BaseStore implementation using Redis as the underlying store. 
Examples: diff --git a/libs/langchain/langchain/storage/upstash_redis.py b/libs/langchain/langchain/storage/upstash_redis.py index 7dc436ce33e2c..7fc49b49c768c 100644 --- a/libs/langchain/langchain/storage/upstash_redis.py +++ b/libs/langchain/langchain/storage/upstash_redis.py @@ -1,7 +1,7 @@ from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast from langchain_core._api.deprecation import deprecated -from langchain_core.stores import BaseStore +from langchain_core.stores import BaseStore, ByteStore class _UpstashRedisStore(BaseStore[str, str]): @@ -130,7 +130,7 @@ class UpstashRedisStore(_UpstashRedisStore): """ -class UpstashRedisByteStore(BaseStore[str, bytes]): +class UpstashRedisByteStore(ByteStore): """ BaseStore implementation using Upstash Redis as the underlying store to store raw bytes. diff --git a/libs/langchain/tests/unit_tests/storage/test_imports.py b/libs/langchain/tests/unit_tests/storage/test_imports.py index 35554231b74ac..8c603c11dc9fb 100644 --- a/libs/langchain/tests/unit_tests/storage/test_imports.py +++ b/libs/langchain/tests/unit_tests/storage/test_imports.py @@ -3,10 +3,12 @@ EXPECTED_ALL = [ "EncoderBackedStore", "InMemoryStore", + "InMemoryByteStore", "LocalFileStore", "RedisStore", "create_lc_store", "create_kv_docstore", + "UpstashRedisByteStore", "UpstashRedisStore", ]