diff --git a/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py b/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py index 791e1f570afcd..d629ecf019cff 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py +++ b/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py @@ -15,16 +15,12 @@ class ReadWriteTestSuite(BaseStandardTests): - """Test suite for checking the read-write API of a vectorstore. - - This test suite verifies the basic read-write API of a vectorstore. - - The test suite is designed for synchronous vectorstores. + """Test suite for checking the synchronous read-write API of a vectorstore. Implementers should subclass this test suite and provide a fixture that returns an empty vectorstore for each test. - The fixture should use the `get_embeddings` method to get a pre-defined + The fixture should use the ``get_embeddings`` method to get a pre-defined embeddings model that should be used for this test suite. Here is a template: @@ -109,7 +105,7 @@ def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None: .. dropdown:: Troubleshooting If this test fails, check that the test class (i.e., sub class of - ReadWriteTestSuite) initializes an empty vector store in the + ``ReadWriteTestSuite``) initializes an empty vector store in the ``vectorestore`` fixture. """ assert vectorstore.similarity_search("foo", k=1) == [] @@ -151,8 +147,8 @@ def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None: .. dropdown:: Troubleshooting If this test fails, check that the test class (i.e., sub class of - ReadWriteTestSuite) correctly clears the vector store in the ``finally`` - block. + ``ReadWriteTestSuite``) correctly clears the vector store in the + ``finally`` block. """ assert vectorstore.similarity_search("foo", k=1) == [] @@ -181,7 +177,7 @@ def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None: .. 
dropdown:: Troubleshooting If this test fails, check that ``delete`` correctly removes multiple - documents when givena list of IDs. + documents when given a list of IDs. """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -388,18 +384,68 @@ def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None class AsyncReadWriteTestSuite(BaseStandardTests): - """Test suite for checking the **async** read-write API of a vectorstore. - - This test suite verifies the basic read-write API of a vectorstore. - - The test suite is designed for asynchronous vectorstores. + """Test suite for checking the async read-write API of a vectorstore. Implementers should subclass this test suite and provide a fixture that returns an empty vectorstore for each test. - The fixture should use the `get_embeddings` method to get a pre-defined + The fixture should use the ``get_embeddings`` method to get a pre-defined embeddings model that should be used for this test suite. - """ + + Here is a template: + + .. code-block:: python + + from typing import AsyncGenerator + + import pytest + from langchain_core.vectorstores import VectorStore + from langchain_parrot_link.vectorstores import ParrotVectorStore + from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite + + + class TestAsync(AsyncReadWriteTestSuite): + @pytest.fixture() + async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore + \"\"\"Get an empty vectorstore.\"\"\" + store = ParrotVectorStore(self.get_embeddings()) + # note: store should be EMPTY at this point + # if you need to delete data, you may do so here + try: + yield store + finally: + # cleanup operations, or deleting data + pass + + In the fixture, before the ``yield`` we instantiate an empty vector store. In the + ``finally`` block, we call whatever logic is necessary to bring the vector store + to a clean state. + + Example: + + .. 
code-block:: python + + from typing import AsyncGenerator + + import pytest + from langchain_core.vectorstores import VectorStore + from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite + + from langchain_chroma import Chroma + + + class TestAsync(AsyncReadWriteTestSuite): + @pytest.fixture() + async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore + \"\"\"Get an empty vectorstore for unit tests.\"\"\" + store = Chroma(embedding_function=self.get_embeddings()) + try: + yield store + finally: + store.delete_collection() + pass + + """ # noqa: E501 @abstractmethod @pytest.fixture @@ -411,17 +457,39 @@ async def vectorstore(self) -> VectorStore: @staticmethod def get_embeddings() -> Embeddings: - """A pre-defined embeddings model that should be used for this test.""" + """A pre-defined embeddings model that should be used for this test. + + This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``, + which uses numpy to generate random numbers based on a hash of the input text. + + The resulting embeddings are not meaningful, but they are deterministic. + """ return DeterministicFakeEmbedding( size=EMBEDDING_SIZE, ) async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None: - """Test that the vectorstore is empty.""" + """Test that the vectorstore is empty. + + .. dropdown:: Troubleshooting + + If this test fails, check that the test class (i.e., sub class of + ``AsyncReadWriteTestSuite``) initializes an empty vector store in the + ``vectorstore`` fixture. + """ assert await vectorstore.asimilarity_search("foo", k=1) == [] async def test_add_documents(self, vectorstore: VectorStore) -> None: - """Test adding documents into the vectorstore.""" + """Test adding documents into the vectorstore. + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. We correctly initialize an empty vector store in the ``vectorstore`` fixture. + 2. 
Calling ``.asimilarity_search`` for the top ``k`` similar documents does not threshold by score. + 3. We do not mutate the original document object when adding it to the vector store (e.g., by adding an ID). + """ # noqa: E501 original_documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -445,11 +513,24 @@ async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None: This just verifies that the fixture is set up properly to be empty after each test. + + .. dropdown:: Troubleshooting + + If this test fails, check that the test class (i.e., sub class of + ``AsyncReadWriteTestSuite``) correctly clears the vector store in the + ``finally`` block. """ assert await vectorstore.asimilarity_search("foo", k=1) == [] async def test_deleting_documents(self, vectorstore: VectorStore) -> None: - """Test deleting documents from the vectorstore.""" + """Test deleting documents from the vectorstore. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``aadd_documents`` preserves identifiers + passed in through ``ids``, and that ``adelete`` correctly removes + documents. + """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -461,7 +542,13 @@ async def test_deleting_documents(self, vectorstore: VectorStore) -> None: assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")] async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None: - """Test that we can delete several documents at once.""" + """Test that we can delete several documents at once. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``adelete`` correctly removes multiple + documents when given a list of IDs. 
+ """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -474,14 +561,27 @@ async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None: assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")] async def test_delete_missing_content(self, vectorstore: VectorStore) -> None: - """Deleting missing content should not raise an exception.""" + """Deleting missing content should not raise an exception. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``adelete`` does not raise an exception + when deleting IDs that do not exist. + """ await vectorstore.adelete(["1"]) await vectorstore.adelete(["1", "2", "3"]) async def test_add_documents_with_ids_is_idempotent( self, vectorstore: VectorStore ) -> None: - """Adding by ID should be idempotent.""" + """Adding by ID should be idempotent. + + .. dropdown:: Troubleshooting + + If this test fails, check that adding the same document twice with the + same IDs has the same effect as adding it once (i.e., it does not + duplicate the documents). + """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -497,7 +597,14 @@ async def test_add_documents_with_ids_is_idempotent( async def test_add_documents_by_id_with_mutation( self, vectorstore: VectorStore ) -> None: - """Test that we can overwrite by ID using add_documents.""" + """Test that we can overwrite by ID using add_documents. + + .. dropdown:: Troubleshooting + + If this test fails, check that when ``aadd_documents`` is called with an + ID that already exists in the vector store, the content is updated + rather than duplicated. 
+ """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -526,7 +633,26 @@ async def test_add_documents_by_id_with_mutation( ] async def test_get_by_ids(self, vectorstore: VectorStore) -> None: - """Test get by IDs.""" + """Test get by IDs. + + This test requires that ``get_by_ids`` be implemented on the vector store. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``get_by_ids`` is implemented and returns + documents in the same order as the IDs passed in. + + .. note:: + ``get_by_ids`` was added to the ``VectorStore`` interface in + ``langchain-core`` version 0.2.11. If difficult to implement, this + test can be skipped using a pytest ``xfail`` on the test class: + + .. code-block:: python + + @pytest.mark.xfail(reason=("get_by_ids not implemented.")) + async def test_get_by_ids(self, vectorstore: VectorStore) -> None: + await super().test_get_by_ids(vectorstore) + """ documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -539,12 +665,49 @@ async def test_get_by_ids(self, vectorstore: VectorStore) -> None: ] async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None: - """Test get by IDs with missing IDs.""" + """Test get by IDs with missing IDs. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``get_by_ids`` is implemented and does not + raise an exception when given IDs that do not exist. + + .. note:: + ``get_by_ids`` was added to the ``VectorStore`` interface in + ``langchain-core`` version 0.2.11. If difficult to implement, this + test can be skipped using a pytest ``xfail`` on the test class: + + .. 
code-block:: python + + @pytest.mark.xfail(reason=("get_by_ids not implemented.")) + async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None: + await super().test_get_by_ids_missing(vectorstore) + """ # noqa: E501 # This should not raise an exception assert await vectorstore.aget_by_ids(["1", "2", "3"]) == [] async def test_add_documents_documents(self, vectorstore: VectorStore) -> None: - """Run add_documents tests.""" + """Run add_documents tests. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``get_by_ids`` is implemented and returns + documents in the same order as the IDs passed in. + + Check also that ``aadd_documents`` will correctly generate string IDs if + none are provided. + + .. note:: + ``get_by_ids`` was added to the ``VectorStore`` interface in + ``langchain-core`` version 0.2.11. If difficult to implement, this + test can be skipped using a pytest ``xfail`` on the test class: + + .. code-block:: python + + @pytest.mark.xfail(reason=("get_by_ids not implemented.")) + async def test_add_documents_documents(self, vectorstore: VectorStore) -> None: + await super().test_add_documents_documents(vectorstore) + """ # noqa: E501 documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), @@ -558,7 +721,29 @@ async def test_add_documents_documents(self, vectorstore: VectorStore) -> None: async def test_add_documents_with_existing_ids( self, vectorstore: VectorStore ) -> None: - """Test that add_documentsing with existing IDs is idempotent.""" + """Test that add_documents with existing IDs is idempotent. + + .. dropdown:: Troubleshooting + + If this test fails, check that ``get_by_ids`` is implemented and returns + documents in the same order as the IDs passed in. + + This test also verifies that: + + 1. IDs specified in the ``Document.id`` field are assigned when adding documents. + 2. 
If some documents include IDs and others don't, string IDs are generated for the latter. + + .. note:: + ``get_by_ids`` was added to the ``VectorStore`` interface in + ``langchain-core`` version 0.2.11. If difficult to implement, this + test can be skipped using a pytest ``xfail`` on the test class: + + .. code-block:: python + + @pytest.mark.xfail(reason=("get_by_ids not implemented.")) + async def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None: + await super().test_add_documents_with_existing_ids(vectorstore) + """ # noqa: E501 documents = [ Document(id="foo", page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}),