langchain-ai · ccurme · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024
diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -16,6 +16,7 @@
     Iterable,
     List,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
@@ -517,6 +518,11 @@ def add_texts(
         """
         if ids is None:
             ids = [str(uuid.uuid4()) for _ in texts]
+        else:
+            # Assign strings to any null IDs
+            for idx, _id in enumerate(ids):
+                if _id is None:
+                    ids[idx] = str(uuid.uuid4())
         embeddings = None
         texts = list(texts)
         if self._embedding_function is not None:
@@ -1028,6 +1034,38 @@ def get(
 
         return self._collection.get(**kwargs)  # type: ignore
 
+    def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
+        """Get documents by their IDs.
+
+        The returned documents are expected to have the ID field set to the ID of the
+        document in the vector store.
+
+        Fewer documents may be returned than requested if some IDs are not found or
+        if there are duplicated IDs.
+
+        Users should not assume that the order of the returned documents matches
+        the order of the input IDs. Instead, users should rely on the ID field of the
+        returned documents.
+
+        This method should **NOT** raise exceptions if no documents are found for
+        some IDs.
+
+        Args:
+            ids: Sequence of ids to retrieve.
+
+        Returns:
+            List of Documents; missing or empty stored metadata becomes ``{}``.
+
+        .. versionadded:: 0.2.1
+        """
+        results = self.get(ids=list(ids))
+        return [
+            Document(page_content=doc, metadata=meta or {}, id=doc_id)
+            for doc, meta, doc_id in zip(
+                results["documents"], results["metadatas"], results["ids"]
+            )
+        ]
+
     def update_document(self, document_id: str, document: Document) -> None:
         """Update a document in the collection.
 

diff --git a/libs/partners/chroma/poetry.lock b/libs/partners/chroma/poetry.lock
diff --git a/libs/partners/chroma/pyproject.toml b/libs/partners/chroma/pyproject.toml
@@ -90,6 +90,10 @@ python = ">=3.9"
 version = ">=0.1.40,<0.3"
 python = "<3.9"
 
+[[tool.poetry.group.test.dependencies.langchain-tests]]
+path = "../../standard-tests"
+develop = true
+
 [tool.poetry.group.codespell.dependencies]
 codespell = "^2.2.0"
 

diff --git a/libs/partners/chroma/tests/integration_tests/test_standard.py b/libs/partners/chroma/tests/integration_tests/test_standard.py
@@ -0,0 +1,37 @@
+from typing import AsyncGenerator, Generator
+
+import pytest
+from langchain_core.embeddings.fake import DeterministicFakeEmbedding
+from langchain_core.vectorstores import VectorStore
+from langchain_tests.integration_tests.vectorstores import (
+    AsyncReadWriteTestSuite,
+    ReadWriteTestSuite,
+)
+
+from langchain_chroma import Chroma
+
+
+class TestSync(ReadWriteTestSuite):
+    """Run the standard synchronous read/write vector store suite on Chroma."""
+
+    @pytest.fixture()
+    def vectorstore(self) -> Generator[VectorStore, None, None]:  # type: ignore
+        """Yield a Chroma store with fake embeddings; delete its collection after."""
+        store = Chroma(embedding_function=DeterministicFakeEmbedding(size=10))
+        try:
+            yield store
+        finally:
+            store.delete_collection()
+
+
+class TestAsync(AsyncReadWriteTestSuite):
+    """Run the standard asynchronous read/write vector store suite on Chroma."""
+
+    @pytest.fixture()
+    async def vectorstore(self) -> AsyncGenerator[VectorStore, None]:  # type: ignore
+        """Yield a Chroma store with fake embeddings; delete its collection after."""
+        store = Chroma(embedding_function=DeterministicFakeEmbedding(size=10))
+        try:
+            yield store
+        finally:
+            store.delete_collection()