From 8f9b3b7498d7446201b54d5b6e347cacea0f04ad Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 5 Dec 2024 10:37:19 -0500 Subject: [PATCH] chroma[patch]: fix bug (#28538) Fix bug introduced in https://github.com/langchain-ai/langchain/pull/27995 If all document IDs are `""`, the chroma SDK will raise ``` DuplicateIDError: Expected IDs to be unique ``` Caught by [docs tests](https://github.com/langchain-ai/langchain/actions/runs/12180395579/job/33974633950), but added a test to langchain-chroma as well. --- .../chroma/langchain_chroma/vectorstores.py | 2 +- .../tests/integration_tests/test_vectorstores.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index d8d8f5de5e425..9967ac6d29488 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -1228,7 +1228,7 @@ def from_documents( texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] if ids is None: - ids = [doc.id if doc.id else "" for doc in documents] + ids = [doc.id if doc.id else str(uuid.uuid4()) for doc in documents] return cls.from_texts( texts=texts, embedding=embedding, diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index bea50c909748f..f7bed4cfa5588 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -51,6 +51,22 @@ def test_chroma() -> None: assert output[0].id is not None +def test_from_documents() -> None: + """Test init using .from_documents.""" + documents = [ + Document(page_content="foo"), + Document(page_content="bar"), + Document(page_content="baz"), + ] + docsearch = Chroma.from_documents(documents=documents, embedding=FakeEmbeddings()) + output = docsearch.similarity_search("foo", k=1) + + docsearch.delete_collection() + assert len(output) == 1 + assert output[0].page_content == "foo" + assert output[0].id is not None + + def test_chroma_with_ids() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"]