Skip to content

Commit

Permalink
chroma[patch]: fix bug (#28538)
Browse files Browse the repository at this point in the history
Fix bug introduced in #27995.

If several documents have no ID, the previous code gave each of them the ID `""`, and the Chroma SDK raises:
```
DuplicateIDError: Expected IDs to be unique
```

Caught by the [docs tests](https://github.com/langchain-ai/langchain/actions/runs/12180395579/job/33974633950); this commit also adds a regression test to langchain-chroma.
  • Loading branch information
ccurme authored Dec 5, 2024
1 parent ecff9a0 commit 8f9b3b7
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
2 changes: 1 addition & 1 deletion libs/partners/chroma/langchain_chroma/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ def from_documents(
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
if ids is None:
ids = [doc.id if doc.id else "" for doc in documents]
ids = [doc.id if doc.id else str(uuid.uuid4()) for doc in documents]
return cls.from_texts(
texts=texts,
embedding=embedding,
Expand Down
16 changes: 16 additions & 0 deletions libs/partners/chroma/tests/integration_tests/test_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,22 @@ def test_chroma() -> None:
assert output[0].id is not None


def test_from_documents() -> None:
    """Build a store with ``Chroma.from_documents`` and run a one-hit search.

    The input documents are created without explicit ids; the test checks
    that the single returned hit matches the query text and carries an id.
    """
    # Three documents, none of which is given an id.
    docs_without_ids = [
        Document(page_content=text) for text in ("foo", "bar", "baz")
    ]
    store = Chroma.from_documents(
        documents=docs_without_ids,
        embedding=FakeEmbeddings(),
    )
    hits = store.similarity_search("foo", k=1)

    # The collection is dropped before any assertion runs.
    store.delete_collection()

    assert len(hits) == 1
    top_hit = hits[0]
    assert top_hit.page_content == "foo"
    assert top_hit.id is not None


def test_chroma_with_ids() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
Expand Down

0 comments on commit 8f9b3b7

Please sign in to comment.