Skip to content

Commit

Permalink
[chroma] Update the code to account for the latest changes in haystack Document (#55)
Browse files Browse the repository at this point in the history

* update the code to the latest changes in haystack Document

* pin haystack-ai

* fix linter

* do not add None as metadata
  • Loading branch information
masci authored Nov 16, 2023
1 parent 5ecacc5 commit 48c0d5f
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
2 changes: 1 addition & 1 deletion document_stores/chroma/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"haystack-ai",
"haystack-ai<0.144.0",
"chromadb",
]

Expand Down
34 changes: 19 additions & 15 deletions document_stores/chroma/src/chroma_haystack/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ def _get_result_to_documents(self, result: GetResult) -> List[Document]:
document_dict["content"] = result_documents[i]

result_metadata = result.get("metadatas")
if result_metadata:
# Ensure metadata[i] is not None or don't add it to the document dict
if result_metadata and result_metadata[i]:
document_dict["meta"] = result_metadata[i]

result_embeddings = result.get("embeddings")
Expand All @@ -296,26 +297,29 @@ def _query_result_to_documents(self, result: QueryResult) -> List[List[Document]
"""
Helper function to convert Chroma results into Haystack Documents
"""
retval = []
for i, answers in enumerate(result["documents"]):
retval: List[List[Document]] = []
documents = result.get("documents")
if documents is None:
return retval

for i, answers in enumerate(documents):
converted_answers = []
for j in range(len(answers)):
# prepare metadata
metadata = result["metadatas"][i][j]
mime_type = metadata.pop("_mime_type")

document_dict = {
document_dict: Dict[str, Any] = {
"id": result["ids"][i][j],
"text": result["documents"][i][j].text,
"metadata": metadata,
"mime_type": mime_type,
"content": documents[i][j],
}

if result["embeddings"][i][j]:
document_dict["embedding"] = np.array(result["embeddings"][i][j])
# prepare metadata
if metadatas := result.get("metadatas"):
document_dict["metadata"] = dict(metadatas[i][j])
document_dict["mime_type"] = document_dict["metadata"].pop("_mime_type")

if embeddings := result.get("embeddings"):
document_dict["embedding"] = np.array(embeddings[i][j])

if result["distances"][i][j]:
document_dict["score"] = result["distances"][i][j]
if distances := result.get("distances"):
document_dict["score"] = distances[i][j]

converted_answers.append(Document.from_dict(document_dict))
retval.append(converted_answers)
Expand Down
2 changes: 1 addition & 1 deletion document_stores/chroma/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_delete_not_empty_nonexisting(self, docstore: ChromaDocumentStore):
"""
Deleting a non-existing document should not raise with Chroma
"""
doc = Document(text="test doc")
doc = Document(content="test doc")
docstore.write_documents([doc])
docstore.delete_documents(["non_existing"])

Expand Down

0 comments on commit 48c0d5f

Please sign in to comment.