Skip to content

Commit

Permalink
fix: make Weaviate DS handle fields consisting of empty lists (#6925)
Browse files Browse the repository at this point in the history
* make weaviate ds handle empty lists in meta

* improve test

* reno

* revert wrong change

* add comment

* Update releasenotes/notes/weaviate-handle-empty-list-8d3432080f8bfefd.yaml

Co-authored-by: Julian Risch <[email protected]>

---------

Co-authored-by: Julian Risch <[email protected]>
  • Loading branch information
anakin87 and julian-risch authored Feb 6, 2024
1 parent 29f95be commit af0166f
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
6 changes: 4 additions & 2 deletions haystack/document_stores/weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,8 +684,10 @@ def write_documents(
property_value = _doc[property]
if property in json_fields:
property_value = doc.meta[property]
self._update_schema(property, property_value, index)
current_properties.append(property)
# if the property_value is an empty list, we can't infer the type
if not isinstance(property_value, list) or len(property_value) > 0:
self._update_schema(property, property_value, index)
current_properties.append(property)
# update the date fields as there might be new ones
date_fields = self._get_date_properties(index)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
fixes:
- |
Fixed a bug that made it impossible to write Documents
to Weaviate when some of their meta fields were empty lists
(e.g. `_split_overlap` for preprocessed documents).
28 changes: 28 additions & 0 deletions test/document_stores/test_weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from haystack.document_stores.weaviate import WeaviateDocumentStore
from haystack.schema import Document
from haystack.testing import DocumentStoreBaseTestAbstract
from haystack.nodes.preprocessor import PreProcessor

embedding_dim = 768

Expand Down Expand Up @@ -267,6 +268,33 @@ def test_get_embedding_count(self, ds, documents):
ds.write_documents(documents)
assert ds.get_embedding_count() == 9

@pytest.mark.integration
def test_write_preprocessed_docs(self, ds, documents):
    """
    Preprocessed documents must be writable to Weaviate even when the
    `_split_overlap` meta field is an empty list for some of them, and the
    field must still be present on every document read back from the store.
    """
    # Short word-based splits with overlap, so the extra document added below
    # is cut into several parts.
    preprocessor_settings = {
        "clean_empty_lines": True,
        "clean_whitespace": True,
        "clean_header_footer": True,
        "split_by": "word",
        "split_length": 5,
        "split_overlap": 2,
        "split_respect_sentence_boundary": False,
    }
    preprocessor = PreProcessor(**preprocessor_settings)

    documents.append(Document(content="This is a longer document that will be split into multiple parts."))

    ds.write_documents(preprocessor.process(documents))

    for stored_doc in ds.get_all_documents():
        assert "_split_overlap" in stored_doc.meta

@pytest.mark.unit
def test__get_auth_secret(self):
# Test with username and password
Expand Down

0 comments on commit af0166f

Please sign in to comment.