Skip to content

Commit

Permalink
_discard_invalid_meta
Browse files Browse the repository at this point in the history
Signed-off-by: ChengZi <[email protected]>
  • Loading branch information
zc277584121 committed Oct 16, 2024
1 parent 2bb7b65 commit df0cfee
Showing 1 changed file with 29 additions and 1 deletion.
30 changes: 29 additions & 1 deletion src/milvus_haystack/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D

from pymilvus import Collection, MilvusException

documents_cp = deepcopy(documents)
documents_cp = [MilvusDocumentStore._discard_invalid_meta(doc) for doc in deepcopy(documents)]
if len(documents_cp) > 0 and not isinstance(documents_cp[0], Document):
err_msg = "param 'documents' must contain a list of objects of type Document"
raise ValueError(err_msg)
Expand Down Expand Up @@ -905,3 +905,31 @@ def _convert_sparse_to_dict(self, sparse_embedding: SparseEmbedding) -> Dict:

def _convert_dict_to_sparse(self, sparse_dict: Dict) -> SparseEmbedding:
    """
    Build a SparseEmbedding from a dictionary.

    The dictionary keys become the embedding's ``indices`` and the
    dictionary values become its ``values``, both in iteration order.
    """
    indices = list(sparse_dict.keys())
    values = list(sparse_dict.values())
    return SparseEmbedding(indices=indices, values=values)

@staticmethod
def _discard_invalid_meta(document: Document):
    """
    Drop metadata entries whose value type pymilvus cannot infer.

    Each metadata value is passed to ``infer_dtype_bydata``; entries for
    which it reports ``DataType.UNKNOWN`` or ``DataType.NONE`` are removed
    and a single warning naming the dropped keys is logged.

    The document is modified in place (its ``meta`` is replaced by a new
    dict holding only the kept entries) and the same object is returned.
    A document with empty or missing metadata is returned untouched.
    """
    from pymilvus import DataType
    from pymilvus.orm.types import infer_dtype_bydata

    # Nothing to filter when the document carries no metadata.
    if not document.meta:
        return document

    unsupported_dtypes = (DataType.UNKNOWN, DataType.NONE)
    kept_meta = {}
    discarded_keys = []
    for field_name, field_value in document.meta.items():
        if infer_dtype_bydata(field_value) not in unsupported_dtypes:
            kept_meta[field_name] = field_value
            continue
        discarded_keys.append(field_name)

    if discarded_keys:
        logger.warning(
            f"Document {document.id} has metadata fields with unsupported types: {discarded_keys}. "
            f"Supported types refer to Pymilvus DataType. The values of these fields will be discarded."
        )

    # Always swap in the rebuilt dict, even when nothing was discarded.
    document.meta = kept_meta
    return document

0 comments on commit df0cfee

Please sign in to comment.