From 91b928e40b9ef6166f0a0ad000036e149425d99a Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 18 Oct 2024 17:26:54 +0200 Subject: [PATCH] simplifying test_multiple_contains - so that it also uses the fixtures --- .../document_stores/chroma/filters.py | 2 +- .../chroma/tests/test_document_store.py | 58 +++++-------------- 2 files changed, 15 insertions(+), 45 deletions(-) diff --git a/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py b/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py index ef5c920a7..60046b6ad 100644 --- a/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py +++ b/integrations/chroma/src/haystack_integrations/document_stores/chroma/filters.py @@ -27,7 +27,7 @@ class ChromaFilter: """ Dataclass to store the converted filter structure used in Chroma queries. - Following filter criterias are supported: + Following filter criteria are supported: - `ids`: A list of document IDs to filter by in Chroma collection. - `where`: A dictionary of metadata filters applied to the documents. - `where_document`: A dictionary of content-based filters applied to the documents' content. diff --git a/integrations/chroma/tests/test_document_store.py b/integrations/chroma/tests/test_document_store.py index 5777051a3..c05264d22 100644 --- a/integrations/chroma/tests/test_document_store.py +++ b/integrations/chroma/tests/test_document_store.py @@ -1,19 +1,17 @@ # SPDX-FileCopyrightText: 2023-present John Doe # # SPDX-License-Identifier: Apache-2.0 -from unittest import mock - import logging -import numpy as np import operator -import pandas as pd -import pytest import sys import uuid -from chromadb.api.types import Documents, EmbeddingFunction, Embeddings -from haystack_integrations.document_stores.chroma import ChromaDocumentStore from typing import List +from unittest import mock +import numpy as np +import pandas as pd +import pytest +from chromadb.api.types import Documents, EmbeddingFunction, Embeddings from haystack import Document from haystack.testing.document_store import ( CountDocumentsTest, @@ -21,6 +19,8 @@ FilterDocumentsTest, ) +from haystack_integrations.document_stores.chroma import ChromaDocumentStore + class _TestEmbeddingFunction(EmbeddingFunction): """ @@ -283,31 +283,20 @@ def test_contains(self, document_store: ChromaDocumentStore, filterable_docs: Li [doc for doc in filterable_docs if doc.content and "FOO" in doc.content], ) - def test_multiple_contains(self, document_store: ChromaDocumentStore): - - documents = [ - Document(content="The cat chased the mouse in the garden."), - Document(content="The cat sat on the windowsill watching the birds."), - Document(content="The cat played with a ball of yarn."), - Document(content="The cat napped peacefully in the sun."), - Document(content=None), - Document(dataframe=pd.DataFrame({"text": ["Something irrelevant"]})), - ] - - document_store.write_documents(documents) - + def test_multiple_contains(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): + filterable_docs = [doc for doc in filterable_docs if doc.content] # remove documents without content + document_store.write_documents(filterable_docs) filters = { - "operator": "AND", + "operator": "OR", "conditions": [ - {"field": "content", "operator": "contains", "value": "cat"}, - {"field": "content", "operator": "not contains", "value": "birds"}, + {"field": "content", "operator": "contains", "value": "FOO"}, + {"field": "content", "operator": "not contains", "value": "BAR"}, ], } result = document_store.filter_documents(filters=filters) - self.assert_documents_are_equal( result, - [doc for doc in documents if doc.content and "cat" in doc.content and "birds" not in doc.content], + [doc for doc in filterable_docs if doc.content and ("FOO" in doc.content or "BAR" not in doc.content)], ) def test_nested_logical_filters(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]): @@ -355,25 +344,6 @@ def test_nested_logical_filters(self, document_store: ChromaDocumentStore, filte ], ) - # Override inequality tests from FilterDocumentsTest - # because chroma doesn't return documents with absent meta fields - - def test_comparison_not_equal(self, document_store, filterable_docs): - """Test filter_documents() with != comparator""" - document_store.write_documents(filterable_docs) - result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100}) - self.assert_documents_are_equal( - result, [d for d in filterable_docs if "number" in d.meta and d.meta.get("number") != 100] - ) - - def test_comparison_not_in(self, document_store, filterable_docs): - """Test filter_documents() with 'not in' comparator""" - document_store.write_documents(filterable_docs) - result = document_store.filter_documents({"field": "meta.number", "operator": "not in", "value": [2, 9]}) - self.assert_documents_are_equal( - result, [d for d in filterable_docs if "number" in d.meta and d.meta.get("number") not in [2, 9]] - ) - @pytest.mark.skip(reason="Filter on dataframe contents is not supported.") def test_comparison_equal_with_dataframe( self, document_store: ChromaDocumentStore, filterable_docs: List[Document]