diff --git a/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py b/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py
index b552a7e06..568e78ac5 100644
--- a/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py
+++ b/integrations/elasticsearch/src/elasticsearch_haystack/document_store.py
@@ -263,7 +263,7 @@ def _bm25_retrieval(
                                 "query": query,
                                 "fuzziness": fuzziness,
                                 "type": "most_fields",
-                                "operator": "AND",
+                                "operator": "OR",
                             }
                         }
                     ]
diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py
index fbc850182..906a023da 100644
--- a/integrations/elasticsearch/tests/test_document_store.py
+++ b/integrations/elasticsearch/tests/test_document_store.py
@@ -182,6 +182,33 @@ def test_bm25_retrieval_with_fuzziness(self, document_store: ElasticsearchDocume
         assert "functional" in res[1].content
         assert "functional" in res[2].content
 
+    def test_bm25_not_all_terms_must_match(self, document_store: ElasticsearchDocumentStore):
+        """
+        Test that not all terms must mandatorily match for BM25 retrieval to return a result.
+        """
+        documents = [
+            Document(id=1, content="There are over 7,000 languages spoken around the world today."),
+            Document(
+                id=2,
+                content=(
+                    "Elephants have been observed to behave in a way that indicates a high level of self-awareness"
+                    " such as recognizing themselves in mirrors."
+                ),
+            ),
+            Document(
+                id=3,
+                content=(
+                    "In certain parts of the world, like the Maldives, Puerto Rico, and San Diego, you can witness"
+                    " the phenomenon of bioluminescent waves."
+                ),
+            ),
+        ]
+        document_store.write_documents(documents)
+
+        res = document_store._bm25_retrieval("How much self awareness do elephants have?", top_k=3)
+        assert len(res) == 1
+        assert res[0].id == 2
+
     def test_embedding_retrieval(self, document_store: ElasticsearchDocumentStore):
         docs = [
             Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]),