From 6d340795137a3315cb58b21c1098260610ea548e Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Wed, 6 Mar 2024 12:23:02 +0100 Subject: [PATCH] mongodb: improve example (#546) --- .../mongodb_atlas/examples/example.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/integrations/mongodb_atlas/examples/example.py b/integrations/mongodb_atlas/examples/example.py index 4b02bfd59..4cd3edc21 100644 --- a/integrations/mongodb_atlas/examples/example.py +++ b/integrations/mongodb_atlas/examples/example.py @@ -10,18 +10,24 @@ from haystack import Pipeline from haystack.components.converters import MarkdownToDocument -from haystack.components.embedders import SentenceTransformersDocumentEmbedder +from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder from haystack.components.preprocessors import DocumentSplitter from haystack.components.writers import DocumentWriter +from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore -# Provide your connection string -connection_string = input("Enter your MongoDB Atlas connection string: ") +# To use the MongoDBAtlasDocumentStore, you must have a running MongoDB Atlas database. +# For details, see https://www.mongodb.com/docs/atlas/getting-started/ + +# Once your database is set, set the environment variable `MONGO_CONNECTION_STRING` +# with the connection string to your MongoDB Atlas database. +# format: "mongodb+srv://{mongo_atlas_username}:{mongo_atlas_password}@{mongo_atlas_host}/?{mongo_atlas_params_string}". # Initialize the document store document_store = MongoDBAtlasDocumentStore( database_name="haystack_test", collection_name="test_collection", + vector_search_index="test_vector_search_index", ) # Create the indexing Pipeline and index some documents @@ -39,4 +45,15 @@ indexing.run({"converter": {"sources": file_paths}}) -print("Indexed documents:" + document_store.count_documents() + "\n - ".join(document_store.filter_documents())) + +# Create the querying Pipeline and try a query +querying = Pipeline() +querying.add_component("embedder", SentenceTransformersTextEmbedder()) +querying.add_component("retriever", MongoDBAtlasEmbeddingRetriever(document_store=document_store, top_k=3)) +querying.connect("embedder", "retriever") + +results = querying.run({"embedder": {"text": "What is a cross-encoder?"}}) + +for doc in results["retriever"]["documents"]: + print(doc) + print("-" * 10)