Merge pull request #19 from zc277584121/main

Update with milvus lite
milvus-io · May 10, 2024 · b7448d8 · b7448d8
2 parents 3201558 + 68597d1
commit b7448d8
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -5,35 +5,51 @@
 
 ## Installation
 
-```console
-pip install milvus-haystack
+```shell
+pip install --upgrade pymilvus milvus-haystack
 ```
 
 ## Usage
 
-First, to start up a Milvus service, follow
-the ['Start Milvus'](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus) instructions in the
-documentation.
+By default, if you install the latest version of pymilvus, you don't need to start a Milvus service manually.
+Optionally, you
+can [start the Milvus service with Docker](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus).
 
-Then, to use the `MilvusDocumentStore` in a Haystack pipeline
+Use the `MilvusDocumentStore` in a Haystack pipeline as a quick start.
 
 ```python
 from haystack import Document
 from milvus_haystack import MilvusDocumentStore
 
-document_store = MilvusDocumentStore()
+document_store = MilvusDocumentStore(
+    # If you have installed the latest version of pymilvus with Milvus Lite, you can use a local path as the URI without starting a Milvus service.
+    connection_args={"uri": "./milvus.db"},
+    # Or, if you have started the Milvus standalone service with Docker, you can connect to it by specifying its URI.
+    # connection_args={"uri": "http://localhost:19530"},
+    drop_old=True,
+)
 documents = [Document(
     content="A Foo Document",
     meta={"page": "100", "chapter": "intro"},
     embedding=[-10.0] * 128,
 )]
 document_store.write_documents(documents)
-document_store.count_documents()  # 1
+print(document_store.count_documents())  # 1
 ```
 
 ## Dive deep usage
 
-Here are the ways to build index, retrieval, and build rag pipeline respectively.
+Prepare an OpenAI API key and set it as an environment variable:
+
+```shell
+export OPENAI_API_KEY=<your_api_key>
+```
+
+The following sections show how to:
+
+- Create the indexing pipeline
+- Create the retrieval pipeline
+- Create the RAG pipeline
 
 ### Create the indexing Pipeline and index some documents
 
@@ -43,29 +59,27 @@ import os
 
 from haystack import Pipeline
 from haystack.components.converters import MarkdownToDocument
-from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
+from haystack.components.embedders import OpenAIDocumentEmbedder, OpenAITextEmbedder
 from haystack.components.preprocessors import DocumentSplitter
 from haystack.components.writers import DocumentWriter
 
 from milvus_haystack import MilvusDocumentStore
 from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
 
-file_paths = glob.glob("./your_docs.md")
+current_file_path = os.path.abspath(__file__)
+file_paths = [current_file_path]  # You can replace it with your own file paths.
 
 document_store = MilvusDocumentStore(
-    connection_args={
-        "host": "localhost",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": False,
-    },
+    # If you have installed the latest version of pymilvus with Milvus Lite, you can use a local path as the URI without starting a Milvus service.
+    connection_args={"uri": "./milvus.db"},
+    # Or, if you have started the Milvus standalone service with Docker, you can connect to it by specifying its URI.
+    # connection_args={"uri": "http://localhost:19530"},
     drop_old=True,
 )
 indexing_pipeline = Pipeline()
 indexing_pipeline.add_component("converter", MarkdownToDocument())
 indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
-indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
+indexing_pipeline.add_component("embedder", OpenAIDocumentEmbedder())
 indexing_pipeline.add_component("writer", DocumentWriter(document_store))
 indexing_pipeline.connect("converter", "splitter")
 indexing_pipeline.connect("splitter", "embedder")
@@ -78,10 +92,10 @@ print("Number of documents:", document_store.count_documents())
 ### Create the retrieval pipeline and try a query
 
 ```python
-question = "What is Milvus?"
+question = "How to set the service uri with milvus lite?"  # You can replace it with your own question. 
 
 retrieval_pipeline = Pipeline()
-retrieval_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
+retrieval_pipeline.add_component("embedder", OpenAITextEmbedder())
 retrieval_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
 retrieval_pipeline.connect("embedder", "retriever")
 
@@ -96,7 +110,6 @@ for doc in retrieval_results["retriever"]["documents"]:
 
 ```python
 from haystack.utils import Secret
-from haystack.components.embedders import SentenceTransformersTextEmbedder
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
 
@@ -111,7 +124,7 @@ prompt_template = """Answer the following query based on the provided context. I
                   """
 
 rag_pipeline = Pipeline()
-rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
+rag_pipeline.add_component("text_embedder", OpenAITextEmbedder())
 rag_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
 rag_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
 rag_pipeline.add_component("generator", OpenAIGenerator(api_key=Secret.from_token(os.getenv("OPENAI_API_KEY")),

diff --git a/src/milvus_haystack/__about__.py b/src/milvus_haystack/__about__.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2023-present Tuana Celik <[email protected]>
 #
 # SPDX-License-Identifier: Apache-2.0
-__version__ = "0.0.5"
+__version__ = "0.0.6"
diff --git a/src/milvus_haystack/document_store.py b/src/milvus_haystack/document_store.py
@@ -409,9 +409,7 @@ def _create_connection_alias(self, connection_args: dict) -> str:
             elif uri.startswith("http://"):
                 given_address = uri.split("http://")[1]
             else:
-                err_msg = "Invalid Milvus URI: %s", uri
-                logger.error(err_msg)
-                raise ValueError(err_msg)
+                given_address = uri  # Milvus lite
         elif address is not None:
             given_address = address
         else:

diff --git a/src/milvus_haystack/filters.py b/src/milvus_haystack/filters.py
@@ -60,7 +60,7 @@ def _assert_comparison_filter(filters: Dict[str, Any]):
     assert "field" in filters, "field must be specified in filters"  # noqa: S101
     assert "value" in filters, "value must be specified in filters"  # noqa: S101
     assert filters["operator"] in COMPARISON_OPERATORS, FilterError(  # noqa: S101
-        "operator must be one of: %s" % LOGIC_OPERATORS
+        f"operator must be one of: {LOGIC_OPERATORS}"
     )
 
 
@@ -85,5 +85,5 @@ def _parse_logic(filters: Dict[str, Any]) -> str:
 def _assert_logic_filter(filters: Dict[str, Any]):
     assert "operator" in filters, "operator must be specified in filters"  # noqa: S101
     assert "conditions" in filters, "conditions must be specified in filters"  # noqa: S101
-    assert filters["operator"] in LOGIC_OPERATORS, "operator must be one of: %s" % LOGIC_OPERATORS  # noqa: S101
+    assert filters["operator"] in LOGIC_OPERATORS, f"operator must be one of: {LOGIC_OPERATORS}"  # noqa: S101
     assert isinstance(filters["conditions"], list), "conditions must be a list"  # noqa: S101