From 300aca3b3c0b588fd8566d4a368e2982938f5cf7 Mon Sep 17 00:00:00 2001
From: ChengZi <chen.zhang@zilliz.com>
Date: Fri, 10 May 2024 16:12:14 +0800
Subject: [PATCH 1/2] update with milvus lite

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
---
 README.md                             | 59 ++++++++++++++++-----------
 src/milvus_haystack/document_store.py |  4 +-
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index cc5b4bc..0ac5397 100644
--- a/README.md
+++ b/README.md
@@ -5,35 +5,51 @@
 
 ## Installation
 
-```console
-pip install milvus-haystack
+```shell
+pip install --upgrade pymilvus milvus-haystack
 ```
 
 ## Usage
 
-First, to start up a Milvus service, follow
-the ['Start Milvus'](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus) instructions in the
-documentation.
+If you install the latest version of pymilvus, you don't need to start a Milvus service manually.
+Optionally, you
+can [start a Milvus service with Docker](https://milvus.io/docs/install_standalone-docker.md#Start-Milvus).
 
-Then, to use the `MilvusDocumentStore` in a Haystack pipeline
+As a quick start, use the `MilvusDocumentStore` in a Haystack pipeline:
 
 ```python
 from haystack import Document
 from milvus_haystack import MilvusDocumentStore
 
-document_store = MilvusDocumentStore()
+document_store = MilvusDocumentStore(
+    # If you have installed the latest version of pymilvus with Milvus Lite, you can use a local file path as the URI without starting a Milvus service.
+    connection_args={"uri": "./milvus.db"},
+    # Or, if you have started a Milvus standalone service with Docker, you can use its URI to connect to the service.
+    # connection_args={"uri": "http://localhost:19530"},
+    drop_old=True,
+)
 documents = [Document(
     content="A Foo Document",
     meta={"page": "100", "chapter": "intro"},
     embedding=[-10.0] * 128,
 )]
 document_store.write_documents(documents)
-document_store.count_documents()  # 1
+print(document_store.count_documents())  # 1
 ```
 
 ## Dive deep usage
 
-Here are the ways to build index, retrieval, and build rag pipeline respectively.
+Prepare an OpenAI API key and set it as an environment variable:
+
+```shell
+export OPENAI_API_KEY=<your_api_key>
+```
+
+The following sections show how to:
+
+- Create the indexing Pipeline
+- Create the retrieval pipeline
+- Create the RAG pipeline
 
 ### Create the indexing Pipeline and index some documents
 
@@ -43,29 +59,27 @@ import os
 
 from haystack import Pipeline
 from haystack.components.converters import MarkdownToDocument
-from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
+from haystack.components.embedders import OpenAIDocumentEmbedder, OpenAITextEmbedder
 from haystack.components.preprocessors import DocumentSplitter
 from haystack.components.writers import DocumentWriter
 
 from milvus_haystack import MilvusDocumentStore
 from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
 
-file_paths = glob.glob("./your_docs.md")
+current_file_path = os.path.abspath(__file__)
+file_paths = [current_file_path]  # You can replace it with your own file paths.
 
 document_store = MilvusDocumentStore(
-    connection_args={
-        "host": "localhost",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": False,
-    },
+    # If you have installed the latest version of pymilvus with Milvus Lite, you can use a local file path as the URI without starting a Milvus service.
+    connection_args={"uri": "./milvus.db"},
+    # Or, if you have started a Milvus standalone service with Docker, you can use its URI to connect to the service.
+    # connection_args={"uri": "http://localhost:19530"},
     drop_old=True,
 )
 indexing_pipeline = Pipeline()
 indexing_pipeline.add_component("converter", MarkdownToDocument())
 indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
-indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
+indexing_pipeline.add_component("embedder", OpenAIDocumentEmbedder())
 indexing_pipeline.add_component("writer", DocumentWriter(document_store))
 indexing_pipeline.connect("converter", "splitter")
 indexing_pipeline.connect("splitter", "embedder")
@@ -78,10 +92,10 @@ print("Number of documents:", document_store.count_documents())
 ### Create the retrieval pipeline and try a query
 
 ```python
-question = "What is Milvus?"
+question = "How to set the service uri with milvus lite?"  # You can replace it with your own question. 
 
 retrieval_pipeline = Pipeline()
-retrieval_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
+retrieval_pipeline.add_component("embedder", OpenAITextEmbedder())
 retrieval_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
 retrieval_pipeline.connect("embedder", "retriever")
 
@@ -96,7 +110,6 @@ for doc in retrieval_results["retriever"]["documents"]:
 
 ```python
 from haystack.utils import Secret
-from haystack.components.embedders import SentenceTransformersTextEmbedder
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
 
@@ -111,7 +124,7 @@ prompt_template = """Answer the following query based on the provided context. I
                   """
 
 rag_pipeline = Pipeline()
-rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
+rag_pipeline.add_component("text_embedder", OpenAITextEmbedder())
 rag_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
 rag_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
 rag_pipeline.add_component("generator", OpenAIGenerator(api_key=Secret.from_token(os.getenv("OPENAI_API_KEY")),
diff --git a/src/milvus_haystack/document_store.py b/src/milvus_haystack/document_store.py
index c4c8813..2399842 100644
--- a/src/milvus_haystack/document_store.py
+++ b/src/milvus_haystack/document_store.py
@@ -409,9 +409,7 @@ def _create_connection_alias(self, connection_args: dict) -> str:
             elif uri.startswith("http://"):
                 given_address = uri.split("http://")[1]
             else:
-                err_msg = "Invalid Milvus URI: %s", uri
-                logger.error(err_msg)
-                raise ValueError(err_msg)
+                given_address = uri  # Milvus lite
         elif address is not None:
             given_address = address
         else:

From 68597d1bf3613abc1abaaf163b9037346c85725b Mon Sep 17 00:00:00 2001
From: ChengZi <chen.zhang@zilliz.com>
Date: Fri, 10 May 2024 16:14:10 +0800
Subject: [PATCH 2/2] v0.0.6

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
---
 src/milvus_haystack/__about__.py | 2 +-
 src/milvus_haystack/filters.py   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/milvus_haystack/__about__.py b/src/milvus_haystack/__about__.py
index cbd11ba..c4f3fd4 100644
--- a/src/milvus_haystack/__about__.py
+++ b/src/milvus_haystack/__about__.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2023-present Tuana Celik <tuana.celik@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-__version__ = "0.0.5"
+__version__ = "0.0.6"
diff --git a/src/milvus_haystack/filters.py b/src/milvus_haystack/filters.py
index 5f527d9..21ffa94 100644
--- a/src/milvus_haystack/filters.py
+++ b/src/milvus_haystack/filters.py
@@ -60,7 +60,7 @@ def _assert_comparison_filter(filters: Dict[str, Any]):
     assert "field" in filters, "field must be specified in filters"  # noqa: S101
     assert "value" in filters, "value must be specified in filters"  # noqa: S101
     assert filters["operator"] in COMPARISON_OPERATORS, FilterError(  # noqa: S101
-        "operator must be one of: %s" % LOGIC_OPERATORS
+        f"operator must be one of: {LOGIC_OPERATORS}"
     )
 
 
@@ -85,5 +85,5 @@ def _parse_logic(filters: Dict[str, Any]) -> str:
 def _assert_logic_filter(filters: Dict[str, Any]):
     assert "operator" in filters, "operator must be specified in filters"  # noqa: S101
     assert "conditions" in filters, "conditions must be specified in filters"  # noqa: S101
-    assert filters["operator"] in LOGIC_OPERATORS, "operator must be one of: %s" % LOGIC_OPERATORS  # noqa: S101
+    assert filters["operator"] in LOGIC_OPERATORS, f"operator must be one of: {LOGIC_OPERATORS}"  # noqa: S101
     assert isinstance(filters["conditions"], list), "conditions must be a list"  # noqa: S101