Merge pull request #16 from zc277584121/main
refine doc
jaelgu authored Mar 14, 2024
2 parents 463329b + 1ced93c commit cf0a1f8
Showing 1 changed file (README.md) with 98 additions and 1 deletion.
@@ -1,10 +1,13 @@
# Milvus Document Store for Haystack

[![PyPI - Version](https://img.shields.io/pypi/v/milvus-haystack.svg)](https://pypi.org/project/milvus-haystack)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/milvus-haystack.svg)](https://pypi.org/project/milvus-haystack)


## Installation

```console
pip install milvus-haystack
```
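
The pipelines in this README talk to a running Milvus instance (the defaults below use `localhost:19530`). As a minimal sketch, assuming such a local deployment, you can verify the connection like this:

```py
from milvus_haystack import MilvusDocumentStore

# Connect to a locally running Milvus server; adjust connection_args
# (host, port, user, password, secure) to match your deployment.
document_store = MilvusDocumentStore(
    connection_args={"host": "localhost", "port": "19530"},
)
print(document_store.count_documents())  # 0 for a fresh collection
```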

## Usage
@@ -26,6 +29,100 @@
document_store.write_documents(documents)
document_store.count_documents() # 1
```

## In-depth usage

The following example shows how to build an indexing pipeline, a retrieval pipeline, and a RAG pipeline, in turn.

```py
# Create the indexing Pipeline and index some documents
import glob
import os

from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter

from milvus_haystack import MilvusDocumentStore
from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
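# Note: the Markdown converter and the sentence-transformers embedders rely on
# optional dependencies (e.g. `sentence-transformers`); install them if they are missing.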

file_paths = glob.glob("./your_docs.md")

document_store = MilvusDocumentStore(
    connection_args={
        "host": "localhost",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": False,
    },
    drop_old=True,
)
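
# The indexing pipeline below converts Markdown files to Documents, splits them into
# short passages, embeds each passage, and writes the embedded Documents to Milvus.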
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", MarkdownToDocument())
indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing_pipeline.add_component("writer", DocumentWriter(document_store))
indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "embedder")
indexing_pipeline.connect("embedder", "writer")
indexing_pipeline.run({"converter": {"sources": file_paths}})

print("Number of documents:", document_store.count_documents())

# ------------------------------------------------------------------------------------
# Create the retrieval pipeline and try a query
question = "What is Milvus?"

retrieval_pipeline = Pipeline()
retrieval_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
retrieval_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
retrieval_pipeline.connect("embedder", "retriever")

retrieval_results = retrieval_pipeline.run({"embedder": {"text": question}})

for doc in retrieval_results["retriever"]["documents"]:
    print(doc.content)
    print("-" * 10)

# ------------------------------------------------------------------------------------
# Create the RAG pipeline and try a query
from haystack.utils import Secret
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator

prompt_template = """Answer the following query based on the provided context. If the context does
not include an answer, reply with 'I don't know'.\n
Query: {{query}}
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
Answer:
"""

rag_pipeline = Pipeline()
rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
rag_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
rag_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
rag_pipeline.add_component("generator", OpenAIGenerator(api_key=Secret.from_token(os.getenv("OPENAI_API_KEY")),
generation_kwargs={"temperature": 0}))
rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

results = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"query": question},
    }
)
print("RAG answer:", results["generator"]["replies"][0])

```
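
The pipelines above recreate the Milvus collection on every run because `drop_old=True`. As a sketch, assuming the same connection settings, you can reconnect to an existing collection and keep previously indexed documents by passing `drop_old=False`:

```py
from milvus_haystack import MilvusDocumentStore

# Reuse the existing collection instead of dropping it, so documents
# indexed in an earlier run remain searchable.
document_store = MilvusDocumentStore(
    connection_args={"host": "localhost", "port": "19530"},
    drop_old=False,
)
print(document_store.count_documents())
```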

## License

`milvus-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
