Tutorial: Milvus rag with Dynamiq, run black
jinhonglin-ryan committed Nov 20, 2024
1 parent f5019b3 commit 569e532
Showing 1 changed file with 56 additions and 43 deletions.
99 changes: 56 additions & 43 deletions bootcamp/tutorials/integration/milvus_rag_with_dynamiq.ipynb
@@ -64,7 +64,7 @@
 },
 "outputs": [],
 "source": [
-"! pip install dynamiq pymilvus\n"
+"! pip install dynamiq pymilvus"
 ]
 },
 {
@@ -151,14 +151,18 @@
 "from io import BytesIO\n",
 "from dynamiq import Workflow\n",
 "from dynamiq.nodes import InputTransformer\n",
-"from dynamiq.connections import OpenAI as OpenAIConnection, Milvus as MilvusConnection, MilvusDeploymentType\n",
+"from dynamiq.connections import (\n",
+"    OpenAI as OpenAIConnection,\n",
+"    Milvus as MilvusConnection,\n",
+"    MilvusDeploymentType,\n",
+")\n",
 "from dynamiq.nodes.converters import PyPDFConverter\n",
 "from dynamiq.nodes.splitters.document import DocumentSplitter\n",
 "from dynamiq.nodes.embedders import OpenAIDocumentEmbedder\n",
 "from dynamiq.nodes.writers import MilvusDocumentWriter\n",
 "\n",
 "# Initialize the workflow\n",
-"rag_wf = Workflow()\n"
+"rag_wf = Workflow()"
 ]
 },
 {
@@ -185,7 +189,9 @@
 "outputs": [],
 "source": [
 "converter = PyPDFConverter(document_creation_mode=\"one-doc-per-page\")\n",
-"converter_added = rag_wf.flow.add_nodes(converter) # Add node to the DAG (Directed Acyclic Graph)\n"
+"converter_added = rag_wf.flow.add_nodes(\n",
+"    converter\n",
+") # Add node to the DAG (Directed Acyclic Graph)"
 ]
 },
 {
@@ -213,15 +219,17 @@
 "source": [
 "document_splitter = DocumentSplitter(\n",
 "    split_by=\"sentence\", # Splits documents into sentences\n",
-"    split_length=10, \n",
-"    split_overlap=1, \n",
+"    split_length=10,\n",
+"    split_overlap=1,\n",
 "    input_transformer=InputTransformer(\n",
 "        selector={\n",
-"            \"documents\": f\"${[converter.id]}.output.documents\", \n",
+"            \"documents\": f\"${[converter.id]}.output.documents\",\n",
 "        },\n",
 "    ),\n",
-").depends_on(converter) # Set dependency on the PDF converter\n",
-"splitter_added = rag_wf.flow.add_nodes(document_splitter) # Add to the DAG\n"
+").depends_on(\n",
+"    converter\n",
+") # Set dependency on the PDF converter\n",
+"splitter_added = rag_wf.flow.add_nodes(document_splitter) # Add to the DAG"
 ]
 },
 {
@@ -248,14 +256,16 @@
 "outputs": [],
 "source": [
 "embedder = OpenAIDocumentEmbedder(\n",
-"    connection=OpenAIConnection(api_key=os.environ[\"OPENAI_API_KEY\"]), \n",
+"    connection=OpenAIConnection(api_key=os.environ[\"OPENAI_API_KEY\"]),\n",
 "    input_transformer=InputTransformer(\n",
 "        selector={\n",
-"            \"documents\": f\"${[document_splitter.id]}.output.documents\", \n",
+"            \"documents\": f\"${[document_splitter.id]}.output.documents\",\n",
 "        },\n",
 "    ),\n",
-").depends_on(document_splitter) # Set dependency on the splitter\n",
-"document_embedder_added = rag_wf.flow.add_nodes(embedder) # Add to the DAG\n"
+").depends_on(\n",
+"    document_splitter\n",
+") # Set dependency on the splitter\n",
+"document_embedder_added = rag_wf.flow.add_nodes(embedder) # Add to the DAG"
 ]
 },
 {
@@ -297,16 +307,18 @@
 "source": [
 "vector_store = (\n",
 "    MilvusDocumentWriter(\n",
-"        connection=MilvusConnection(deployment_type=MilvusDeploymentType.FILE, uri=\"./milvus.db\"),\n",
-"        index_name=\"my_milvus_collection\", \n",
-"        dimension=1536, \n",
-"        create_if_not_exist=True, \n",
-"        metric_type=\"COSINE\" \n",
+"        connection=MilvusConnection(\n",
+"            deployment_type=MilvusDeploymentType.FILE, uri=\"./milvus.db\"\n",
+"        ),\n",
+"        index_name=\"my_milvus_collection\",\n",
+"        dimension=1536,\n",
+"        create_if_not_exist=True,\n",
+"        metric_type=\"COSINE\",\n",
 "    )\n",
 "    .inputs(documents=embedder.outputs.documents) # Connect to embedder output\n",
 "    .depends_on(embedder) # Set dependency on the embedder\n",
 ")\n",
-"milvus_writer_added = rag_wf.flow.add_nodes(vector_store) # Add to the DAG\n"
+"milvus_writer_added = rag_wf.flow.add_nodes(vector_store) # Add to the DAG"
 ]
 },
 {
@@ -400,16 +412,12 @@
 "source": [
 "file_paths = [\"./pdf_files/WhatisMilvus.pdf\"]\n",
 "input_data = {\n",
-"    \"files\": [\n",
-"        BytesIO(open(path, \"rb\").read()) for path in file_paths \n",
-"    ],\n",
-"    \"metadata\": [\n",
-"        {\"filename\": path} for path in file_paths \n",
-"    ],\n",
+"    \"files\": [BytesIO(open(path, \"rb\").read()) for path in file_paths],\n",
+"    \"metadata\": [{\"filename\": path} for path in file_paths],\n",
 "}\n",
 "\n",
 "# Run the workflow with the prepared input data\n",
-"inserted_data = rag_wf.run(input_data=input_data)\n"
+"inserted_data = rag_wf.run(input_data=input_data)"
 ]
 },
 {
@@ -462,14 +470,18 @@
 "outputs": [],
 "source": [
 "from dynamiq import Workflow\n",
-"from dynamiq.connections import OpenAI as OpenAIConnection, Milvus as MilvusConnection, MilvusDeploymentType\n",
+"from dynamiq.connections import (\n",
+"    OpenAI as OpenAIConnection,\n",
+"    Milvus as MilvusConnection,\n",
+"    MilvusDeploymentType,\n",
+")\n",
 "from dynamiq.nodes.embedders import OpenAITextEmbedder\n",
 "from dynamiq.nodes.retrievers import MilvusDocumentRetriever\n",
 "from dynamiq.nodes.llms import OpenAI\n",
 "from dynamiq.prompts import Message, Prompt\n",
 "\n",
 "# Initialize the workflow\n",
-"retrieval_wf = Workflow()\n"
+"retrieval_wf = Workflow()"
 ]
 },
 {
@@ -501,11 +513,11 @@
 "# Define the text embedder node\n",
 "embedder = OpenAITextEmbedder(\n",
 "    connection=openai_connection,\n",
-"    model=\"text-embedding-3-small\", \n",
+"    model=\"text-embedding-3-small\",\n",
 ")\n",
 "\n",
 "# Add the embedder node to the workflow\n",
-"embedder_added = retrieval_wf.flow.add_nodes(embedder)\n"
+"embedder_added = retrieval_wf.flow.add_nodes(embedder)"
 ]
 },
 {
@@ -545,19 +557,18 @@
 "document_retriever = (\n",
 "    MilvusDocumentRetriever(\n",
 "        connection=MilvusConnection(\n",
-"            deployment_type=MilvusDeploymentType.FILE,\n",
-"            uri=\"./milvus.db\" \n",
+"            deployment_type=MilvusDeploymentType.FILE, uri=\"./milvus.db\"\n",
 "        ),\n",
-"        index_name=\"my_milvus_collection\", \n",
-"        dimension=1536, \n",
-"        top_k=5, \n",
+"        index_name=\"my_milvus_collection\",\n",
+"        dimension=1536,\n",
+"        top_k=5,\n",
 "    )\n",
 "    .inputs(embedding=embedder.outputs.embedding) # Connect to embedder output\n",
 "    .depends_on(embedder) # Dependency on the embedder node\n",
 ")\n",
 "\n",
 "# Add the retriever node to the workflow\n",
-"milvus_retriever_added = retrieval_wf.flow.add_nodes(document_retriever)\n"
+"milvus_retriever_added = retrieval_wf.flow.add_nodes(document_retriever)"
 ]
 },
 {
@@ -596,7 +607,7 @@
 "\"\"\"\n",
 "\n",
 "# Create the prompt object\n",
-"prompt = Prompt(messages=[Message(content=prompt_template, role=\"user\")])\n"
+"prompt = Prompt(messages=[Message(content=prompt_template, role=\"user\")])"
 ]
 },
 {
@@ -625,18 +636,20 @@
 "answer_generator = (\n",
 "    OpenAI(\n",
 "        connection=openai_connection,\n",
-"        model=\"gpt-4o\", \n",
-"        prompt=prompt, \n",
+"        model=\"gpt-4o\",\n",
+"        prompt=prompt,\n",
 "    )\n",
 "    .inputs(\n",
-"        documents=document_retriever.outputs.documents, \n",
-"        query=embedder.outputs.query, \n",
+"        documents=document_retriever.outputs.documents,\n",
+"        query=embedder.outputs.query,\n",
 "    )\n",
-"    .depends_on([document_retriever, embedder]) # Dependencies on retriever and embedder\n",
+"    .depends_on(\n",
+"        [document_retriever, embedder]\n",
+"    ) # Dependencies on retriever and embedder\n",
 ")\n",
 "\n",
 "# Add the answer generator node to the workflow\n",
-"answer_generator_added = retrieval_wf.flow.add_nodes(answer_generator)\n"
+"answer_generator_added = retrieval_wf.flow.add_nodes(answer_generator)"
 ]
 },
 {
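The diff shown here stops before the cell that actually executes the retrieval workflow. As a minimal, illustrative sketch (not part of this commit), running it could look like the following; it mirrors the rag_wf.run(input_data=...) call in the indexing hunks above, but the "query" input key and the way the answer is pulled out of the result object are assumptions, not confirmed by this diff.

# Illustrative sketch only -- not part of this commit's diff.
# Assumes the nodes defined above (embedder, document_retriever, answer_generator)
# have already been added to retrieval_wf.
question = "What is Milvus?"

# run(input_data=...) is the same entry point used for rag_wf above;
# the "query" input key is an assumption about the embedder's expected input.
result = retrieval_wf.run(input_data={"query": question})

# Assumed result layout: node outputs keyed by node id.
answer = result.output.get(answer_generator.id, {}).get("output", {}).get("content")
print(answer)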