-
Notifications
You must be signed in to change notification settings - Fork 123
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Milvus-doc-bot
authored and
Milvus-doc-bot
committed
Dec 6, 2024
1 parent
abda88c
commit cda20c8
Showing
100 changed files
with
9,559 additions
and
88 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
localization/v2.4.x/site/de/integrations/build_RAG_with_milvus_and_cognee.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"codeList":["$ pip install pymilvus git+https://github.com/topoteretes/cognee.git\n","import os\n\nimport cognee\n\ncognee.config.set_llm_api_key(\"YOUR_OPENAI_API_KEY\")\n\n\nos.environ[\"VECTOR_DB_PROVIDER\"] = \"milvus\"\nos.environ[\"VECTOR_DB_URL\"] = \"./milvus.db\"\n","$ wget https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\n$ unzip -q milvus_docs_2.4.x_en.zip -d milvus_docs\n","from glob import glob\n\ntext_lines = []\n\nfor file_path in glob(\"milvus_docs/en/faq/*.md\", recursive=True):\n with open(file_path, \"r\") as file:\n file_text = file.read()\n\n text_lines += file_text.split(\"# \")\n","await cognee.prune.prune_data()\nawait cognee.prune.prune_system(metadata=True)\n","await cognee.add(data=text_lines, dataset_name=\"milvus_faq\")\nawait cognee.cognify()\n\n# [DocumentChunk(id=UUID('6889e7ef-3670-555c-bb16-3eb50d1d30b0'), updated_at=datetime.datetime(2024, 12, 4, 6, 29, 46, 472907, tzinfo=datetime.timezone.utc), text='Does the query perform in memory? What are incremental data and historical data?\\n\\nYes. When ...\n# ...\n","from cognee.api.v1.search import SearchType\n\nquery_text = \"How is data stored in milvus?\"\nsearch_results = await cognee.search(SearchType.SUMMARIES, query_text=query_text)\n\nprint(search_results[0])\n","from cognee.api.v1.search import SearchType\n\nquery_text = \"How is data stored in milvus?\"\nsearch_results = await cognee.search(SearchType.CHUNKS, query_text=query_text)\n","def format_and_print(data):\n print(\"ID:\", data[\"id\"])\n print(\"\\nText:\\n\")\n paragraphs = data[\"text\"].split(\"\\n\\n\")\n for paragraph in paragraphs:\n print(paragraph.strip())\n print()\n\n\nformat_and_print(search_results[0])\n","await cognee.prune.prune_data()\nawait cognee.prune.prune_system(metadata=True)\n","# We only use one line of text as the dataset, which simplifies the output later\ntext = \"\"\"\n Natural language processing (NLP) is an interdisciplinary\n subfield of computer science and information retrieval.\n \"\"\"\n\nawait cognee.add(text)\nawait cognee.cognify()\n","query_text = \"Tell me about NLP\"\nsearch_results = await cognee.search(SearchType.INSIGHTS, query_text=query_text)\n\nfor result_text in search_results:\n print(result_text)\n\n# Example output:\n# ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'computer science', 'description': 'The study of computation and information processing.'})\n# (...)\n#\n# It represents nodes and relationships in the knowledge graph:\n# - The first element is the source node (e.g., 'natural language processing').\n# - The second element is the relationship between nodes (e.g., 'is_a_subfield_of').\n# - The third element is the target node (e.g., 'computer science').\n"],"headingContent":"","anchorList":[{"label":"RAG erstellen","href":"Build-RAG","type":2,"isActive":false}]} |
159 changes: 159 additions & 0 deletions
159
localization/v2.4.x/site/de/integrations/build_RAG_with_milvus_and_cognee.md
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
localization/v2.4.x/site/de/integrations/build_RAG_with_milvus_and_gemini.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"codeList":["$ pip install --upgrade pymilvus google-generativeai requests tqdm\n","import os\n\nos.environ[\"GEMINI_API_KEY\"] = \"***********\"\n","$ wget https://github.com/milvus-io/milvus-docs/releases/download/v2.4.6-preview/milvus_docs_2.4.x_en.zip\n$ unzip -q milvus_docs_2.4.x_en.zip -d milvus_docs\n","from glob import glob\n\ntext_lines = []\n\nfor file_path in glob(\"milvus_docs/en/faq/*.md\", recursive=True):\n with open(file_path, \"r\") as file:\n file_text = file.read()\n\n text_lines += file_text.split(\"# \")\n","import google.generativeai as genai\n\ngenai.configure(api_key=os.environ[\"GEMINI_API_KEY\"])\n\ngemini_model = genai.GenerativeModel(\"gemini-1.5-flash\")\n\nresponse = gemini_model.generate_content(\"who are you\")\nprint(response.text)\n","test_embeddings = genai.embed_content(\n model=\"models/text-embedding-004\", content=[\"This is a test1\", \"This is a test2\"]\n)[\"embedding\"]\n\nembedding_dim = len(test_embeddings[0])\nprint(embedding_dim)\nprint(test_embeddings[0][:10])\n","from pymilvus import MilvusClient\n\nmilvus_client = MilvusClient(uri=\"./milvus_demo.db\")\n\ncollection_name = \"my_rag_collection\"\n","if milvus_client.has_collection(collection_name):\n milvus_client.drop_collection(collection_name)\n","milvus_client.create_collection(\n collection_name=collection_name,\n dimension=embedding_dim,\n metric_type=\"IP\", # Inner product distance\n consistency_level=\"Strong\", # Strong consistency level\n)\n","from tqdm import tqdm\n\ndata = []\n\ndoc_embeddings = genai.embed_content(\n model=\"models/text-embedding-004\", content=text_lines\n)[\"embedding\"]\n\nfor i, line in enumerate(tqdm(text_lines, desc=\"Creating embeddings\")):\n data.append({\"id\": i, \"vector\": doc_embeddings[i], \"text\": line})\n\nmilvus_client.insert(collection_name=collection_name, data=data)\n","question = \"How is data stored in milvus?\"\n","question_embedding = genai.embed_content(\n model=\"models/text-embedding-004\", content=question\n)[\"embedding\"]\n\nsearch_res = milvus_client.search(\n collection_name=collection_name,\n data=[question_embedding],\n limit=3, # Return top 3 results\n search_params={\"metric_type\": \"IP\", \"params\": {}}, # Inner product distance\n output_fields=[\"text\"], # Return the text field\n)\n","import json\n\nretrieved_lines_with_distances = [\n (res[\"entity\"][\"text\"], res[\"distance\"]) for res in search_res[0]\n]\nprint(json.dumps(retrieved_lines_with_distances, indent=4))\n","context = \"\\n\".join(\n [line_with_distance[0] for line_with_distance in retrieved_lines_with_distances]\n)\n","SYSTEM_PROMPT = \"\"\"\nHuman: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.\n\"\"\"\nUSER_PROMPT = f\"\"\"\nUse the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.\n<context>\n{context}\n</context>\n<question>\n{question}\n</question>\n\"\"\"\n","gemini_model = genai.GenerativeModel(\n \"gemini-1.5-flash\", system_instruction=SYSTEM_PROMPT\n)\nresponse = gemini_model.generate_content(USER_PROMPT)\nprint(response.text)\n"],"headingContent":"Build RAG with Milvus and Gemini","anchorList":[{"label":"RAG mit Milvus und Gemini aufbauen","href":"Build-RAG-with-Milvus-and-Gemini","type":1,"isActive":false},{"label":"Vorbereitung","href":"Preparation","type":2,"isActive":false},{"label":"Laden Sie Daten in Milvus","href":"Load-data-into-Milvus","type":2,"isActive":false},{"label":"RAG erstellen","href":"Build-RAG","type":2,"isActive":false}]} |
Oops, something went wrong.