From 437a2e9e6a9857d1ade0224a90d9542a7c70c8cc Mon Sep 17 00:00:00 2001 From: John Aziz Date: Fri, 19 Jul 2024 16:35:30 +0300 Subject: [PATCH] update notebook and use url.parse --- .env.example | 2 + infra/main.bicep | 10 +- ...e-openai-cosmosdb-langchain-notebook.ipynb | 376 +++++++++--------- src/quartapp/config_base.py | 13 +- tests/test_app_endpoints.py | 1 - 5 files changed, 206 insertions(+), 196 deletions(-) rename CBD_Mongo_vCore.ipynb => rag-azure-openai-cosmosdb-langchain-notebook.ipynb (53%) diff --git a/.env.example b/.env.example index a8fab76..0f836a1 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,8 @@ AZURE_OPENAI_ENDPOINT="https://.openai.azure.com/" AZURE_OPENAI_API_KEY="" # Environment variable obtained from Azure Cosmos DB for MongoDB vCore AZURE_COSMOS_CONNECTION_STRING="" +AZURE_COSMOS_USERNAME="" +AZURE_COSMOS_PASSWORD="" # Environment variables you set to be used by the code AZURE_COSMOS_DATABASE_NAME="" AZURE_COSMOS_COLLECTION_NAME="" diff --git a/infra/main.bicep b/infra/main.bicep index 60319e2..06f4f5e 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -133,12 +133,12 @@ module mongoCluster 'core/database/cosmos/mongo/cosmos-mongo-cluster.bicep' = { module keyVaultSecrets './core/security/keyvault-secret.bicep' = { dependsOn: [ mongoCluster ] - name: 'keyvault-secret-mongo-connstr' + name: 'keyvault-secret-mongo-password' scope: resourceGroup params: { - name: 'mongoConnectionStr' + name: 'mongoAdminPassword' keyVaultName: keyVault.outputs.name - secretValue: replace(replace(mongoCluster.outputs.connectionStringKey, '', mongoAdminUser), '', mongoAdminPassword) + secretValue: mongoAdminPassword } } @@ -165,7 +165,9 @@ module web 'core/host/appservice.bicep' = { AZURE_OPENAI_EMBEDDINGS_MODEL_NAME: embeddingModelName AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: embeddingDeploymentName AZURE_OPENAI_API_KEY: '@Microsoft.KeyVault(VaultName=${keyVault.outputs.name};SecretName=cognitiveServiceKey)' - AZURE_COSMOS_CONNECTION_STRING: '@Microsoft.KeyVault(VaultName=${keyVault.outputs.name};SecretName=mongoConnectionStr)' + AZURE_COSMOS_PASSWORD: '@Microsoft.KeyVault(VaultName=${keyVault.outputs.name};SecretName=mongoAdminPassword)' + AZURE_COSMOS_CONNECTION_STRING: mongoCluster.outputs.connectionStringKey + AZURE_COSMOS_USERNAME: mongoAdminUser AZURE_COSMOS_DATABASE_NAME: 'lc_database' AZURE_COSMOS_COLLECTION_NAME: 'lc_collection' } diff --git a/CBD_Mongo_vCore.ipynb b/rag-azure-openai-cosmosdb-langchain-notebook.ipynb similarity index 53% rename from CBD_Mongo_vCore.ipynb rename to rag-azure-openai-cosmosdb-langchain-notebook.ipynb index 6ef6403..280798c 100644 --- a/CBD_Mongo_vCore.ipynb +++ b/rag-azure-openai-cosmosdb-langchain-notebook.ipynb @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -46,7 +46,7 @@ "True" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -57,7 +57,7 @@ "\n", "from dotenv import load_dotenv\n", "\n", - "load_dotenv()" + "load_dotenv(\".env\", override=True)" ] }, { @@ -71,19 +71,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Save the `api_type`, `api_base`, `api_version`, and `api_key` as global variables to avoid the need to supply them later in code." + "Save the `api_type`, `base_url`, `api_version`, and `api_key` as global variables to avoid the need to supply them later in code." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import openai\n", "\n", "openai.api_type = os.getenv(\"OPENAI_API_TYPE\", \"azure\")\n", - "openai.api_base = os.getenv(\"AZURE_OPENAI_ENDPOINT\", \"https://.openai.azure.com/\")\n", + "openai.base_url = os.getenv(\"AZURE_OPENAI_ENDPOINT\", \"https://.openai.azure.com/\")\n", "openai.api_version = os.getenv(\"OPENAI_API_VERSION\", \"2023-09-15-preview\")\n", "openai.api_key = os.getenv(\"OPENAI_API_KEY\", \"\")" ] @@ -97,23 +97,30 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11099/3617200806.py:9: UserWarning: You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", + "/var/folders/db/x_x115ns61154jxycm1mwr780000gn/T/ipykernel_29265/1114347389.py:14: UserWarning: You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", " mongo_client = MongoClient(mongo_connection_string)\n" ] } ], "source": [ "from pymongo import MongoClient\n", + "from urllib.parse import quote_plus\n", "\n", "# Read and Store Environment variables\n", "mongo_connection_string = os.getenv(\"AZURE_COSMOS_CONNECTION_STRING\", \"\")\n", + "mongo_username = quote_plus(os.getenv(\"AZURE_COSMOS_USERNAME\"))\n", + "mongo_password = quote_plus(os.getenv(\"AZURE_COSMOS_PASSWORD\"))\n", + "mongo_connection_string = mongo_connection_string.replace(\"\", mongo_username).replace(\n", + " \"\", mongo_password\n", + ")\n", + "\n", "collection_name = os.getenv(\"AZURE_COSMOS_COLLECTION_NAME\", \"collectionName\")\n", "database_name = os.getenv(\"AZURE_COSMOS_DATABASE_NAME\", \"DatabaseName\")\n", "\n", @@ -136,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -146,21 +153,18 @@ "\n", "SOURCE_FILE_NAME = \"./src/data/food_items.json\"\n", "\n", + "\n", "def read_data(file_path) -> list[Document]:\n", " # Load JSON file\n", " with open(file_path) as file:\n", " json_data = json.load(file)\n", "\n", - "\n", " documents = []\n", " absolute_path = os.path.abspath(file_path)\n", " # Process each item in the JSON data\n", " for idx, item in enumerate(json_data):\n", " documents.append(\n", - " Document(\n", - " page_content=json.dumps(item),\n", - " metadata={'source': absolute_path, 'seq_num': idx+1}\n", - " )\n", + " Document(page_content=json.dumps(item), metadata={\"source\": absolute_path, \"seq_num\": idx + 1})\n", " )\n", "\n", " return documents" @@ -168,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -177,14 +181,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "page_content='{\"category\": \"Smoothies\", \"name\": \"Jimmy Jam Smoothie\", \"description\": \"Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fruity tasty smoothies are blended to perfection.\", \"price\": \"5.49 USD\"}' metadata={'source': '/home/john/repos/Cosmic-Food-RAG-app/src/data/food_items.json', 'seq_num': 2}\n" + "page_content='{\"category\": \"Smoothies\", \"name\": \"Jimmy Jam Smoothie\", \"description\": \"Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fruity tasty smoothies are blended to perfection.\", \"price\": \"5.49 USD\"}' metadata={'source': '/Users/john0isaac/Developer/Cosmic-Food-RAG-app/src/data/food_items.json', 'seq_num': 2}\n" ] } ], @@ -202,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -226,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -246,14 +250,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/john/repos/Cosmic-Food-RAG-app/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/azure_cosmos_db.py:146: UserWarning: You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", + "/Users/john0isaac/Developer/Cosmic-Food-RAG-app/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/azure_cosmos_db.py:146: UserWarning: You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", " client: MongoClient = MongoClient(connection_string, appname=appname)\n" ] } @@ -262,7 +266,7 @@ "from langchain_community.vectorstores.azure_cosmos_db import AzureCosmosDBVectorSearch\n", "\n", "# Run this to connect to the vector store\n", - "vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_connection_string(\n", + "vector_store: AzureCosmosDBVectorSearch = AzureCosmosDBVectorSearch.from_connection_string(\n", " connection_string=mongo_connection_string,\n", " namespace=f\"{database_name}.{collection_name}\",\n", " embedding=azure_openai_embeddings,\n", @@ -278,7 +282,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -292,7 +296,7 @@ " 'ok': 1}" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -312,9 +316,7 @@ "ef_construction = 64\n", "\n", "# Create the collection and the index\n", - "vector_store.create_index(\n", - " num_lists, dimensions, similarity_algorithm, kind, m, ef_construction\n", - ")" + "vector_store.create_index(num_lists, dimensions, similarity_algorithm, kind, m, ef_construction)" ] }, { @@ -326,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -352,14 +354,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from langchain_openai import AzureChatOpenAI\n", "\n", "openai_chat_model = os.getenv(\"AZURE_OPENAI_CHAT_MODEL_NAME\", \"gpt-35-turbo\")\n", - "openai_chat_deployment= os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\", \"chat-gpt\")\n", + "openai_chat_deployment = os.getenv(\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\", \"chat-gpt\")\n", "\n", "azure_openai_chat: AzureChatOpenAI = AzureChatOpenAI(\n", " model=openai_chat_model,\n", @@ -369,14 +371,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Why did the tomato turn red? Because it saw the salad dressing!\n" + "Why did the tomato turn red?\n", + "\n", + "Because it saw the salad dressing!\n" ] } ], @@ -388,45 +392,27 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Education is the backbone of society. It is the foundation that enables individuals to acquire knowledge and skills, and it is through education that individuals are able to grow and develop. Education is not just about acquiring knowledge; it is also about learning how to think critically, how to communicate effectively, and how to become productive members of society. In this essay, I will explore the importance of education, the challenges facing education, and the future of education.\n", - "\n", - "The Importance of Education\n", - "\n", - "Education is important because it provides individuals with the knowledge and skills they need to succeed in life. Education enables individuals to acquire the skills they need to enter the workforce and to become productive members of society. It also provides individuals with the knowledge they need to make informed decisions about their lives, including decisions about their health, finances, and relationships.\n", - "\n", - "Education is also important because it promotes social mobility. Education provides individuals with the opportunity to move up the social ladder, regardless of their background. Education is the great equalizer, and it is through education that individuals can overcome the barriers of poverty, discrimination, and inequality.\n", - "\n", - "Challenges Facing Education\n", - "\n", - "Despite the importance of education, there are many challenges facing education today. One of the biggest challenges is the lack of funding. Education is expensive, and many schools and universities struggle to provide the resources they need to provide a quality education to their students. This lack of funding often leads to overcrowded classrooms, outdated materials and technology, and a lack of opportunities for students.\n", + "Education is a fundamental right of every individual and plays a crucial role in determining their future success. It is the process of acquiring knowledge, skills, values, and attitudes that enable individuals to lead a meaningful and fulfilling life. Education is not only important for personal development but also for the development of society as a whole.\n", "\n", - "Another challenge facing education is the lack of access to education for marginalized communities. Many individuals, particularly those living in poverty or in rural areas, do not have access to quality education. This lack of access often perpetuates the cycle of poverty and inequality, as individuals without education are unable to secure jobs that pay a living wage.\n", + "Education provides individuals with the necessary skills and knowledge to succeed in their chosen field. It equips them with the ability to think critically, solve problems, and make informed decisions. Education also helps individuals to develop their creativity and imagination, which is essential in today's rapidly changing world.\n", "\n", - "Finally, there is also a challenge of the quality of education. While some schools and universities provide a quality education, others do not. This often leads to a disparity in the quality of education that students receive, and this disparity can lead to a lack of opportunities for some individuals.\n", + "Moreover, education plays a vital role in promoting societal development. It helps to create a more educated and informed population, which can contribute to the growth and development of the economy. Education also promotes social cohesion and helps to reduce social inequalities by providing equal opportunities to all individuals.\n", "\n", - "The Future of Education\n", + "However, access to education is still a challenge for many people, especially in developing countries. Lack of access to education can lead to poverty, unemployment, and limited opportunities for personal growth. Therefore, there is a need to provide equal access to education for everyone, regardless of their background or economic status.\n", "\n", - "Despite the challenges facing education, there is also a bright future for education. With advances in technology, education is becoming more accessible than ever before. Online learning platforms, such as Coursera and Khan Academy, are providing individuals with access to quality education from the comfort of their own homes.\n", - "\n", - "In addition, there is also a growing emphasis on experiential learning. Experiential learning involves learning through doing, and it is becoming an increasingly popular method of education. This method of education provides students with the opportunity to apply what they have learned in real-world situations, which can lead to a deeper understanding of the material.\n", - "\n", - "Finally, there is also a growing emphasis on lifelong learning. Lifelong learning involves the idea that education is not just something that happens in school; it is something that happens throughout our lives. Lifelong learning encourages individuals to continue learning and growing throughout their lives, which can lead to personal and professional growth.\n", - "\n", - "Conclusion\n", - "\n", - "Education is the foundation of society. It provides individuals with the knowledge and skills they need to succeed in life, and it is through education that individuals can overcome the barriers of poverty, discrimination, and inequality. While there are many challenges facing education, there is also a bright future for education. With advances in technology, experiential learning, and lifelong learning, education is becoming more accessible and more effective than ever before. As we move forward, it is important to continue to invest in education and to ensure that everyone has access to quality education." + "In conclusion, education is a basic human right that should be accessible to all individuals. It is essential for personal development, societal growth, and economic prosperity. Therefore, it is the responsibility of governments, institutions, and individuals to ensure that everyone has access to quality education." ] } ], "source": [ - "chat_response = azure_openai_chat.astream(\"Write a 500 words essay about education.\")\n", + "chat_response = azure_openai_chat.astream(\"Write a 200 words essay about education.\")\n", "\n", "async for response in chat_response:\n", " print(response.content, end=\"\")" @@ -441,37 +427,57 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from langchain.prompts import ChatPromptTemplate\n", - "from langchain_core.prompts import MessagesPlaceholder\n", - "\n", - "history_prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " MessagesPlaceholder(variable_name=\"chat_history\"),\n", - " (\"user\", \"{input}\"),\n", - " (\n", - " \"user\",\n", - " \"\"\"Given the above conversation,\n", - " generate a search query to look up to get information relevant to the conversation\"\"\",\n", - " ),\n", - " ]\n", - ")\n", "\n", - "context_prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " (\"system\", \"Answer the user's questions based on the below context:\\n\\n{context}\"),\n", - " MessagesPlaceholder(variable_name=\"chat_history\"),\n", - " (\"user\", \"{input}\"),\n", - " ]\n", - ")" + "\n", + "REPHRASE_PROMPT = \"\"\"\\\n", + "Given the following conversation and a follow up question, rephrase the follow up \\\n", + "question to be a standalone question.\n", + "\n", + "Chat History:\n", + "{chat_history}\n", + "Follow Up Input: {question}\n", + "Standalone Question:\"\"\"\n", + "\n", + "CONTEXT_PROMPT = \"\"\"\\\n", + "You are a restaurant chatbot, tasked with answering any question about \\\n", + "food dishes from the contex.\n", + "\n", + "Generate a response of 80 words or less for the \\\n", + "given question based solely on the provided search results (name, description, price, and category). \\\n", + "You must only use information from the provided search results. Use an unbiased and \\\n", + "fun tone. Do not repeat text. Your response must be solely based on the provided context.\n", + "\n", + "If there is nothing in the context relevant to the question at hand, just say \"Hmm, \\\n", + "I'm not sure.\" Don't try to make up an answer.\n", + "\n", + "Anything between the following `context` html blocks is retrieved from a knowledge \\\n", + "bank, not part of the conversation with the user. \n", + "\n", + "\n", + " {context} \n", + "\n", + "\n", + "REMEMBER: If there is no relevant information within the context, just say \"Hmm, I'm \\\n", + "not sure.\" Don't try to make up an answer. Anything between the preceding 'context' \\\n", + "html blocks is retrieved from a knowledge bank, not part of the conversation with the \\\n", + "user.\\\n", + "\n", + "User Question: {input}\n", + "\n", + "Chatbot Response:\"\"\"\n", + "\n", + "rephrase_prompt_template = ChatPromptTemplate.from_template(REPHRASE_PROMPT)\n", + "context_prompt_template = ChatPromptTemplate.from_template(CONTEXT_PROMPT)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -482,28 +488,30 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-07-19 16:25:27,396] {logger.py:101} INFO - You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", + "[2024-07-19 16:25:27,415] {logger.py:101} INFO - You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n", + "[2024-07-19 16:25:27,433] {logger.py:101} INFO - You appear to be connected to a CosmosDB cluster. For more information regarding feature compatibility and support please visit https://www.mongodb.com/supportability/cosmosdb\n" + ] + } + ], "source": [ - "from langchain.chains.combine_documents import create_stuff_documents_chain\n", - "from langchain.chains.history_aware_retriever import create_history_aware_retriever\n", - "from langchain.chains.retrieval import create_retrieval_chain\n", - "from langchain_core.runnables import Runnable\n", + "from quartapp.approaches.rag import get_data_points\n", "\n", + "# Vector Store Retriever\n", "vector_store_retriever = vector_store.as_retriever(\n", " search_type=search_type, search_kwargs={\"k\": limit, \"score_threshold\": score_threshold}\n", ")\n", - "\n", - "\n", - "retriever_chain = create_history_aware_retriever(azure_openai_chat, vector_store_retriever, history_prompt)\n", - "\n", - "context_chain = create_stuff_documents_chain(llm=azure_openai_chat, prompt=context_prompt)\n", - "\n", - "rag_chain: Runnable = create_retrieval_chain(\n", - " retriever=retriever_chain,\n", - " combine_docs_chain=context_chain,\n", - ")" + "# Rephrase Chain\n", + "rephrase_chain = rephrase_prompt_template | azure_openai_chat\n", + "# Context Chain\n", + "context_chain = context_prompt_template | azure_openai_chat" ] }, { @@ -515,137 +523,137 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 17, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-07-19 16:25:34,499] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/chat-gpt/chat/completions?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Yes, we have two vegan options: the Beyond Burger and the Tofu Salad Sandwich.\n" + "What vegan options do you offer?\n" ] } ], "source": [ - "first_question = \"Do you have any vegan options?\"\n", - "chat_history = []\n", - "response = rag_chain.invoke({\"input\": first_question, \"chat_history\": chat_history})\n", - "print(response['answer'])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.messages import HumanMessage\n", - "\n", - "chat_history.extend([HumanMessage(content=first_question), response[\"answer\"]])\n", + "# 1. Rephrase the question\n", + "messages = [{\"content\": \"Do you have any vegan options?\", \"role\": \"user\"}]\n", "\n", - "second_question = \"What did I just ask you about?\"\n", - "response = rag_chain.invoke({\"input\": second_question, \"chat_history\": chat_history})" + "rephrased_question = rephrase_chain.invoke({\"chat_history\": messages[:-1], \"question\": messages[-1]})\n", + "print(rephrased_question.content)" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 18, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-07-19 16:25:37,722] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/text-embedding/embeddings?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "You asked if there are any vegan options available.\n" + "[DataPoint(name='Beyond Burger', description='Served with Romaine lettuce, tomato, pickle, vegan mayonnaise, ketchup, and mustard on a toasted bun. Sandwich made with whole wheat bread. Can be made as a wrap in a whole wheat tortilla. Served with kettle potato chips or corn tortilla chips.', price='9.0 USD', category='Sandwiches', collection=None), DataPoint(name='Tofu Salad Sandwich', description='Served with Romaine lettuce, tomato, vegan mayonnaise, and mustard. Sandwich made with whole wheat bread. Can be made as a wrap in a whole wheat tortilla. Served with kettle potato chips or corn tortilla chips.', price='9.0 USD', category='Sandwiches', collection=None), DataPoint(name=\"Boca Chik'n Sandwich\", description='Served with Romaine lettuce, tomato, pickle, vegan mayonnaise, ketchup, and mustard on a toasted bun. Sandwich made with whole wheat bread. Can be made as a wrap in a whole wheat tortilla. Served with kettle potato chips or corn tortilla chips.', price='9.0 USD', category='Sandwiches', collection=None)]\n" ] } ], "source": [ - "print(response['answer'])" + "# 2. Get the context from the database and format it to remove the embeddings\n", + "vector_context = vector_store_retriever.invoke(str(rephrased_question.content))\n", + "data_points = get_data_points(vector_context)\n", + "print(data_points)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 19, "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-07-19 16:25:41,757] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/chat-gpt/chat/completions?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We have several vegan options available, including the Beyond Burger, Tofu Salad Sandwich, and Boca Chik'n Sandwich. All of these sandwiches come with vegan mayonnaise, and can be made as a wrap in a whole wheat tortilla. They are each served with kettle potato chips or corn tortilla chips.\n" + ] + } + ], "source": [ - "# Test with Gradio" + "# 3. Generate a response based on the context\n", + "response = context_chain.invoke({\"context\": [dp.to_dict() for dp in data_points], \"input\": rephrased_question.content})\n", + "print(response.content)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "%pip install ipywidgets gradio" + "# 4. Store the chat history and the response\n", + "messages.append({\"content\": response.content, \"role\": \"assistant\"})" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2024-07-19 16:25:48,395] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/chat-gpt/chat/completions?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n", + "[2024-07-19 16:25:48,942] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/text-embedding/embeddings?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n", + "[2024-07-19 16:25:50,271] {_client.py:1026} INFO - HTTP Request: POST https://build-24-openai.openai.azure.com//openai/deployments/chat-gpt/chat/completions?api-version=2023-09-15-preview \"HTTP/1.1 200 OK\"\n" + ] + } + ], "source": [ - "import gradio as gr\n", - "\n", + "# Test with another question to see if the chat history is maintained\n", + "messages.append({\"content\": \"what is the price of the first dish?\", \"role\": \"user\"})\n", "\n", - "def setup_gradio_interface(chain): \n", - " with gr.Blocks() as demo_interface:\n", - " chatbot = gr.Chatbot(label=\"Food Ordering System\")\n", - " chat_history = gr.State([])\n", - " lc_chat_history = gr.State([])\n", - " msg = gr.Textbox(label=\"Your question\")\n", - " gr.ClearButton([msg, chatbot])\n", - " \n", - " def fetch_response(message, chat_history, lc_chat_history):\n", - " response = chain.invoke({\"question\": message, \"chat_history\": lc_chat_history})\n", - " lc_chat_history.append((message, response['answer']))\n", - " chat_history.append([message, response[\"answer\"]])\n", - " return \"\", chat_history, lc_chat_history\n", - "\n", - " msg.submit(fetch_response, inputs=[msg, chatbot, lc_chat_history], outputs=[msg, chatbot, lc_chat_history])\n", - " \n", - " return demo_interface" + "rephrased_question = rephrase_chain.invoke({\"chat_history\": messages[:-1], \"question\": messages[-1]})\n", + "vector_context = vector_store_retriever.invoke(str(rephrased_question.content))\n", + "data_points = get_data_points(vector_context)\n", + "response = context_chain.invoke({\"context\": [dp.to_dict() for dp in data_points], \"input\": rephrased_question.content})" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Running on local URL: http://127.0.0.1:7860\n", - "\n", - "To create a public link, set `share=True` in `launch()`.\n" + "Rephrased Question: What is the price of the Beyond Burger?\n", + "LLM Response: The price of the Beyond Burger is 9.0 USD. It's a delicious vegan burger served with Romaine lettuce, tomato, pickle, vegan mayonnaise, ketchup, and mustard on a toasted bun. You can also choose to have it made as a wrap in a whole wheat tortilla. It comes with your choice of kettle potato chips or corn tortilla chips.\n" ] - }, - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "food_ordering_demo = setup_gradio_interface(rag_chain)\n", - "food_ordering_demo.launch()" + "print(\"Rephrased Question: \", rephrased_question.content)\n", + "print(\"LLM Response: \", response.content)" ] }, { @@ -692,35 +700,23 @@ " \"m\": 4,\n", " \"efConstruction\": 16,\n", " \"similarity\": \"COS\",\n", - " \"dimensions\": 1536\n", - " }\n", + " \"dimensions\": 1536,\n", + " },\n", " }\n", - " ]\n", + " ],\n", "}\n", - "db.command(createIndexCommand)\n" + "db.command(createIndexCommand)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Score: 1.000] Ashunti`Way Smoothie: Fruit n greens, mango bananas, tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special green with strawberry bananas juice blend . Our fruity tasty smoothies are blended to perfection.\n", - "[Score: 0.986] Dayton 500 Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples. Special green juice blend. Our fruity tasty smoothies are blended to perfection.\n", - "[Score: 0.973] Tongue Teaser Smoothie: Tropical fruit blend, dragon fruit, pineapples, bananas, mango, apples, spinach, ginger powder. Special green blend, pineapple and ginger smoothies. Our fruity tasty smoothies are blended to perfection.\n", - "[Score: 0.967] Tejay Impact Smoothie: Tropical fruit blend, dragon fruit mix, mango, bananas, pineapples, apples, and spinach. Special blue juice blend smoothies.\n", - "[Score: 0.961] Jimmy Jam Smoothie: Berries n kale, strawberries, bananas, blueberries kale, tropical fruit blend, and dragon fruit. Our fruity tasty smoothies are blended to perfection.\n" - ] - } - ], + "outputs": [], "source": [ - "search_pipeline = [ \n", - " { \"$search\": { \"cosmosSearch\": { \"query\": docs[0][\"description\"], \"k\": 5, \"path\": \"embeddings\", \"efSearch\": 100 }}} , \n", - " { \"$project\": { \"similarityScore\": { \"$meta\": \"searchScore\" }, \"_id\":0, \"name\":1, \"description\":1 } }\n", + "search_pipeline = [\n", + " {\"$search\": {\"cosmosSearch\": {\"query\": docs[0][\"description\"], \"k\": 5, \"path\": \"embeddings\", \"efSearch\": 100}}},\n", + " {\"$project\": {\"similarityScore\": {\"$meta\": \"searchScore\"}, \"_id\": 0, \"name\": 1, \"description\": 1}},\n", "]\n", "\n", "results = collection.aggregate(search_pipeline)\n", @@ -746,7 +742,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/src/quartapp/config_base.py b/src/quartapp/config_base.py index 6c8ba6c..fb04245 100644 --- a/src/quartapp/config_base.py +++ b/src/quartapp/config_base.py @@ -1,6 +1,7 @@ import json import os from abc import ABC, abstractmethod +from urllib.parse import quote_plus from langchain_core.documents import Document from pydantic.v1 import SecretStr @@ -15,13 +16,23 @@ from quartapp.approaches.setup import Setup +def read_and_parse_connection_string() -> str: + mongo_connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING", "YOUR-COSMOS-DB-CONNECTION-STRING") + mongo_username = quote_plus(os.getenv("AZURE_COSMOS_USERNAME", "YOUR-COSMOS-DB-USERNAME")) + mongo_password = quote_plus(os.getenv("AZURE_COSMOS_PASSWORD", "YOUR-COSMOS-DB-PASSWORD")) + mongo_connection_string = mongo_connection_string.replace("", mongo_username).replace( + "", mongo_password + ) + return mongo_connection_string + + class AppConfigBase(ABC): def __init__(self) -> None: openai_embeddings_model = os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002") openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", "text-embedding") openai_chat_model = os.getenv("AZURE_OPENAI_CHAT_MODEL_NAME", "gpt-35-turbo") openai_chat_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "chat-gpt") - connection_string = os.getenv("AZURE_COSMOS_CONNECTION_STRING", "") + connection_string = read_and_parse_connection_string() database_name = os.getenv("AZURE_COSMOS_DATABASE_NAME", "") collection_name = os.getenv("AZURE_COSMOS_COLLECTION_NAME", "") index_name = os.getenv("AZURE_COSMOS_INDEX_NAME", "") diff --git a/tests/test_app_endpoints.py b/tests/test_app_endpoints.py index e57e197..8b95196 100644 --- a/tests/test_app_endpoints.py +++ b/tests/test_app_endpoints.py @@ -40,7 +40,6 @@ async def test_favicon(client): favicon_file = f.read() assert response.status_code == 200 - assert response.content_type == "image/vnd.microsoft.icon" assert response.headers["Content-Length"] == str(len(favicon_file)) assert favicon_file == await response.data