From 11cc2a81ddfe7b2092fe298a903c4c25eb751382 Mon Sep 17 00:00:00 2001
From: john0isaac <johnaziz269@gmail.com>
Date: Fri, 21 Jun 2024 05:52:15 +0300
Subject: [PATCH 1/4] Add streaming

---
 frontend/src/pages/chat/Chat.tsx |  10 +--
 src/quartapp/app.py              |  61 +++++++++++++++-
 src/quartapp/approaches/rag.py   | 120 +++++++++++++++++++++++++------
 src/quartapp/config.py           |  25 +++++++
 4 files changed, 185 insertions(+), 31 deletions(-)
diff --git a/frontend/src/pages/chat/Chat.tsx b/frontend/src/pages/chat/Chat.tsx
index 6fcb9c3..b27ac8f 100644
--- a/frontend/src/pages/chat/Chat.tsx
+++ b/frontend/src/pages/chat/Chat.tsx
@@ -33,7 +33,7 @@ const Chat = () => {
 
     const [isLoading, setIsLoading] = useState<boolean>(false);
     const [isStreaming, setIsStreaming] = useState<boolean>(false);
-    const [shouldStream, setShouldStream] = useState<boolean>(false);
+    const [shouldStream, setShouldStream] = useState<boolean>(true);
     const [isBuy, setIsBuy] = useState<boolean>(false);
     const [address, setAddress] = useState<string>("");
     const [cartItems, setCartItems] = useState<string[]>([]);
@@ -97,12 +97,9 @@ const Chat = () => {
             for await (const event of readNDJSONStream(responseBody)) {
                 if (event["context"] && event["context"]["data_points"] && event["message"]) {
                     askResponse = event as ChatAppResponse;
-                } else if (event["message"]["content"]) {
+                } else if (event["content"]) {
                     setIsLoading(false);
-                    await updateState(event["message"]["content"]);
-                } else if (event["context"]) {
-                    // Update context with new keys from latest event
-                    askResponse.context = { ...askResponse.context, ...event["context"] };
+                    await updateState(event["content"]);
                 } else if (event["error"]) {
                     throw Error(event["error"]);
                 }
@@ -406,7 +403,6 @@ const Chat = () => {
                         onRenderLabel={(props: ICheckboxProps | undefined) => (
                             <HelpCallout labelId={shouldStreamId} fieldId={shouldStreamFieldId} helpText={toolTipText.streamChat} label={props?.label} />
                         )}
-                        disabled={true}
                     />
                 </Panel>
             </div>
diff --git a/src/quartapp/app.py b/src/quartapp/app.py
index 942a226..f93ff2e 100644
--- a/src/quartapp/app.py
+++ b/src/quartapp/app.py
@@ -1,10 +1,13 @@
 import logging
+from collections.abc import AsyncGenerator
+from dataclasses import asdict, is_dataclass
+from json import JSONEncoder, dumps
 from pathlib import Path
 from typing import Any
 
-from quart import Quart, Response, jsonify, request, send_file, send_from_directory
+from quart import Quart, Response, jsonify, make_response, request, send_file, send_from_directory
 
-from quartapp.approaches.schemas import RetrievalResponse
+from quartapp.approaches.schemas import Message, RetrievalResponse
 from quartapp.config import AppConfig
 
 logging.basicConfig(
@@ -14,6 +17,25 @@
 )
 
 
+class CustomJSONEncoder(JSONEncoder):
+    def default(self, o: Any) -> Any:
+        if is_dataclass(o):
+            return asdict(o)
+        return super().default(o)
+
+
+async def format_as_ndjson(r: AsyncGenerator[RetrievalResponse | Message, None]) -> AsyncGenerator[str, None]:
+    """
+    Format the response as NDJSON
+    """
+    try:
+        async for event in r:
+            yield dumps(event.to_dict(), ensure_ascii=False, cls=CustomJSONEncoder) + "\n"
+    except Exception as error:
+        logging.exception("Exception while generating response stream: %s", error)
+        yield dumps({"error": str(error)}, ensure_ascii=False) + "\n"
+
+
 def create_app(app_config: AppConfig, test_config: dict[str, Any] | None = None) -> Quart:
     app = Quart(__name__, static_folder="static")
 
@@ -56,8 +78,13 @@ async def chat() -> Any:
         if not body:
             return jsonify({"error": "request body is empty"}), 400
 
-        # Get the request message, session_state, context from the request body
+        # Get the request message
         messages: list = body.get("messages", [])
+
+        if not messages:
+            return jsonify({"error": "request must have a message"}), 400
+
+        # Get the request session_state, context from the request body
         session_state = body.get("session_state", None)
         context = body.get("context", {})
 
@@ -90,6 +117,34 @@ async def stream_chat() -> Any:
         if not body:
             return jsonify({"error": "request body is empty"}), 400
 
+        # Get the request message
+        messages: list = body.get("messages", [])
+
+        if not messages:
+            return jsonify({"error": "request must have a message"}), 400
+
+        # Get the request session_state, context from the request body
+        session_state = body.get("session_state", None)
+        context = body.get("context", {})
+
+        # Get the overrides from the context
+        override = context.get("overrides", {})
+        retrieval_mode: str = override.get("retrieval_mode", "vector")
+        temperature: float = override.get("temperature", 0.3)
+        top: int = override.get("top", 3)
+        score_threshold: float = override.get("score_threshold", 0.5)
+
+        if retrieval_mode == "rag":
+            result: AsyncGenerator[RetrievalResponse | Message, None] = app_config.run_rag_stream(
+                session_state=session_state,
+                messages=messages,
+                temperature=temperature,
+                limit=top,
+                score_threshold=score_threshold,
+            )
+            response = await make_response(format_as_ndjson(result))
+            response.mimetype = "application/x-ndjson"
+            return response
         return jsonify({"error": "Not Implemented!"}), 501
 
     return app
diff --git a/src/quartapp/approaches/rag.py b/src/quartapp/approaches/rag.py
index b26878d..789b9b1 100644
--- a/src/quartapp/approaches/rag.py
+++ b/src/quartapp/approaches/rag.py
@@ -1,25 +1,64 @@
-from langchain.chains.combine_documents import create_stuff_documents_chain
+import json
+from collections.abc import AsyncIterator
+
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.documents import Document
+from langchain_core.messages import BaseMessage
 
 from quartapp.approaches.base import ApproachesBase
+from quartapp.approaches.schemas import DataPoint
+
+
+def get_data_points(documents: list[Document]) -> list[DataPoint]:
+    data_points: list[DataPoint] = []
+
+    for res in documents:
+        raw_data = json.loads(res.page_content)
+        json_data_point: DataPoint = DataPoint()
+        json_data_point.name = raw_data.get("name")
+        json_data_point.description = raw_data.get("description")
+        json_data_point.price = raw_data.get("price")
+        json_data_point.category = raw_data.get("category")
+        data_points.append(json_data_point)
+    return data_points
+
 
-chat_history_prompt = """\
+REPHRASE_PROMPT = """\
 Given the following conversation and a follow up question, rephrase the follow up \
 question to be a standalone question.
 
 Chat History:
-{messages}
-
+{chat_history}
+Follow Up Input: {question}
 Standalone Question:"""
 
-context_prompt = """You are a chatbot that can have a conversation about Food dishes from the context.
-Answer the following question based only on the provided context:
+CONTEXT_PROMPT = """\
+You are a restaurant chatbot, tasked with answering any question about \
+food dishes from the contex.
+
+Generate a response of 80 words or less for the \
+given question based solely on the provided search results (name, description, price, and category). \
+You must only use information from the provided search results. Use an unbiased and \
+fun tone. Do not repeat text. Your response must be solely based on the provided context.
+
+If there is nothing in the context relevant to the question at hand, just say "Hmm, \
+I'm not sure." Don't try to make up an answer.
+
+Anything between the following `context` html blocks is retrieved from a knowledge \
+bank, not part of the conversation with the user. 
 
-Context:
-{context}
+<context>
+    {context} 
+<context/>
 
-Question: {input}"""
+REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
+not sure." Don't try to make up an answer. Anything between the preceding 'context' \
+html blocks is retrieved from a knowledge bank, not part of the conversation with the \
+user.\
+
+User Question: {input}
+
+Chatbot Response:"""
 
 
 class RAG(ApproachesBase):
@@ -35,31 +74,70 @@ async def run(
             self._chat.temperature = 0.3
 
             # Create a vector context aware chat retriever
-            history_prompt_template = ChatPromptTemplate.from_template(chat_history_prompt)
-            history_chain = history_prompt_template | self._chat
+            rephrase_prompt_template = ChatPromptTemplate.from_template(REPHRASE_PROMPT)
+            rephrase_chain = rephrase_prompt_template | self._chat
 
             # Rephrase the question
-            rephrased_question = await history_chain.ainvoke({"messages": messages})
+            rephrased_question = await rephrase_chain.ainvoke({"chat_history": messages[:-1], "question": messages[-1]})
 
             print(rephrased_question.content)
             # Perform vector search
             vector_context = await retriever.ainvoke(str(rephrased_question.content))
+            data_points: list[DataPoint] = get_data_points(vector_context)
 
             # Create a vector context aware chat retriever
-            context_prompt_template = ChatPromptTemplate.from_template(context_prompt)
-            document_chain = create_stuff_documents_chain(self._chat, context_prompt_template)
-
+            context_prompt_template = ChatPromptTemplate.from_template(CONTEXT_PROMPT)
             self._chat.temperature = temperature
+            context_chain = context_prompt_template | self._chat
 
-            if vector_context:
+            if data_points:
                 # Perform RAG search
-                response = await document_chain.ainvoke(
-                    {"context": vector_context, "input": rephrased_question.content}
+                response = await context_chain.ainvoke(
+                    {"context": [dp.to_dict() for dp in data_points], "input": rephrased_question.content}
                 )
 
-                return vector_context, response
+                return vector_context, str(response.content)
 
             # Perform RAG search with no context
-            response = await document_chain.ainvoke({"context": vector_context, "input": rephrased_question.content})
-            return [], response
+            response = await context_chain.ainvoke({"context": [], "input": rephrased_question.content})
+            return [], str(response.content)
         return [], ""
+
+    async def run_stream(
+        self, messages: list, temperature: float, limit: int, score_threshold: float
+    ) -> tuple[list[Document], AsyncIterator[BaseMessage]]:
+        # Create a vector store retriever
+        retriever = self._vector_store.as_retriever(
+            search_type="similarity", search_kwargs={"k": limit, "score_threshold": score_threshold}
+        )
+
+        self._chat.temperature = 0.3
+
+        # Create a vector context aware chat retriever
+        rephrase_prompt_template = ChatPromptTemplate.from_template(REPHRASE_PROMPT)
+        rephrase_chain = rephrase_prompt_template | self._chat
+
+        # Rephrase the question
+        rephrased_question = await rephrase_chain.ainvoke({"chat_history": messages[:-1], "question": messages[-1]})
+
+        print(rephrased_question.content)
+        # Perform vector search
+        vector_context = await retriever.ainvoke(str(rephrased_question.content))
+        data_points: list[DataPoint] = get_data_points(vector_context)
+
+        # Create a vector context aware chat retriever
+        context_prompt_template = ChatPromptTemplate.from_template(CONTEXT_PROMPT)
+        self._chat.temperature = temperature
+        context_chain = context_prompt_template | self._chat
+
+        if data_points:
+            # Perform RAG search
+            response = context_chain.astream(
+                {"context": [dp.to_dict() for dp in data_points], "input": rephrased_question.content}
+            )
+
+            return vector_context, response
+
+        # Perform RAG search with no context
+        response = context_chain.astream({"context": [], "input": rephrased_question.content})
+        return [], response
diff --git a/src/quartapp/config.py b/src/quartapp/config.py
index 43eedbe..8b06e21 100644
--- a/src/quartapp/config.py
+++ b/src/quartapp/config.py
@@ -1,4 +1,5 @@
 import json
+from collections.abc import AsyncGenerator
 from uuid import uuid4
 
 from quartapp.approaches.schemas import Context, DataPoint, Message, RetrievalResponse, Thought
@@ -109,3 +110,27 @@ async def run_rag(
         )
 
         return RetrievalResponse(context, message, new_session_state)
+
+    async def run_rag_stream(
+        self, session_state: str | None, messages: list, temperature: float, limit: int, score_threshold: float
+    ) -> AsyncGenerator[RetrievalResponse | Message, None]:
+        rag_response, answer = await self.setup.rag.run_stream(messages, temperature, limit, score_threshold)
+
+        new_session_state: str = session_state if session_state else str(uuid4())
+
+        context: Context = await self.get_context(rag_response)
+
+        empty_message: Message = Message(content="", role="assistant")
+
+        yield RetrievalResponse(context, empty_message, new_session_state)
+
+        async for message_chunk in answer:
+            message: Message = Message(content=str(message_chunk.content), role="assistant")
+            yield message
+
+        await self.add_to_cosmos(
+            old_messages=messages,
+            new_message=message.to_dict(),
+            session_state=session_state,
+            new_session_state=new_session_state,
+        )

From 4bbac786691e64f07ff59dc4d37e796fe0c794dd Mon Sep 17 00:00:00 2001
From: john0isaac <johnaziz269@gmail.com>
Date: Fri, 21 Jun 2024 05:54:05 +0300
Subject: [PATCH 2/4] partial fix for tests

---
 tests/conftest.py           | 29 ++++++++---------------------
 tests/test_app_endpoints.py | 16 +++++++++++++---
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index f5b4ad7..1293425 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import mongomock
@@ -56,19 +57,16 @@ def approaches_base_mock():
     # Mock Chat
     mock_chat = MagicMock()
 
-    mock_content = MagicMock()
-    mock_content.content = "content"
+    @dataclass
+    class MockContent:
+        content: str
 
-    runnable_return = MagicMock()
-    runnable_return.to_json = MagicMock(return_value={"kwargs": {"messages": [mock_content]}})
+    mock_content = MockContent(content="content")
 
-    mock_runnable = MagicMock()
-    mock_runnable.ainvoke = AsyncMock(return_value=runnable_return)
-
-    mock_chat.__or__.return_value = mock_runnable
-
-    mock_chat.ainvoke = AsyncMock(return_value=mock_content)
+    mock_chat.__or__ = MagicMock()
+    mock_chat.__or__.return_value.ainvoke = AsyncMock(return_value=mock_content)
 
+    # Mock Data Collection
     mock_mongo_document = {"textContent": mock_document.page_content, "source": "test"}
     mock_data_collection = MagicMock()
     mock_data_collection.find = MagicMock()
@@ -161,17 +159,6 @@ def app_config_mock(setup_mock):
     return app_config
 
 
-@pytest.fixture(autouse=True)
-def create_stuff_documents_chain_mock(monkeypatch):
-    """Mock quartapp.approaches.rag.create_stuff_documents_chain."""
-    document_chain_mock = MagicMock()
-    document_chain_mock.ainvoke = AsyncMock(return_value="content")
-    _mock = MagicMock()
-    _mock.return_value = document_chain_mock
-    monkeypatch.setattr(quartapp.approaches.rag, quartapp.approaches.rag.create_stuff_documents_chain.__name__, _mock)
-    return _mock
-
-
 @pytest.fixture(autouse=True)
 def setup_data_collection_mock(monkeypatch):
     """Mock quartapp.approaches.setup.setup_data_collection."""
diff --git a/tests/test_app_endpoints.py b/tests/test_app_endpoints.py
index 219272c..e57e197 100644
--- a/tests/test_app_endpoints.py
+++ b/tests/test_app_endpoints.py
@@ -98,7 +98,12 @@ async def test_chat_no_message_400(client):
 async def test_chat_not_implemented_501(client):
     """test the chat route with a retrieval_mode not implemented"""
     response: Response = await client.post(
-        "/chat", json={"context": {"overrides": {"retrieval_mode": "not_implemented"}}}
+        "/chat",
+        json={
+            "session_state": "test",
+            "messages": [{"content": "test"}],
+            "context": {"overrides": {"retrieval_mode": "not_implemented"}},
+        },
     )
 
     assert response.status_code == 501
@@ -114,7 +119,7 @@ async def test_chat_rag_option(client_mock):
         "/chat",
         json={
             "session_state": "test",
-            "messages": [{"content": "test"}],
+            "messages": [{"content": "test"}, {"content": "test2"}],
             "context": {"overrides": {"retrieval_mode": "rag"}},
         },
     )
@@ -250,7 +255,12 @@ async def test_chat_stream_no_message_400(client):
 async def test_chat_stream_not_implemented_501(client):
     """test the chat route with a retrieval_mode not implemented"""
     response: Response = await client.post(
-        "/chat/stream", json={"context": {"overrides": {"retrieval_mode": "not_implemented"}}}
+        "/chat/stream",
+        json={
+            "session_state": "test",
+            "messages": [{"content": "test"}],
+            "context": {"overrides": {"retrieval_mode": "not_implemented"}},
+        },
     )
 
     assert response.status_code == 501

From 09b310e7e20eb6b63fa5a1c8e86ab30d3a2eed5e Mon Sep 17 00:00:00 2001
From: john0isaac <johnaziz269@gmail.com>
Date: Sat, 22 Jun 2024 00:25:08 +0300
Subject: [PATCH 3/4] fix tests and remove unnessacry cls

---
 src/quartapp/app.py | 12 ++----------
 tests/conftest.py   | 32 ++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/quartapp/app.py b/src/quartapp/app.py
index f93ff2e..1212301 100644
--- a/src/quartapp/app.py
+++ b/src/quartapp/app.py
@@ -1,7 +1,6 @@
 import logging
 from collections.abc import AsyncGenerator
-from dataclasses import asdict, is_dataclass
-from json import JSONEncoder, dumps
+from json import dumps
 from pathlib import Path
 from typing import Any
 
@@ -17,20 +16,13 @@
 )
 
 
-class CustomJSONEncoder(JSONEncoder):
-    def default(self, o: Any) -> Any:
-        if is_dataclass(o):
-            return asdict(o)
-        return super().default(o)
-
-
 async def format_as_ndjson(r: AsyncGenerator[RetrievalResponse | Message, None]) -> AsyncGenerator[str, None]:
     """
     Format the response as NDJSON
     """
     try:
         async for event in r:
-            yield dumps(event.to_dict(), ensure_ascii=False, cls=CustomJSONEncoder) + "\n"
+            yield dumps(event.to_dict(), ensure_ascii=False) + "\n"
     except Exception as error:
         logging.exception("Exception while generating response stream: %s", error)
         yield dumps({"error": str(error)}, ensure_ascii=False) + "\n"
diff --git a/tests/conftest.py b/tests/conftest.py
index 1293425..337d04e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from unittest.mock import AsyncMock, MagicMock, patch
 
+import langchain_core
 import mongomock
 import pytest
 import pytest_asyncio
@@ -47,25 +48,14 @@ def approaches_base_mock():
 
     # Mock Vector Store
     mock_vector_store = MagicMock()
-    mock_retriever = MagicMock()
     mock_document = Document(
         page_content='{"name": "test", "description": "test", "price": "5.0USD", "category": "test"}'
     )
-    mock_retriever.ainvoke = AsyncMock(return_value=[mock_document])  # Assume there is always a response
-    mock_vector_store.as_retriever.return_value = mock_retriever
+    mock_vector_store.as_retriever.return_value.ainvoke = AsyncMock(return_value=[mock_document])
 
     # Mock Chat
     mock_chat = MagicMock()
 
-    @dataclass
-    class MockContent:
-        content: str
-
-    mock_content = MockContent(content="content")
-
-    mock_chat.__or__ = MagicMock()
-    mock_chat.__or__.return_value.ainvoke = AsyncMock(return_value=mock_content)
-
     # Mock Data Collection
     mock_mongo_document = {"textContent": mock_document.page_content, "source": "test"}
     mock_data_collection = MagicMock()
@@ -167,6 +157,24 @@ def setup_data_collection_mock(monkeypatch):
     return _mock
 
 
+@pytest.fixture(autouse=True)
+def mock_runnable_or(monkeypatch):
+    """Mock langchain_core.runnables.base.Runnable.__or__."""
+
+    @dataclass
+    class MockContent:
+        content: str
+
+    or_return = MagicMock()
+    or_return.ainvoke = AsyncMock(return_value=MockContent(content="content"))
+    or_mock = MagicMock()
+    or_mock.return_value = or_return
+    monkeypatch.setattr(
+        langchain_core.runnables.base.Runnable, langchain_core.runnables.base.Runnable.__or__.__name__, or_mock
+    )
+    return or_mock
+
+
 @pytest_asyncio.fixture
 async def app_mock(app_config_mock):
     """Create a test app with the test config mock."""

From 2d5d224e6da9a332f47c22608b1e76ad7154997c Mon Sep 17 00:00:00 2001
From: john0isaac <johnaziz269@gmail.com>
Date: Sat, 22 Jun 2024 00:38:42 +0300
Subject: [PATCH 4/4] disable streaming by default as we can't stream vector or
 keyword responses

---
 frontend/src/pages/chat/Chat.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/pages/chat/Chat.tsx b/frontend/src/pages/chat/Chat.tsx
index b27ac8f..de460f7 100644
--- a/frontend/src/pages/chat/Chat.tsx
+++ b/frontend/src/pages/chat/Chat.tsx
@@ -33,7 +33,7 @@ const Chat = () => {
 
     const [isLoading, setIsLoading] = useState<boolean>(false);
     const [isStreaming, setIsStreaming] = useState<boolean>(false);
-    const [shouldStream, setShouldStream] = useState<boolean>(true);
+    const [shouldStream, setShouldStream] = useState<boolean>(false);
     const [isBuy, setIsBuy] = useState<boolean>(false);
     const [address, setAddress] = useState<string>("");
     const [cartItems, setCartItems] = useState<string[]>([]);