Skip to content

Commit

Permalink
Qjufa chat 1414 (#1539)
Browse files Browse the repository at this point in the history
issue: #1414 


## Todos
- [x] Fix datetime filters when recalling the memory
- [x] Fix unrelated memories in chat response
  • Loading branch information
beastoin authored Dec 16, 2024
2 parents 63d9a6a + b70bad1 commit bef34ac
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 42 deletions.
2 changes: 1 addition & 1 deletion backend/database/vector_db.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import os
from collections import defaultdict
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from typing import List

from pinecone import Pinecone
Expand Down
144 changes: 120 additions & 24 deletions backend/utils/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import re
from datetime import datetime
from datetime import datetime, timezone
from typing import List, Optional

import tiktoken
Expand Down Expand Up @@ -239,7 +239,7 @@ class DatesContext(BaseModel):
dates_range: List[datetime] = Field(default=[], description="Dates range. (Optional)")


def requires_context(messages: List[Message]) -> bool:
def requires_context_v1(messages: List[Message]) -> bool:
prompt = f'''
Based on the current conversation your task is to determine whether the user is asking a question or a follow up question that requires context outside the conversation to be answered.
Take as example: if the user is saying "Hi", "Hello", "How are you?", "Good morning", etc, the answer is False.
Expand All @@ -254,25 +254,72 @@ def requires_context(messages: List[Message]) -> bool:
except ValidationError:
return False

def requires_context(question: str) -> bool:
    """Decide whether answering `question` needs context beyond the chat itself.

    Small talk ("Hi", "Hello", "How are you?", "Good morning", ...) does not
    require outside context and yields False.

    Returns:
        True when the LLM judges that external context is required, False for
        small talk or when the structured output fails validation.
    """
    prompt = f'''
Based on the current question your task is to determine whether the user is asking a question that requires context outside the conversation to be answered.
Take as example: if the user is saying "Hi", "Hello", "How are you?", "Good morning", etc, the answer is False.
User's Question:
{question}
'''
    with_parser = llm_mini.with_structured_output(RequiresContext)
    # BUG FIX: the original wrapped only `response.value` in try/except, but
    # ValidationError is raised by `invoke` (structured-output parsing), so the
    # handler was dead code and parse failures propagated to the caller.
    try:
        response: RequiresContext = with_parser.invoke(prompt)
        return response.value
    except ValidationError:
        return False


class IsAnOmiQuestion(BaseModel):
value: bool = Field(description="If the message is an Omi/Friend related question")


def retrieve_is_an_omi_question(messages: List[Message]) -> bool:
def retrieve_is_an_omi_question_v1(messages: List[Message]) -> bool:
prompt = f'''
The user is using the chat functionality of an app known as Omi or Friend.
Based on the current conversation your task is to determine if the user is asking a question about the way you work, or how to use you or the app.
Questions like,
Questions like,
- "How does it work?"
- "What can you do?"
- "How can I buy it"
- "Where do I get it"
- "How the chat works?"
- ...
Conversation History:
Conversation History:
{Message.get_messages_as_string(messages)}
'''.replace(' ', '').strip()
with_parser = llm_mini.with_structured_output(IsAnOmiQuestion)
response: IsAnOmiQuestion = with_parser.invoke(prompt)
try:
return response.value
except ValidationError:
return False

def retrieve_is_an_omi_question_v2(messages: List[Message]) -> bool:
prompt = f'''
Task: Analyze the conversation to identify if the user is inquiring about the functionalities or usage of the app, Omi or Friend. Focus on detecting questions related to the app's operations or capabilities.
Examples of User Questions:
- "How does it work?"
- "What can you do?"
- "How can I buy it?"
- "Where do I get it?"
- "How does the chat function?"
Instructions:
1. Review the conversation history carefully.
2. Determine if the user is asking about:
- The operational aspects of the app.
- How to utilize the app effectively.
- Any specific features or purchasing options.
Output: Clearly state if the user is asking a question related to the app's functionality or usage. If yes, specify the nature of the inquiry.
Conversation Context:
{Message.get_messages_as_string(messages)}
'''.replace(' ', '').strip()
with_parser = llm_mini.with_structured_output(IsAnOmiQuestion)
Expand All @@ -283,14 +330,47 @@ def retrieve_is_an_omi_question(messages: List[Message]) -> bool:
return False


def retrieve_is_an_omi_question(question: str) -> bool:
    """Return True when `question` is about the Omi/Friend app itself.

    Covers questions about how the app works, how to use it, its features,
    or how to purchase it (e.g. "How does it work?", "Where do I get it?").

    Returns:
        True for app-related questions, False otherwise or when the
        structured output fails validation.
    """
    prompt = f'''
Task: Analyze the question to identify if the user is inquiring about the functionalities or usage of the app, Omi or Friend. Focus on detecting questions related to the app's operations or capabilities.
Examples of User Questions:
- "How does it work?"
- "What can you do?"
- "How can I buy it?"
- "Where do I get it?"
- "How does the chat function?"
Instructions:
1. Review the question carefully.
2. Determine if the user is asking about:
- The operational aspects of the app.
- How to utilize the app effectively.
- Any specific features or purchasing options.
Output: Clearly state if the user is asking a question related to the app's functionality or usage. If yes, specify the nature of the inquiry.
User's Question:
{question}
'''.replace('    ', '').strip()
    with_parser = llm_mini.with_structured_output(IsAnOmiQuestion)
    # BUG FIX: the original try only guarded the `.value` attribute access;
    # ValidationError is raised by `invoke`, so the except clause could never
    # fire and parse failures escaped instead of returning False.
    try:
        response: IsAnOmiQuestion = with_parser.invoke(prompt)
        return response.value
    except ValidationError:
        return False


def retrieve_context_topics(messages: List[Message]) -> List[str]:
prompt = f'''
Based on the current conversation an AI and a User are having, for the AI to answer the latest user messages, it needs context outside the conversation.
Your task is to extract the correct and most accurate context in the conversation, to be used to retrieve more information.
Provide a list of topics in which the current conversation needs context about, in order to answer the most recent user request.
It is possible that the data needed is not related to a topic, in that case, output an empty list.
It is possible that the data needed is not related to a topic, in that case, output an empty list.
Conversation:
{Message.get_messages_as_string(messages)}
Expand All @@ -304,18 +384,16 @@ def retrieve_context_topics(messages: List[Message]) -> List[str]:
return topics


def retrieve_context_dates(messages: List[Message]) -> List[datetime]:
def retrieve_context_dates(messages: List[Message], tz: str) -> List[datetime]:
prompt = f'''
Based on the current conversation an AI and a User are having, for the AI to answer the latest user messages, it needs context outside the conversation.
Your task is to to find the dates range in which the current conversation needs context about, in order to answer the most recent user request.
For example, if the user request relates to "What did I do last week?", or "What did I learn yesterday", or "Who did I meet today?", the dates range should be provided.
For example, if the user request relates to "What did I do last week?", or "What did I learn yesterday", or "Who did I meet today?", the dates range should be provided.
Other type of dates, like historical events, or future events, should be ignored and an empty list should be returned.
For context, today is {datetime.now().isoformat()}.
Year: {datetime.now().year}, Month: {datetime.now().month}, Day: {datetime.now().day}
For context, today is {datetime.now(timezone.utc).strftime('%Y-%m-%d')} in UTC. {tz} is the user's timezone, convert it to UTC and respond in UTC.
Conversation:
{Message.get_messages_as_string(messages)}
Expand All @@ -324,6 +402,26 @@ def retrieve_context_dates(messages: List[Message]) -> List[datetime]:
response: DatesContext = with_parser.invoke(prompt)
return response.dates_range

def retrieve_context_dates_by_question(question: str, tz: str) -> List[datetime]:
    """Ask the LLM for the UTC date range needed to answer `question`.

    The user's timezone `tz` is passed to the model so relative dates
    ("yesterday", "last week") are resolved and converted to UTC. Questions
    about historical or future events yield an empty list.
    """
    today_utc = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    prompt = f'''
Task: Identify the relevant date range needed to provide context for answering the user's recent question.
Instructions:
1. Use the current date for reference, which is {today_utc} in UTC. Convert the user's timezone, {tz}, to UTC and respond accordingly.
2. Ignore requests related to historical or future events. For these, return an empty list.
3. Provide the date range in UTC
User's Question:
{question}
'''.replace('    ', '').strip()

    parser = llm_mini.with_structured_output(DatesContext)
    result: DatesContext = parser.invoke(prompt)
    return result.dates_range


class SummaryOutput(BaseModel):
summary: str = Field(description="The extracted content, maximum 500 words.")
Expand Down Expand Up @@ -382,7 +480,7 @@ def answer_omi_question(messages: List[Message], context: str) -> str:
prompt = f"""
You are an assistant for answering questions about the app Omi, also known as Friend.
Continue the conversation, answering the question based on the context provided.
Context:
```
{context}
Expand All @@ -395,7 +493,6 @@ def answer_omi_question(messages: List[Message], context: str) -> str:
""".replace(' ', '').strip()
return llm_mini.invoke(prompt).content


def qa_rag(uid: str, question: str, context: str, plugin: Optional[Plugin] = None) -> str:
user_name, facts_str = get_prompt_facts(uid)
facts_str = '\n'.join(facts_str.split('\n')[1:]).strip()
Expand Down Expand Up @@ -698,7 +795,7 @@ def extract_question_from_conversation(messages: List[Message]) -> str:
prompt = f'''
You will be given a recent conversation within a user and an AI, \
there could be a few messages exchanged, and partly built up the proper question, \
your task is to understand the last few messages, and identify the single question or follow-up question the user is asking. \
your task is to understand THE LAST FEW MESSAGES, and identify the single question or follow-up question the user is asking. \
If the user is not asking a question or does not want to follow up, respond with an empty message.
Expand All @@ -711,9 +808,8 @@ def extract_question_from_conversation(messages: List[Message]) -> str:
'''.replace(' ', '').strip()
return llm_mini.with_structured_output(OutputQuestion).invoke(prompt).question


def retrieve_metadata_fields_from_transcript(
uid: str, created_at: datetime, transcript_segment: List[dict]
uid: str, created_at: datetime, transcript_segment: List[dict], tz: str
) -> ExtractedInformation:
transcript = ''
for segment in transcript_segment:
Expand All @@ -728,7 +824,7 @@ def retrieve_metadata_fields_from_transcript(
Make sure as a first step, you infer and fix the raw transcript errors and then proceed to extract the information.
For context when extracting dates, today is {created_at.strftime('%Y-%m-%d')}.
For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. {tz} is the user's timezone, convert it to UTC and respond in UTC.
If one says "today", it means the current day.
If one says "tomorrow", it means the next day after today.
If one says "yesterday", it means the day before today.
Expand Down
3 changes: 2 additions & 1 deletion backend/utils/memories/process_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def save_structured_vector(uid: str, memory: Memory, update_only: bool = False):
vector = generate_embedding(str(memory.structured)) if not update_only else None

segments = [t.dict() for t in memory.transcript_segments]
metadata = retrieve_metadata_fields_from_transcript(uid, memory.created_at, segments)
tz = notification_db.get_user_time_zone(uid)
metadata = retrieve_metadata_fields_from_transcript(uid, memory.created_at, segments, tz)
metadata['created_at'] = int(memory.created_at.timestamp())
if not update_only:
print('save_structured_vector creating vector')
Expand Down
47 changes: 31 additions & 16 deletions backend/utils/retrieval/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import database.memories as memories_db
from database.redis_db import get_filter_category_items
from database.vector_db import query_vectors_by_metadata
import database.notifications as notification_db
from models.chat import Message
from models.memory import Memory
from models.plugin import Plugin
Expand All @@ -20,6 +21,7 @@
requires_context,
answer_simple_message,
retrieve_context_dates,
retrieve_context_dates_by_question,
qa_rag,
retrieve_is_an_omi_question,
select_structured_filters,
Expand Down Expand Up @@ -48,6 +50,7 @@ class GraphState(TypedDict):
uid: str
messages: List[Message]
plugin_selected: Optional[Plugin]
tz: str

filters: Optional[StructuredFilters]
date_filters: Optional[DateRangeFilters]
Expand All @@ -59,22 +62,28 @@ class GraphState(TypedDict):
ask_for_nps: Optional[bool]


def determine_conversation(state: GraphState):
    """Graph entry node: distill the chat history into a single parsed question."""
    parsed = extract_question_from_conversation(state.get("messages", []))
    print("determine_conversation parsed question:", parsed)
    return {"parsed_question": parsed}


def determine_conversation_type(
s: GraphState,
state: GraphState,
) -> Literal["no_context_conversation", "context_dependent_conversation", "omi_question"]:
is_omi_question = retrieve_is_an_omi_question(s.get("messages", []))
# TODO: after asked many questions this is causing issues.
# TODO: an option to be considered, single prompt outputs response needed type (omi,no context, context, suggestion)
question = state.get("parsed_question", "")
if not question or len(question) == 0:
return "no_context_conversation"

is_omi_question = retrieve_is_an_omi_question(question)
if is_omi_question:
return "omi_question"

requires = requires_context(s.get("messages", []))

requires = requires_context(question)
if requires:
return "context_dependent_conversation"
return "no_context_conversation"


def no_context_conversation(state: GraphState):
    """Graph node: answer small talk directly, without retrieving any memories."""
    print("no_context_conversation node")
    reply = answer_simple_message(state.get("uid"), state.get("messages"))
    # Simple exchanges never trigger an NPS prompt.
    return {"answer": reply, "ask_for_nps": False}
Expand All @@ -88,12 +97,15 @@ def omi_question(state: GraphState):
return {'answer': answer, 'ask_for_nps': True}


def context_dependent_conversation(state: GraphState):
def context_dependent_conversation_v1(state: GraphState):
    """Legacy graph node: parsed the question at this stage of the graph."""
    parsed = extract_question_from_conversation(state.get("messages", []))
    print("context_dependent_conversation parsed question:", parsed)
    return {"parsed_question": parsed}


def context_dependent_conversation(state: GraphState):
    # Pass-through node: the question was already parsed upstream (in
    # determine_conversation), so there is nothing left to do here.
    return state

# !! include a question extractor? node?


Expand All @@ -119,9 +131,9 @@ def retrieve_topics_filters(state: GraphState):
def retrieve_date_filters(state: GraphState):
print('retrieve_date_filters')
# TODO: if this makes vector search fail further, query firestore instead
dates_range = retrieve_context_dates(state.get("messages", []))
if dates_range and len(dates_range) == 2:
print('retrieve_date_filters dates_range:', dates_range)
dates_range = retrieve_context_dates_by_question(state.get("parsed_question", ""), state.get("tz", "UTC"))
print('retrieve_date_filters dates_range:', dates_range)
if dates_range and len(dates_range) >= 2:
return {"date_filters": {"start": dates_range[0], "end": dates_range[1]}}
return {"date_filters": {}}

Expand Down Expand Up @@ -163,10 +175,12 @@ def qa_handler(state: GraphState):

workflow = StateGraph(GraphState)

workflow.add_conditional_edges(
START,
determine_conversation_type,
)

workflow.add_edge(START, "determine_conversation")

workflow.add_node("determine_conversation", determine_conversation)

workflow.add_conditional_edges("determine_conversation", determine_conversation_type)

workflow.add_node("no_context_conversation", no_context_conversation)
workflow.add_node("omi_question", omi_question)
Expand Down Expand Up @@ -200,8 +214,9 @@ def execute_graph_chat(
uid: str, messages: List[Message], plugin: Optional[Plugin] = None
) -> Tuple[str, bool, List[Memory]]:
print('execute_graph_chat plugin :', plugin)
tz = notification_db.get_user_time_zone(uid)
result = graph.invoke(
{"uid": uid, "messages": messages, "plugin_selected": plugin},
{"uid": uid, "tz": tz, "messages": messages, "plugin_selected": plugin},
{"configurable": {"thread_id": str(uuid.uuid4())}},
)
return result.get("answer"), result.get('ask_for_nps', False), result.get("memories_found", [])
Expand Down

0 comments on commit bef34ac

Please sign in to comment.