From 935bea674ca0e13554f815717ca88a7e0cdbe40d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Paulo=20P=C3=A9rez-Tejada?= Date: Mon, 11 Dec 2023 17:35:26 -0600 Subject: [PATCH 1/3] refactor: separate app components (#17) --- app.py | 130 +++---------------------------- {src => gnosis}/agent.py | 2 +- gnosis/builder.py | 25 ++++++ {src => gnosis}/chroma_client.py | 0 gnosis/components/handlers.py | 15 ++++ gnosis/components/main.py | 56 +++++++++++++ gnosis/components/sidebar.py | 66 ++++++++++++++++ {src => gnosis}/gui_messages.py | 6 ++ {src => gnosis}/search.py | 0 {src => gnosis}/settings.py | 0 10 files changed, 181 insertions(+), 119 deletions(-) rename {src => gnosis}/agent.py (97%) create mode 100644 gnosis/builder.py rename {src => gnosis}/chroma_client.py (100%) create mode 100644 gnosis/components/handlers.py create mode 100644 gnosis/components/main.py create mode 100644 gnosis/components/sidebar.py rename {src => gnosis}/gui_messages.py (69%) rename {src => gnosis}/search.py (100%) rename {src => gnosis}/settings.py (100%) diff --git a/app.py b/app.py index b7bdffa..af0f865 100644 --- a/app.py +++ b/app.py @@ -1,139 +1,33 @@ -""" A simple example of Streamlit. """ -import textwrap +""" A Streamlit app for GnosisPages. 
""" import os -import tiktoken -import fitz import streamlit as st import openai from dotenv import load_dotenv -from langchain.chat_models import ChatOpenAI -from langchain.callbacks import StreamlitCallbackHandler -from src.chroma_client import ChromaDB -import src.gui_messages as gm -from src import settings - -from src.agent import PDFExplainer +from gnosis.chroma_client import ChromaDB +import gnosis.gui_messages as gm +from gnosis import settings +from gnosis.components.sidebar import sidebar +from gnosis.components.main import main load_dotenv() -def set_api_key(): - """Set the OpenAI API key.""" - openai.api_key = st.session_state.api_key - st.session_state.api_message = gm.api_message(openai.api_key) - - -def click_wk_button(): - """Set the OpenAI API key.""" - st.session_state.wk_button = not st.session_state.wk_button - - openai.api_key = os.getenv("OPENAI_API_KEY") if "api_message" not in st.session_state: st.session_state.api_message = gm.api_message(openai.api_key) +if "wk_button" not in st.session_state: + st.session_state.wk_button = False + + # Build settings chroma_db = ChromaDB(openai.api_key) collection = settings.build(chroma_db) # Sidebar -with st.sidebar: - st.write("## OpenAI API key") - openai.api_key = st.text_input( - "Enter OpenAI API key", - value="", - type="password", - key="api_key", - placeholder="Enter your OpenAI API key", - on_change=set_api_key, - label_visibility="collapsed", - ) - st.write( - "You can find your API key at https://platform.openai.com/account/api-keys" - ) - if "wk_button" not in st.session_state: - st.session_state.wk_button = False - - st.checkbox( - "Use Wikipedia", on_change=click_wk_button, value=st.session_state.wk_button - ) - st.subheader("Creativity") - st.write("The higher the value, the crazier the text.") - st.slider( - "Temperature", - min_value=0.0, - max_value=2.0, - value=0.9, - step=0.01, - key="temperature", - ) - - if st.button("Delete collection"): - st.warning("Are you sure?") - if 
st.button("Yes"): - try: - chroma_db.delete_collection(collection.name) - except AttributeError: - st.error("Collection erased.") - -# Main -st.title("GnosisPages") -st.subheader("Create your knowledge base") - -## Uploader - -st.write( - "Upload, extract and consult the content of PDF Files for builiding your knowledge base!" -) -pdf = st.file_uploader("Upload a file", type="pdf") - -if pdf is not None: - with fitz.open(stream=pdf.read(), filetype="pdf") as doc: # open document - with st.spinner("Extracting text..."): - text = chr(12).join([page.get_text() for page in doc]) - st.subheader("Text preview") - st.write(text[0:300] + "...") - if st.button("Save chunks"): - with st.spinner("Saving chunks..."): - chunks = textwrap.wrap(text, 1250) - for idx, chunk in enumerate(chunks): - encoding = tiktoken.get_encoding("cl100k_base") - num_tokens = len(encoding.encode(chunk)) - collection.add( - documents=[chunk], - metadatas=[{"source": pdf.name, "num_tokens": num_tokens}], - ids=[pdf.name + str(idx)], - ) -else: - st.write("Please upload a file of type: pdf") - -st.subheader("Consult your knowledge base") - - -prompt = st.chat_input() - -if prompt: - # Create Agent - try: - openai_api_key = openai.api_key - llm = ChatOpenAI( - temperature=st.session_state.temperature, - model="gpt-3.5-turbo-16k", - api_key=openai.api_key, - ) - agent = PDFExplainer( - llm, - chroma_db, - extra_tools=st.session_state.wk_button, - ).agent - except Exception: # pylint: disable=broad-exception-caught - st.warning("Missing OpenAI API Key.") +sidebar(chroma_db, collection) - st.chat_message("user").write(prompt) - with st.chat_message("assistant"): - st_callback = StreamlitCallbackHandler(st.container()) - response = agent.run(prompt, callbacks=[st_callback]) - st.write(response) +main(openai.api_key, chroma_db, collection) diff --git a/src/agent.py b/gnosis/agent.py similarity index 97% rename from src/agent.py rename to gnosis/agent.py index 6980c87..54aca92 100644 --- a/src/agent.py +++ 
b/gnosis/agent.py
@@ -1,7 +1,7 @@
 """An Langchain Agent that uses ChromaDB as a query tool"""
 from langchain.agents import AgentType, initialize_agent, load_tools
 from langchain.tools import Tool
-from src.search import Search
+from gnosis.search import Search
 
 
 class PDFExplainer:
diff --git a/gnosis/builder.py b/gnosis/builder.py
new file mode 100644
index 0000000..7798b11
--- /dev/null
+++ b/gnosis/builder.py
@@ -0,0 +1,25 @@
+"""Module for building the Langchain Agent"""
+import streamlit as st
+from langchain.chat_models import ChatOpenAI
+from gnosis.agent import PDFExplainer
+
+
+def build(key, client):
+    """Build and return the agent; warn and stop the Streamlit run on failure."""
+    # temperature and wk_button are read from the Streamlit session state
+    try:
+        llm = ChatOpenAI(
+            temperature=st.session_state.temperature,
+            model="gpt-3.5-turbo-16k",
+            api_key=key,
+        )
+        agent = PDFExplainer(
+            llm,
+            client,
+            extra_tools=st.session_state.wk_button,
+        ).agent
+    except Exception:  # pylint: disable=broad-exception-caught
+        st.warning("Missing OpenAI API Key.")
+        st.stop()  # no agent was built: end this run instead of returning
+
+    return agent
diff --git a/src/chroma_client.py b/gnosis/chroma_client.py
similarity index 100%
rename from src/chroma_client.py
rename to gnosis/chroma_client.py
diff --git a/gnosis/components/handlers.py b/gnosis/components/handlers.py
new file mode 100644
index 0000000..c1780d6
--- /dev/null
+++ b/gnosis/components/handlers.py
@@ -0,0 +1,15 @@
+"""Handler functions for the components"""
+import streamlit as st
+import openai
+import gnosis.gui_messages as gm
+
+
+def set_api_key():
+    """Set the OpenAI API key from the session state and refresh its message."""
+    openai.api_key = st.session_state.api_key
+    st.session_state.api_message = gm.api_message(openai.api_key)
+
+
+def click_wk_button():
+    """Toggle the wk_button flag kept in the session state."""
+    st.session_state.wk_button = not st.session_state.wk_button
diff --git a/gnosis/components/main.py b/gnosis/components/main.py
new file mode 100644
index 0000000..4345ae9
--- /dev/null
+++ b/gnosis/components/main.py
@@ -0,0 +1,56 @@
+"""Main component"""
+import textwrap
+import tiktoken
+import fitz
+import streamlit as st
+from langchain.callbacks import StreamlitCallbackHandler
+import gnosis.gui_messages as gm
+from gnosis.builder import build
+
+
+def uploader(collection):
+    """Show the PDF uploader, preview the text and save its chunks on request."""
+    st.write(
+        "Upload, extract and consult the content of PDF Files for builiding your knowledge base!"
+    )
+    pdf = st.file_uploader("Upload a file", type="pdf")
+
+    if pdf is not None:
+        with fitz.open(stream=pdf.read(), filetype="pdf") as doc:  # open document
+            with st.spinner("Extracting text..."):
+                text = chr(12).join([page.get_text() for page in doc])
+        st.subheader("Text preview")
+        st.write(text[0:300] + "...")
+        if st.button("Save chunks"):
+            with st.spinner("Saving chunks..."):
+                chunks = textwrap.wrap(text, 1250)
+                encoding = tiktoken.get_encoding("cl100k_base")  # same for every chunk
+                for idx, chunk in enumerate(chunks):
+                    num_tokens = len(encoding.encode(chunk))
+                    collection.add(
+                        documents=[chunk],
+                        metadatas=[{"source": pdf.name, "num_tokens": num_tokens}],
+                        ids=[pdf.name + str(idx)],
+                    )
+    else:
+        st.write("Please upload a file of type: pdf")
+
+
+def main(key, client, collection):
+    """Render the header, the uploader and the chat over the knowledge base."""
+    gm.header()
+
+    uploader(collection)
+
+    st.subheader("Consult your knowledge base")
+
+    prompt = st.chat_input()
+
+    if prompt:
+        agent = build(key, client)
+
+        st.chat_message("user").write(prompt)
+        with st.chat_message("assistant"):
+            st_callback = StreamlitCallbackHandler(st.container())
+            response = agent.run(prompt, callbacks=[st_callback])
+            st.write(response)
diff --git a/gnosis/components/sidebar.py b/gnosis/components/sidebar.py
new file mode 100644
index 0000000..e5d3da8
--- /dev/null
+++ b/gnosis/components/sidebar.py
@@ -0,0 +1,66 @@
+"""Sidebar component for the Streamlit app."""
+import streamlit as st
+import openai
+from gnosis.components.handlers import set_api_key, click_wk_button
+
+
+def delete_collection(client, collection):
+    """Delete collection button."""
+    if st.button("Delete collection"):
+        st.warning("Are you 
sure?")
+        if st.button("Yes"):
+            try:
+                client.delete_collection(collection.name)
+            except AttributeError:
+                st.error("Collection erased.")
+
+
+def openai_api_key_box():
+    """Box for entering the OpenAI API key."""
+    st.sidebar.write("## OpenAI API key")
+    openai.api_key = st.sidebar.text_input(
+        "Enter OpenAI API key",
+        value="",
+        type="password",
+        key="api_key",
+        placeholder="Enter your OpenAI API key",
+        on_change=set_api_key,
+        label_visibility="collapsed",
+    ) or openai.api_key  # an empty box must not erase a key that is already set
+    st.sidebar.write(
+        "You can find your API key at https://platform.openai.com/account/api-keys"
+    )
+
+
+def creativity_slider():
+    """Slider that stores the temperature level under the "temperature" key."""
+    st.sidebar.subheader("Creativity")
+    st.sidebar.write("The higher the value, the crazier the text.")
+    st.sidebar.slider(
+        "Temperature",
+        min_value=0.0,
+        max_value=1.25,  # Max level is 2, but it's too stochastic
+        value=0.5,
+        step=0.01,
+        key="temperature",
+    )
+
+
+def wk_checkbox():
+    """Checkbox that toggles the use of Wikipedia through click_wk_button."""
+    st.sidebar.checkbox(
+        "Use Wikipedia", on_change=click_wk_button, value=st.session_state.wk_button
+    )
+
+
+# Sidebar
+def sidebar(client, collection):
+    """Render the key box, checkbox, slider and delete button in the sidebar."""
+    with st.sidebar:
+        openai_api_key_box()
+
+        wk_checkbox()
+
+        creativity_slider()
+
+        delete_collection(client, collection)
diff --git a/src/gui_messages.py b/gnosis/gui_messages.py
similarity index 69%
rename from src/gui_messages.py
rename to gnosis/gui_messages.py
index 84e8c32..4071301 100644
--- a/src/gui_messages.py
+++ b/gnosis/gui_messages.py
@@ -2,6 +2,12 @@
 import streamlit as st
 
 
+def header():
+    """Render the app title and subheader."""
+    st.title("GnosisPages")
+    st.subheader("Create your knowledge base")
+
+
 def api_message(api_key):
     """Inform if the api key is set."""
     if api_key is None:
diff --git a/src/search.py b/gnosis/search.py
similarity index 100%
rename from src/search.py
rename to gnosis/search.py
diff --git a/src/settings.py b/gnosis/settings.py
similarity index 100%
rename from src/settings.py
rename to 
gnosis/settings.py From f4a581e4f94c5723ed77c9dccd80947ee7a46e04 Mon Sep 17 00:00:00 2001 From: Juan Perez Tejada Date: Mon, 11 Dec 2023 17:58:43 -0600 Subject: [PATCH 2/3] Deactivate verbose mode and remove print --- gnosis/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnosis/agent.py b/gnosis/agent.py index 54aca92..21f7ea8 100644 --- a/gnosis/agent.py +++ b/gnosis/agent.py @@ -27,7 +27,7 @@ def __init__(self, llm, chroma_db, extra_tools=False): self.tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, - verbose=True, + verbose=False, handle_parsing_errors=True, ) @@ -41,6 +41,6 @@ def replace_agent(self, agent: AgentType, llm): self.tools, llm, agent=agent, - verbose=True, + verbose=False, handle_parsing_errors=True, ) From 967c610ce2e4e76f820ecfbd75ec160814074bf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Paulo=20P=C3=A9rez-Tejada?= Date: Mon, 11 Dec 2023 18:04:56 -0600 Subject: [PATCH 3/3] refactor: separate app components