diff --git a/.streamlit/config.yaml b/.streamlit/config.yaml
index 21feaef..77c801c 100644
--- a/.streamlit/config.yaml
+++ b/.streamlit/config.yaml
@@ -10,6 +10,18 @@ credentials:
       logged_in: false
       name: John Doe
       password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO
+    stest:
+      email: sankettest@gmail.com
+      failed_login_attempts: 0
+      logged_in: false
+      name: Sanket Test
+      password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO
+    stest2:
+      email: sankettest@gmail.com
+      failed_login_attempts: 0
+      logged_in: false
+      name: Sanket Test
+      password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO
     rbriggs:
       email: rbriggs@gmail.com
       failed_login_attempts: 0
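[Note — outside the diff] The password values in this file are bcrypt hashes, which is the format streamlit-authenticator expects. A minimal sketch for producing one locally; "change-me" is a placeholder, not the actual test password:

    import bcrypt

    # Produces a hash in the same $2b$12$... format as the config entries above.
    # "change-me" is a placeholder password, not the one used in this repo.
    print(bcrypt.hashpw(b"change-me", bcrypt.gensalt(rounds=12)).decode())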
diff --git a/app.py b/app.py
index 507f69e..bf836b5 100644
--- a/app.py
+++ b/app.py
@@ -15,14 +15,14 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_core.prompts import PromptTemplate
 
-from langchain_chroma import Chroma
+from langchain_community.vectorstores.chroma import Chroma
 import chromadb
 from chromadb.config import Settings
 from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer
-from langchain.schema import Document
 from langchain.retrievers import ContextualCompressionRetriever
 from tei_rerank import TEIRerank
+from transformers import AutoTokenizer
 
 import streamlit as st
 import streamlit_authenticator as stauth
@@ -35,21 +35,22 @@ st.set_page_config(layout="wide", page_title="InSightful")
 
+
 def authenticate():
-    with open('.streamlit/config.yaml') as file:
+    with open(".streamlit/config.yaml") as file:
         config = yaml.load(file, Loader=SafeLoader)
 
     authenticator = stauth.Authenticate(
-        config['credentials'],
-        config['cookie']['name'],
-        config['cookie']['key'],
-        config['cookie']['expiry_days'],
-        config['pre-authorized']
+        config["credentials"],
+        config["cookie"]["name"],
+        config["cookie"]["key"],
+        config["cookie"]["expiry_days"],
+        config["pre-authorized"],
     )
     name, authentication_status, username = authenticator.login()
-    st.session_state['authentication_status'] = authentication_status
-    st.session_state['username'] = username
+    st.session_state["authentication_status"] = authentication_status
+    st.session_state["username"] = username
     return authenticator
@@ -60,20 +61,20 @@ def setup_chroma_client():
             host=os.getenv("VECTORDB_HOST", "localhost"),
             port=os.getenv("VECTORDB_PORT", "8000"),
         ),
-        settings=Settings(allow_reset=True,
-                          anonymized_telemetry=False)
-        
+        settings=Settings(allow_reset=True, anonymized_telemetry=False),
     )
     return client
 
+
 # Set up Chroma embedding function
-def setup_chroma_embedding_function():
-    chroma_embedding_function = HuggingFaceEmbeddingServer(
+def hf_embedding_server():
+    _embedding_function = HuggingFaceEmbeddingServer(
         url="http://{host}:{port}/embed".format(
             host=os.getenv("TEI_HOST", "localhost"), port=os.getenv("TEI_PORT", "8081")
         )
     )
-    return chroma_embedding_function
+    return _embedding_function
 
+
 # Set up HuggingFaceEndpoint model
 def setup_huggingface_endpoint(model_id):
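[Note — outside the diff] The renamed hf_embedding_server() points Chroma's HuggingFaceEmbeddingServer at a Text Embeddings Inference (TEI) /embed endpoint. A rough smoke test of that endpoint under the same TEI_HOST/TEI_PORT defaults; the exact response shape may vary across TEI versions:

    import os
    import requests

    url = "http://{host}:{port}/embed".format(
        host=os.getenv("TEI_HOST", "localhost"), port=os.getenv("TEI_PORT", "8081")
    )
    # TEI typically returns one embedding vector per input string.
    resp = requests.post(url, json={"inputs": ["hello world"]}, timeout=10)
    resp.raise_for_status()
    print(len(resp.json()[0]))  # embedding dimension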
@@ -85,24 +86,27 @@ def setup_huggingface_endpoint(model_id):
         task="conversational",
         stop_sequences=[
             "<|im_end|>",
-            "{your_token}".format(your_token=os.getenv("STOP_TOKEN", "<|end_of_text|>")),
+            "{your_token}".format(
+                your_token=os.getenv("STOP_TOKEN", "<|end_of_text|>")
+            ),
         ],
     )
 
-    model = ChatHuggingFace(llm=llm,
-                            model_id=model_id)
+    model = ChatHuggingFace(llm=llm, model_id=model_id)
 
     return model
 
+
 def setup_portkey_integrated_model():
     from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
     from langchain_openai import ChatOpenAI
+
     portkey_headers = createHeaders(
         api_key=os.getenv("PORTKEY_API_KEY"),
         custom_host=os.getenv("PORTKEY_CUSTOM_HOST"),
-        provider=os.getenv("PORTKEY_PROVIDER")
+        provider=os.getenv("PORTKEY_PROVIDER"),
     )
-    
+
     model = ChatOpenAI(
         api_key="None",
         base_url=PORTKEY_GATEWAY_URL,
@@ -112,6 +116,7 @@ def setup_portkey_integrated_model():
 
     return model
 
+
 # Set up HuggingFaceEndpointEmbeddings embedder
 def setup_huggingface_embeddings():
     embedder = HuggingFaceEndpointEmbeddings(
@@ -122,10 +127,13 @@ def setup_huggingface_embeddings():
     )
     return embedder
 
-def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None):
-    #prompt = hub.pull("hwchase17/react-chat")
+
+def load_prompt_and_system_ins(
+    template_file_path="templates/prompt_template.tmpl", template=None
+):
+    # prompt = hub.pull("hwchase17/react-chat")
     prompt = PromptTemplate.from_file(template_file_path)
-    
+
     # Set up prompt template
     template = """
     Based on the retrieved context, respond with an accurate answer. Use the provided tools to support your response.
@@ -140,156 +148,185 @@ def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmp
 
     return prompt, system_instructions
 
+
 class RAG:
-    def __init__(self, llm, embeddings, collection_name, db_client):
-        self.llm = llm
-        self.embeddings = embeddings
+    def __init__(self, collection_name, db_client):
+        # self.llm = llm
+        # self.embedding_svc = embedding_svc
         self.collection_name = collection_name
         self.db_client = db_client
 
-    @retry(retry=retry_if_exception_type(ProtocolError), stop=stop_after_attempt(5), wait=wait_fixed(2))
+    @retry(
+        retry=retry_if_exception_type(ProtocolError),
+        stop=stop_after_attempt(5),
+        wait=wait_fixed(2),
+    )
     def load_documents(self, doc, num_docs=250):
         documents = []
-        for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]", num_proc=10).to_list():
+        for data in datasets.load_dataset(
+            doc, split=f"train[:{num_docs}]", num_proc=10
+        ).to_list():
             documents.append(
                 Document(
                     page_content=data["text"],
-                    metadata=dict(user=data["user"],
-                                  workspace=data["workspace"]),
+                    metadata=dict(user=data["user"], workspace=data["workspace"]),
                 )
             )
         print("Document loaded")
         return documents
 
     def chunk_doc(self, pages, chunk_size=512, chunk_overlap=30):
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=chunk_size, chunk_overlap=chunk_overlap
+        tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5")
+        text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
+            tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap
         )
         chunks = text_splitter.split_documents(pages)
         print("Document chunked")
         return chunks
 
-    def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_size=32):
+    def insert_embeddings(self, chunks, chroma_embedding_function, batch_size=32):
+        print(
+            "Inserting embeddings into collection: {collection_name}".format(
+                collection_name=self.collection_name
+            )
+        )
         collection = self.db_client.get_or_create_collection(
             self.collection_name, embedding_function=chroma_embedding_function
         )
         for i in range(0, len(chunks), batch_size):
-            batch = chunks[i:i + batch_size]
+            batch = chunks[i : i + batch_size]
             chunk_ids = [str(uuid.uuid1()) for _ in batch]
             metadatas = [chunk.metadata for chunk in batch]
             documents = [chunk.page_content for chunk in batch]
-            
-            collection.add(
-                ids=chunk_ids,
-                metadatas=metadatas,
-                documents=documents
-            )
-        db = Chroma(
-            embedding_function=embedder,
-            collection_name=self.collection_name,
-            client=self.db_client,
-        )
+
+            collection.add(ids=chunk_ids, metadatas=metadatas, documents=documents)
+        # db = Chroma(
+        #     embedding_function=embedder,
+        #     collection_name=self.collection_name,
+        #     client=self.db_client,
+        # )
         print("Embeddings inserted\n")
-        return db
+        # return db
 
-    def query_docs(self, model, question, vector_store, prompt, chat_history):
+    def query_docs(
+        self, model, question, vector_store, prompt, chat_history, use_reranker=False
+    ):
         retriever = vector_store.as_retriever(
-            search_type="similarity", search_kwargs={"k": 4}
+            search_type="similarity", search_kwargs={"k": 10}
         )
+        if use_reranker:
+            compressor = TEIRerank(
+                url="http://{host}:{port}".format(
+                    host=os.getenv("RERANKER_HOST", "localhost"),
+                    port=os.getenv("RERANKER_PORT", "8082"),
+                ),
+                top_n=4,
+                batch_size=10,
+            )
+            retriever = ContextualCompressionRetriever(
+                base_compressor=compressor, base_retriever=retriever
+            )
+
         pass_question = lambda input: input["question"]
         rag_chain = (
-            RunnablePassthrough.assign(
-                context= pass_question | retriever | format_docs
-            )
+            RunnablePassthrough.assign(context=pass_question | retriever | format_docs)
             | prompt
             | model
             | StrOutputParser()
         )
+
        answer = rag_chain.invoke({"question": question, "chat_history": chat_history})
         return answer
 
+
 def format_docs(docs):
     return "\n\n".join(doc.page_content for doc in docs)
 
-def create_retriever(name, model, description, client, chroma_embedding_function, embedder):
-    rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client)
-    pages = rag.load_documents("spencer/software_slacks")
-    chunks = rag.chunk_doc(pages)
-    vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder)
-    retriever = vector_store.as_retriever(
-        search_type="similarity", search_kwargs={"k": 10}
-    )
-    info_retriever = create_retriever_tool(retriever, name, description)
-    return info_retriever
-
-def create_reranker_retriever(name, model, description, client, chroma_embedding_function, embedder):
-    rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client)
+
+def create_retriever(
+    name, description, client, chroma_embedding_function, embedding_svc, reranker=False
+):
+    collection_name = "software-slacks"
+    rag = RAG(collection_name=collection_name, db_client=client)
     pages = rag.load_documents("spencer/software_slacks", num_docs=100)
     chunks = rag.chunk_doc(pages)
-    vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder)
-    compressor = TEIRerank(url="http://{host}:{port}".format(host=os.getenv("RERANKER_HOST", "localhost"),
-                                                             port=os.getenv("RERANKER_PORT", "8082")),
-                           top_n=10,
-                           batch_size=16)
-    retriever = vector_store.as_retriever(
-        search_type="similarity", search_kwargs={"k": 100}
-    )
-    compression_retriever = ContextualCompressionRetriever(
-        base_compressor=compressor, base_retriever=retriever
+    rag.insert_embeddings(chunks, chroma_embedding_function)
+    vector_store = Chroma(
+        embedding_function=embedding_svc,
+        collection_name=collection_name,
+        client=client,
     )
-    info_retriever = create_retriever_tool(compression_retriever, name, description)
-    return info_retriever
+    if reranker:
+        compressor = TEIRerank(
+            url="http://{host}:{port}".format(
+                host=os.getenv("RERANKER_HOST", "localhost"),
+                port=os.getenv("RERANKER_PORT", "8082"),
+            ),
+            top_n=10,
+            batch_size=16,
+        )
 
-def setup_tools(_model, _client, _chroma_embedding_function, _embedder):
-    stackexchange_wrapper = StackExchangeAPIWrapper(max_results=3)
-    stackexchange_tool = StackExchangeTool(api_wrapper=stackexchange_wrapper)
-
-    web_search_tool = TavilySearchResults(max_results=10,
-                                          handle_tool_error=True)
-
-    #retriever = create_retriever(
-    #    name="Slack conversations retriever",
-    #    model=_model,
-    #    description="Retrieves conversations from Slack for context.",
-    #    client=_client,
-    #    chroma_embedding_function=_chroma_embedding_function,
-    #    embedder=_embedder,
-    #)
-
-    if os.getenv("USE_RERANKER", "False") == True:
-        retriever = create_reranker_retriever(
-            name="slack_conversations_retriever",
-            model=_model,
-            description="Useful for when you need to answer from Slack conversations.",
-            client=_client,
-            chroma_embedding_function=_chroma_embedding_function,
-            embedder=_embedder,
+        retriever = vector_store.as_retriever(
+            search_type="similarity", search_kwargs={"k": 100}
+        )
+        compression_retriever = ContextualCompressionRetriever(
+            base_compressor=compressor, base_retriever=retriever
        )
+        info_retriever = create_retriever_tool(compression_retriever, name, description)
     else:
-        retriever = create_retriever(
-            name="slack_conversations_retriever",
-            model=_model,
-            description="Useful for when you need to answer from Slack conversations.",
-            client=_client,
-            chroma_embedding_function=_chroma_embedding_function,
-            embedder=_embedder,
+        retriever = vector_store.as_retriever(
+            search_type="similarity", search_kwargs={"k": 10}
         )
+        info_retriever = create_retriever_tool(retriever, name, description)
+
+    return info_retriever
+
+
+def setup_tools(_model, _client, _chroma_embedding_function, _embedder):
+    tools = []
+    if (
+        os.getenv("STACK_OVERFLOW_API_KEY")
+        and os.getenv("STACK_OVERFLOW_API_KEY").strip()
+    ):
+        stackexchange_wrapper = StackExchangeAPIWrapper(max_results=3)
+        stackexchange_tool = StackExchangeTool(api_wrapper=stackexchange_wrapper)
+        tools.append(stackexchange_tool)
+
+    if os.getenv("TAVILY_API_KEY") and os.getenv("TAVILY_API_KEY").strip():
+        web_search_tool = TavilySearchResults(max_results=10, handle_tool_error=True)
+        tools.append(web_search_tool)
+
+    use_reranker = os.getenv("USE_RERANKER", "False") == "True"
+    retriever = create_retriever(
+        "slack_conversations_retriever",
+        "Useful for when you need to answer from Slack conversations.",
+        _client,
+        _chroma_embedding_function,
+        _embedder,
+        reranker=use_reranker,
+    )
+    tools.append(retriever)
+    return tools
 
-    return [web_search_tool, stackexchange_tool, retriever]
 
 @st.cache_resource
 def setup_agent(_model, _prompt, _client, _chroma_embedding_function, _embedder):
     tools = setup_tools(_model, _client, _chroma_embedding_function, _embedder)
-    agent = create_react_agent(llm=_model, prompt=_prompt, tools=tools, )
+    agent = create_react_agent(
+        llm=_model,
+        prompt=_prompt,
+        tools=tools,
+    )
     agent_executor = AgentExecutor(
         agent=agent, verbose=True, tools=tools, handle_parsing_errors=True
     )
     return agent_executor
 
+
 def main():
     client = setup_chroma_client()
-    chroma_embedding_function = setup_chroma_embedding_function()
+    chroma_embedding_function = hf_embedding_server()
 
     prompt, system_instructions = load_prompt_and_system_ins()
     if os.getenv("ENABLE_PORTKEY", "False") == "True":
         model = setup_portkey_integrated_model()
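[Note — outside the diff] chunk_doc now measures chunk_size and chunk_overlap in tokens of the BAAI/bge-large-en-v1.5 tokenizer rather than in characters, so chunks line up with the embedding model's context window. A standalone sketch of the same call:

    from transformers import AutoTokenizer
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document

    # Token-aware splitting: lengths are counted with the embedding model's
    # own tokenizer instead of len(str).
    tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5")
    splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer, chunk_size=512, chunk_overlap=30
    )
    chunks = splitter.split_documents([Document(page_content="some long text ...")])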
@@ -305,7 +342,7 @@ def main():
     st.text("Made with ❤️ by InfraCloud Technologies")
     st.markdown(
         """
-    InSightful is an AI assistant that helps you with your questions. 
+    InSightful is an AI assistant that helps you with your questions.
     - It can browse past conversations with your colleagues/teammates and can search StackOverflow for technical questions.
     - With access to the web, InSightful can also conduct its own research for you."""
     )
@@ -333,8 +370,9 @@ def main():
 
         st.session_state["chat_history"] = chat_history
 
+
 if __name__ == "__main__":
-    #authenticator = authenticate()
-    #if st.session_state['authentication_status']:
+    # authenticator = authenticate()
+    # if st.session_state['authentication_status']:
     #    authenticator.logout()
-    main() 
+    main()
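[Note — outside the diff] query_docs and create_retriever now share one reranking pattern: over-fetch from the vector store, then compress with the TEI reranker behind a ContextualCompressionRetriever. A condensed sketch of that wiring, assuming vector_store is a Chroma instance built as in create_retriever (TEIRerank is this repo's own wrapper in tei_rerank.py):

    import os
    from langchain.retrievers import ContextualCompressionRetriever
    from tei_rerank import TEIRerank  # local module in this repo

    compressor = TEIRerank(
        url="http://{host}:{port}".format(
            host=os.getenv("RERANKER_HOST", "localhost"),
            port=os.getenv("RERANKER_PORT", "8082"),
        ),
        top_n=10,
        batch_size=16,
    )
    # Over-fetch 100 candidates, let the reranker keep the best 10.
    base = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 100})
    retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=base
    )
    docs = retriever.invoke("example query about a past Slack discussion")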
diff --git a/multi_tenant_rag.py b/multi_tenant_rag.py
index 29ba011..fb4e6d1 100644
--- a/multi_tenant_rag.py
+++ b/multi_tenant_rag.py
@@ -1,70 +1,87 @@
-import streamlit as st
-import streamlit_authenticator as stauth
-from streamlit_authenticator.utilities import RegisterError, LoginError
 import os
-from langchain_community.vectorstores.chroma import Chroma
-from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins
-from app import setup_huggingface_embeddings, setup_huggingface_endpoint
-from app import RAG
-from langchain import hub
 import tempfile
-from langchain_community.document_loaders import PyPDFLoader
-
 import yaml
 from yaml.loader import SafeLoader
+import streamlit as st
+import streamlit_authenticator as stauth
+from streamlit_authenticator.utilities import RegisterError
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.vectorstores.chroma import Chroma
+from unstructured.cleaners.core import clean_extra_whitespace, group_broken_paragraphs
+
+from app import (
+    setup_chroma_client,
+    hf_embedding_server,
+    load_prompt_and_system_ins,
+    setup_huggingface_embeddings,
+    setup_huggingface_endpoint,
+    RAG,
+)
 
 
 def configure_authenticator():
-    with open('.streamlit/config.yaml') as file:
+    with open(".streamlit/config.yaml") as file:
         config = yaml.load(file, Loader=SafeLoader)
-    
+
     authenticator = stauth.Authenticate(
-        config['credentials'],
-        config['cookie']['name'],
-        config['cookie']['key'],
-        config['cookie']['expiry_days'],
-        config['pre-authorized']
+        config["credentials"],
+        config["cookie"]["name"],
+        config["cookie"]["key"],
+        config["cookie"]["expiry_days"],
+        config["pre-authorized"],
     )
     return authenticator
 
+
 def authenticate(op):
     authenticator = configure_authenticator()
     if op == "login":
         name, authentication_status, username = authenticator.login()
-        st.session_state['authentication_status'] = authentication_status
-        st.session_state['username'] = username
+        st.session_state["authentication_status"] = authentication_status
+        st.session_state["username"] = username
     elif op == "register":
         try:
-            (email_of_registered_user,
-             username_of_registered_user,
-             name_of_registered_user) = authenticator.register_user(pre_authorization=False)
+            (
+                email_of_registered_user,
+                username_of_registered_user,
+                name_of_registered_user,
+            ) = authenticator.register_user(pre_authorization=False)
             if email_of_registered_user:
-                st.success('User registered successfully')
+                st.success("User registered successfully")
         except RegisterError as e:
             st.error(e)
     return authenticator
 
+
 class MultiTenantRAG(RAG):
-    def __init__(self, user_id, llm, embeddings, collection_name, db_client):
+    def __init__(self, user_id, collection_name, db_client):
         self.user_id = user_id
-        super().__init__(llm, embeddings, collection_name, db_client)
-    
+        super().__init__(collection_name, db_client)
+
     def load_documents(self, doc):
-        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(doc.name)[1]) as tmp:
+        with tempfile.NamedTemporaryFile(
+            delete=False, suffix=os.path.splitext(doc.name)[1]
+        ) as tmp:
             tmp.write(doc.getvalue())
             tmp_path = tmp.name
 
         loader = PyPDFLoader(tmp_path)
         documents = loader.load()
-        return documents
+        cleaned_pages = []
+        for doc in documents:
+            doc.page_content = clean_extra_whitespace(doc.page_content)
+            doc.page_content = group_broken_paragraphs(doc.page_content)
+            cleaned_pages.append(doc)
+        return cleaned_pages
 
+
 def main():
     llm = setup_huggingface_endpoint(model_id="qwen/Qwen2-7B-Instruct")
-    embeddings = setup_huggingface_embeddings()
+    embedding_svc = setup_huggingface_embeddings()
 
-    chroma_embeddings = setup_chroma_embedding_function()
+    chroma_embeddings = hf_embedding_server()
 
-    user_id = st.session_state['username']
+    user_id = st.session_state["username"]
 
     client = setup_chroma_client()
 
     # Set up prompt template
@@ -73,9 +90,12 @@ def main():
     Be concise and always provide accurate, specific, and relevant information.
     """
-    
-    prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template)
-    
+
+    prompt, system_instructions = load_prompt_and_system_ins(
+        template_file_path="templates/multi_tenant_rag_prompt_template.tmpl",
+        template=template,
+    )
+
     chat_history = st.session_state.get(
         "chat_history", [{"role": "system", "content": system_instructions.content}]
     )
@@ -86,43 +106,67 @@ def main():
 
     if user_id:
-        collection = client.get_or_create_collection(f"user-collection-{user_id}",
-                                                     embedding_function=chroma_embeddings)
+        collection = client.get_or_create_collection(
+            f"user-collection-{user_id}", embedding_function=chroma_embeddings
+        )
         uploaded_file = st.file_uploader("Upload a document", type=["pdf"])
-        rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client)
+        rag = MultiTenantRAG(user_id, collection.name, client)
 
         # prompt = hub.pull("rlm/rag-prompt")
 
+        vectorstore = Chroma(
+            embedding_function=embedding_svc,
+            collection_name=collection.name,
+            client=client,
+        )
+
         if uploaded_file:
             document = rag.load_documents(uploaded_file)
             chunks = rag.chunk_doc(document)
-            vectorstore = rag.insert_embeddings(chunks=chunks,
-                                                chroma_embedding_function=chroma_embeddings,
-                                                embedder=embeddings,
-                                                batch_size=32)
-        else:
-            vectorstore = Chroma(embedding_function=embeddings,
-                                 collection_name=collection.name,
-                                 client=client)
-        
+            rag.insert_embeddings(
+                chunks=chunks,
+                chroma_embedding_function=chroma_embeddings,
+                # embedder=embedding_svc,
+                batch_size=32,
+            )
+
         if question := st.chat_input("Chat with your doc"):
             st.chat_message("user").markdown(question)
             with st.spinner():
-                answer = rag.query_docs(model=llm,
-                                        question=question,
-                                        vector_store=vectorstore,
-                                        prompt=prompt,
-                                        chat_history=chat_history)
-                print("####\n#### Answer received by querying docs: " + answer + "\n####")
+                answer = rag.query_docs(
+                    model=llm,
+                    question=question,
+                    vector_store=vectorstore,
+                    prompt=prompt,
+                    chat_history=chat_history,
+                    use_reranker=False,
+                )
+                # print(
+                #     "####\n#### Answer received by querying docs: " + answer + "\n####"
+                # )
+
+                answer_with_reranker = rag.query_docs(
+                    model=llm,
+                    question=question,
+                    vector_store=vectorstore,
+                    prompt=prompt,
+                    chat_history=chat_history,
+                    use_reranker=True,
+                )
+
                 st.chat_message("assistant").markdown(answer)
+                st.chat_message("assistant").markdown(answer_with_reranker)
+
                 chat_history.append({"role": "user", "content": question})
                 chat_history.append({"role": "assistant", "content": answer})
 
                 st.session_state["chat_history"] = chat_history
 
+
 if __name__ == "__main__":
+    authenticator = authenticate("login")
-    if st.session_state['authentication_status']:
+    if st.session_state["authentication_status"]:
         authenticator.logout()
-        main()
\ No newline at end of file
+        main()
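[Note — outside the diff] load_documents now routes every PyPDFLoader page through two cleaners from unstructured before chunking. A quick illustration of what those helpers do, on a made-up example string:

    from unstructured.cleaners.core import (
        clean_extra_whitespace,
        group_broken_paragraphs,
    )

    raw = "RISK  FACTORS\n\nThe risk\nfactors described\nbelow apply."
    text = clean_extra_whitespace(raw)   # collapse runs of spaces
    text = group_broken_paragraphs(text)  # rejoin lines split mid-paragraph
    print(text)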
diff --git a/requirements.txt b/requirements.txt
index 94ec8db..b83ae62 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,5 @@ streamlit_authenticator==0.3.3
 stackapi==0.3.1
 langchainhub==0.1.21
 pypdf==4.3.1
-jinja2>=3.1.0
\ No newline at end of file
+jinja2>=3.1.0
+unstructured==0.15.7
\ No newline at end of file
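[Note — outside the diff] Tenant isolation in multi_tenant_rag.py rests entirely on the per-user collection name, user-collection-<username>. A rough sketch of inspecting those collections outside Streamlit; the usernames here are the test accounts from .streamlit/config.yaml:

    import chromadb

    client = chromadb.HttpClient(host="localhost", port="8000")
    # Each tenant reads and writes only its own collection, so one user's
    # documents never surface in another user's retrievals.
    for user_id in ("stest", "stest2"):
        collection = client.get_or_create_collection(f"user-collection-{user_id}")
        print(collection.name, collection.count())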