From 31ea15d3291530b1730be26e2db19dc0f2fe52f9 Mon Sep 17 00:00:00 2001 From: Sanket Date: Thu, 22 Aug 2024 19:12:40 +0530 Subject: [PATCH 1/3] Add fixes with tokenizer Signed-off-by: Sanket --- .streamlit/config.yaml | 12 +++ app.py | 189 +++++++++++++++++++++++++++-------------- multi_tenant_rag.py | 139 ++++++++++++++++++++---------- requirements.txt | 3 +- 4 files changed, 233 insertions(+), 110 deletions(-) diff --git a/.streamlit/config.yaml b/.streamlit/config.yaml index 21feaef..77c801c 100644 --- a/.streamlit/config.yaml +++ b/.streamlit/config.yaml @@ -10,6 +10,18 @@ credentials: logged_in: false name: John Doe password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO + stest: + email: sankettest@gmail.com + failed_login_attempts: 0 + logged_in: false + name: Sanket Test + password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO + stest2: + email: sankettest@gmail.com + failed_login_attempts: 0 + logged_in: false + name: Sanket Test + password: $2b$12$iWlVOac3uujRvTrXDi6wructXftKmo/GyQd6SMu5FmyX306kH.yFO rbriggs: email: rbriggs@gmail.com failed_login_attempts: 0 diff --git a/app.py b/app.py index 507f69e..cabfb68 100644 --- a/app.py +++ b/app.py @@ -23,6 +23,7 @@ from langchain.schema import Document from langchain.retrievers import ContextualCompressionRetriever from tei_rerank import TEIRerank +from transformers import AutoTokenizer import streamlit as st import streamlit_authenticator as stauth @@ -35,21 +36,22 @@ st.set_page_config(layout="wide", page_title="InSightful") + def authenticate(): - with open('.streamlit/config.yaml') as file: + with open(".streamlit/config.yaml") as file: config = yaml.load(file, Loader=SafeLoader) authenticator = stauth.Authenticate( - config['credentials'], - config['cookie']['name'], - config['cookie']['key'], - config['cookie']['expiry_days'], - config['pre-authorized'] + config["credentials"], + config["cookie"]["name"], + config["cookie"]["key"], + config["cookie"]["expiry_days"], + config["pre-authorized"], ) name, authentication_status, username = authenticator.login() - st.session_state['authentication_status'] = authentication_status - st.session_state['username'] = username + st.session_state["authentication_status"] = authentication_status + st.session_state["username"] = username return authenticator @@ -60,20 +62,20 @@ def setup_chroma_client(): host=os.getenv("VECTORDB_HOST", "localhost"), port=os.getenv("VECTORDB_PORT", "8000"), ), - settings=Settings(allow_reset=True, - anonymized_telemetry=False) - + settings=Settings(allow_reset=True, anonymized_telemetry=False), ) return client + # Set up Chroma embedding function -def setup_chroma_embedding_function(): - chroma_embedding_function = HuggingFaceEmbeddingServer( +def hf_embedding_server(): + _embedding_function = HuggingFaceEmbeddingServer( url="http://{host}:{port}/embed".format( host=os.getenv("TEI_HOST", "localhost"), port=os.getenv("TEI_PORT", "8081") ) ) - return chroma_embedding_function + return _embedding_function + # Set up HuggingFaceEndpoint model def setup_huggingface_endpoint(model_id): @@ -85,24 +87,27 @@ def setup_huggingface_endpoint(model_id): task="conversational", stop_sequences=[ "<|im_end|>", - "{your_token}".format(your_token=os.getenv("STOP_TOKEN", "<|end_of_text|>")), + "{your_token}".format( + your_token=os.getenv("STOP_TOKEN", "<|end_of_text|>") + ), ], ) - model = ChatHuggingFace(llm=llm, - model_id=model_id) + model = ChatHuggingFace(llm=llm, model_id=model_id) return model + def setup_portkey_integrated_model(): from 
portkey_ai import createHeaders, PORTKEY_GATEWAY_URL from langchain_openai import ChatOpenAI + portkey_headers = createHeaders( api_key=os.getenv("PORTKEY_API_KEY"), custom_host=os.getenv("PORTKEY_CUSTOM_HOST"), - provider=os.getenv("PORTKEY_PROVIDER") + provider=os.getenv("PORTKEY_PROVIDER"), ) - + model = ChatOpenAI( api_key="None", base_url=PORTKEY_GATEWAY_URL, @@ -112,6 +117,7 @@ def setup_portkey_integrated_model(): return model + # Set up HuggingFaceEndpointEmbeddings embedder def setup_huggingface_embeddings(): embedder = HuggingFaceEndpointEmbeddings( @@ -122,10 +128,13 @@ def setup_huggingface_embeddings(): ) return embedder -def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None): - #prompt = hub.pull("hwchase17/react-chat") + +def load_prompt_and_system_ins( + template_file_path="templates/prompt_template.tmpl", template=None +): + # prompt = hub.pull("hwchase17/react-chat") prompt = PromptTemplate.from_file(template_file_path) - + # Set up prompt template template = """ Based on the retrieved context, respond with an accurate answer. Use the provided tools to support your response. @@ -140,66 +149,101 @@ def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmp return prompt, system_instructions + class RAG: - def __init__(self, llm, embeddings, collection_name, db_client): - self.llm = llm - self.embeddings = embeddings + def __init__(self, collection_name, db_client): + # self.llm = llm + # self.embedding_svc = embedding_svc self.collection_name = collection_name self.db_client = db_client - @retry(retry=retry_if_exception_type(ProtocolError), stop=stop_after_attempt(5), wait=wait_fixed(2)) + @retry( + retry=retry_if_exception_type(ProtocolError), + stop=stop_after_attempt(5), + wait=wait_fixed(2), + ) def load_documents(self, doc, num_docs=250): documents = [] - for data in datasets.load_dataset(doc, split=f"train[:{num_docs}]", num_proc=10).to_list(): + for data in datasets.load_dataset( + doc, split=f"train[:{num_docs}]", num_proc=10 + ).to_list(): documents.append( Document( page_content=data["text"], - metadata=dict(user=data["user"], - workspace=data["workspace"]), + metadata=dict(user=data["user"], workspace=data["workspace"]), ) ) print("Document loaded") return documents def chunk_doc(self, pages, chunk_size=512, chunk_overlap=30): - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=chunk_size, chunk_overlap=chunk_overlap + tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5") + text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( + tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap ) chunks = text_splitter.split_documents(pages) print("Document chunked") return chunks - def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_size=32): + def insert_embeddings(self, chunks, chroma_embedding_function, batch_size=32): + print( + "Inserting embeddings into collection: {collection_name}".format( + collection_name=self.collection_name + ) + ) collection = self.db_client.get_or_create_collection( self.collection_name, embedding_function=chroma_embedding_function ) for i in range(0, len(chunks), batch_size): - batch = chunks[i:i + batch_size] + batch = chunks[i : i + batch_size] chunk_ids = [str(uuid.uuid1()) for _ in batch] metadatas = [chunk.metadata for chunk in batch] documents = [chunk.page_content for chunk in batch] - - collection.add( - ids=chunk_ids, - metadatas=metadatas, - documents=documents - ) - db = Chroma( - 
embedding_function=embedder, - collection_name=self.collection_name, - client=self.db_client, - ) + + collection.add(ids=chunk_ids, metadatas=metadatas, documents=documents) + # db = Chroma( + # embedding_function=embedder, + # collection_name=self.collection_name, + # client=self.db_client, + # ) print("Embeddings inserted\n") - return db + # return db def query_docs(self, model, question, vector_store, prompt, chat_history): retriever = vector_store.as_retriever( search_type="similarity", search_kwargs={"k": 4} ) pass_question = lambda input: input["question"] + rag_chain = ( + RunnablePassthrough.assign(context=pass_question | retriever | format_docs) + | prompt + | model + | StrOutputParser() + ) + answer = rag_chain.invoke({"question": question, "chat_history": chat_history}) + return answer + + def query_docs_with_reranker( + self, model, question, vector_store, prompt, chat_history + ): + retriever = vector_store.as_retriever( + search_type="similarity", search_kwargs={"k": 20} + ) + compressor = TEIRerank( + url="http://{host}:{port}".format( + host=os.getenv("RERANKER_HOST", "localhost"), + port=os.getenv("RERANKER_PORT", "8082"), + ), + top_n=4, + batch_size=10, + ) + compression_retriever = ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=retriever + ) + pass_question = lambda input: input["question"] rag_chain = ( RunnablePassthrough.assign( - context= pass_question | retriever | format_docs + context=pass_question | compression_retriever | format_docs ) | prompt | model @@ -208,11 +252,15 @@ def query_docs(self, model, question, vector_store, prompt, chat_history): answer = rag_chain.invoke({"question": question, "chat_history": chat_history}) return answer + def format_docs(docs): return "\n\n".join(doc.page_content for doc in docs) -def create_retriever(name, model, description, client, chroma_embedding_function, embedder): - rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client) + +def create_retriever( + name, model, description, client, chroma_embedding_function, embedder +): + rag = RAG(collection_name="Slack", db_client=client) pages = rag.load_documents("spencer/software_slacks") chunks = rag.chunk_doc(pages) vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder) @@ -222,15 +270,22 @@ def create_retriever(name, model, description, client, chroma_embedding_function info_retriever = create_retriever_tool(retriever, name, description) return info_retriever -def create_reranker_retriever(name, model, description, client, chroma_embedding_function, embedder): - rag = RAG(llm=model, embeddings=embedder, collection_name="Slack", db_client=client) + +def create_reranker_retriever( + name, model, description, client, chroma_embedding_function, embedder +): + rag = RAG(collection_name="Slack", db_client=client) pages = rag.load_documents("spencer/software_slacks", num_docs=100) chunks = rag.chunk_doc(pages) vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder) - compressor = TEIRerank(url="http://{host}:{port}".format(host=os.getenv("RERANKER_HOST", "localhost"), - port=os.getenv("RERANKER_PORT", "8082")), - top_n=10, - batch_size=16) + compressor = TEIRerank( + url="http://{host}:{port}".format( + host=os.getenv("RERANKER_HOST", "localhost"), + port=os.getenv("RERANKER_PORT", "8082"), + ), + top_n=10, + batch_size=16, + ) retriever = vector_store.as_retriever( search_type="similarity", search_kwargs={"k": 100} ) @@ -240,21 +295,21 @@ def 
create_reranker_retriever(name, model, description, client, chroma_embedding info_retriever = create_retriever_tool(compression_retriever, name, description) return info_retriever + def setup_tools(_model, _client, _chroma_embedding_function, _embedder): stackexchange_wrapper = StackExchangeAPIWrapper(max_results=3) stackexchange_tool = StackExchangeTool(api_wrapper=stackexchange_wrapper) - web_search_tool = TavilySearchResults(max_results=10, - handle_tool_error=True) + web_search_tool = TavilySearchResults(max_results=10, handle_tool_error=True) - #retriever = create_retriever( + # retriever = create_retriever( # name="Slack conversations retriever", # model=_model, # description="Retrieves conversations from Slack for context.", # client=_client, # chroma_embedding_function=_chroma_embedding_function, # embedder=_embedder, - #) + # ) if os.getenv("USE_RERANKER", "False") == True: retriever = create_reranker_retriever( @@ -275,21 +330,26 @@ def setup_tools(_model, _client, _chroma_embedding_function, _embedder): embedder=_embedder, ) - return [web_search_tool, stackexchange_tool, retriever] + @st.cache_resource def setup_agent(_model, _prompt, _client, _chroma_embedding_function, _embedder): tools = setup_tools(_model, _client, _chroma_embedding_function, _embedder) - agent = create_react_agent(llm=_model, prompt=_prompt, tools=tools, ) + agent = create_react_agent( + llm=_model, + prompt=_prompt, + tools=tools, + ) agent_executor = AgentExecutor( agent=agent, verbose=True, tools=tools, handle_parsing_errors=True ) return agent_executor + def main(): client = setup_chroma_client() - chroma_embedding_function = setup_chroma_embedding_function() + chroma_embedding_function = hf_embedding_server() prompt, system_instructions = load_prompt_and_system_ins() if os.getenv("ENABLE_PORTKEY", "False") == "True": model = setup_portkey_integrated_model() @@ -333,8 +393,9 @@ def main(): st.session_state["chat_history"] = chat_history + if __name__ == "__main__": - #authenticator = authenticate() - #if st.session_state['authentication_status']: + # authenticator = authenticate() + # if st.session_state['authentication_status']: # authenticator.logout() - main() + main() diff --git a/multi_tenant_rag.py b/multi_tenant_rag.py index 29ba011..a676154 100644 --- a/multi_tenant_rag.py +++ b/multi_tenant_rag.py @@ -1,70 +1,94 @@ +# from langchain.globals import set_verbose, set_debug + +# set_verbose(True) +# set_debug(True) + import streamlit as st import streamlit_authenticator as stauth from streamlit_authenticator.utilities import RegisterError, LoginError import os from langchain_community.vectorstores.chroma import Chroma -from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins +from app import ( + setup_chroma_client, + hf_embedding_server, + load_prompt_and_system_ins, +) from app import setup_huggingface_embeddings, setup_huggingface_endpoint from app import RAG from langchain import hub import tempfile from langchain_community.document_loaders import PyPDFLoader +from unstructured.cleaners.core import clean_extra_whitespace, group_broken_paragraphs +from langchain_core.documents import Document import yaml from yaml.loader import SafeLoader + def configure_authenticator(): - with open('.streamlit/config.yaml') as file: + with open(".streamlit/config.yaml") as file: config = yaml.load(file, Loader=SafeLoader) - + authenticator = stauth.Authenticate( - config['credentials'], - config['cookie']['name'], - config['cookie']['key'], - 
config['cookie']['expiry_days'], - config['pre-authorized'] + config["credentials"], + config["cookie"]["name"], + config["cookie"]["key"], + config["cookie"]["expiry_days"], + config["pre-authorized"], ) return authenticator + def authenticate(op): authenticator = configure_authenticator() if op == "login": name, authentication_status, username = authenticator.login() - st.session_state['authentication_status'] = authentication_status - st.session_state['username'] = username + st.session_state["authentication_status"] = authentication_status + st.session_state["username"] = username elif op == "register": try: - (email_of_registered_user, - username_of_registered_user, - name_of_registered_user) = authenticator.register_user(pre_authorization=False) + ( + email_of_registered_user, + username_of_registered_user, + name_of_registered_user, + ) = authenticator.register_user(pre_authorization=False) if email_of_registered_user: - st.success('User registered successfully') + st.success("User registered successfully") except RegisterError as e: st.error(e) return authenticator + class MultiTenantRAG(RAG): - def __init__(self, user_id, llm, embeddings, collection_name, db_client): + def __init__(self, user_id, collection_name, db_client): self.user_id = user_id - super().__init__(llm, embeddings, collection_name, db_client) - + super().__init__(collection_name, db_client) + def load_documents(self, doc): - with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(doc.name)[1]) as tmp: + with tempfile.NamedTemporaryFile( + delete=False, suffix=os.path.splitext(doc.name)[1] + ) as tmp: tmp.write(doc.getvalue()) tmp_path = tmp.name loader = PyPDFLoader(tmp_path) documents = loader.load() - return documents + cleaned_pages = [] + for doc in documents: + doc.page_content = clean_extra_whitespace(doc.page_content) + doc.page_content = group_broken_paragraphs(doc.page_content) + cleaned_pages.append(doc) + return cleaned_pages + def main(): llm = setup_huggingface_endpoint(model_id="qwen/Qwen2-7B-Instruct") - embeddings = setup_huggingface_embeddings() + embedding_svc = setup_huggingface_embeddings() - chroma_embeddings = setup_chroma_embedding_function() + chroma_embeddings = hf_embedding_server() - user_id = st.session_state['username'] + user_id = st.session_state["username"] client = setup_chroma_client() # Set up prompt template @@ -73,9 +97,12 @@ def main(): Be concise and always provide accurate, specific, and relevant information. 
""" - - prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template) - + + prompt, system_instructions = load_prompt_and_system_ins( + template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", + template=template, + ) + chat_history = st.session_state.get( "chat_history", [{"role": "system", "content": system_instructions.content}] ) @@ -86,43 +113,65 @@ def main(): if user_id: - collection = client.get_or_create_collection(f"user-collection-{user_id}", - embedding_function=chroma_embeddings) + collection = client.get_or_create_collection( + f"user-collection-{user_id}", embedding_function=chroma_embeddings + ) uploaded_file = st.file_uploader("Upload a document", type=["pdf"]) - rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client) + rag = MultiTenantRAG(user_id, collection.name, client) # prompt = hub.pull("rlm/rag-prompt") + vectorstore = Chroma( + embedding_function=embedding_svc, + collection_name=collection.name, + client=client, + ) + if uploaded_file: document = rag.load_documents(uploaded_file) chunks = rag.chunk_doc(document) - vectorstore = rag.insert_embeddings(chunks=chunks, - chroma_embedding_function=chroma_embeddings, - embedder=embeddings, - batch_size=32) - else: - vectorstore = Chroma(embedding_function=embeddings, - collection_name=collection.name, - client=client) - + rag.insert_embeddings( + chunks=chunks, + chroma_embedding_function=chroma_embeddings, + # embedder=embedding_svc, + batch_size=32, + ) + if question := st.chat_input("Chat with your doc"): st.chat_message("user").markdown(question) with st.spinner(): - answer = rag.query_docs(model=llm, - question=question, - vector_store=vectorstore, - prompt=prompt, - chat_history=chat_history) - print("####\n#### Answer received by querying docs: " + answer + "\n####") + answer = rag.query_docs( + model=llm, + question=question, + vector_store=vectorstore, + prompt=prompt, + chat_history=chat_history, + ) + # print( + # "####\n#### Answer received by querying docs: " + answer + "\n####" + # ) + + answer_with_reranker = rag.query_docs_with_reranker( + model=llm, + question=question, + vector_store=vectorstore, + prompt=prompt, + chat_history=chat_history, + ) + st.chat_message("assistant").markdown(answer) + st.chat_message("assistant").markdown(answer_with_reranker) + chat_history.append({"role": "user", "content": question}) chat_history.append({"role": "assistant", "content": answer}) st.session_state["chat_history"] = chat_history + if __name__ == "__main__": + authenticator = authenticate("login") - if st.session_state['authentication_status']: + if st.session_state["authentication_status"]: authenticator.logout() - main() \ No newline at end of file + main() diff --git a/requirements.txt b/requirements.txt index 94ec8db..b83ae62 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ streamlit_authenticator==0.3.3 stackapi==0.3.1 langchainhub==0.1.21 pypdf==4.3.1 -jinja2>=3.1.0 \ No newline at end of file +jinja2>=3.1.0 +unstructured==0.15.7 \ No newline at end of file From 76cedfa068db4f1218998c5e9875301baf3234b8 Mon Sep 17 00:00:00 2001 From: Sanket Date: Thu, 22 Aug 2024 19:50:55 +0530 Subject: [PATCH 2/3] Optimize code bit Signed-off-by: Sanket --- app.py | 153 +++++++++++++++++--------------------------- multi_tenant_rag.py | 33 ++++------ 2 files changed, 72 insertions(+), 114 deletions(-) diff --git a/app.py b/app.py index cabfb68..c00ec23 100644 --- a/app.py +++ b/app.py @@ 
-15,12 +15,10 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_core.documents import Document from langchain_core.prompts import PromptTemplate -from langchain_chroma import Chroma import chromadb from chromadb.config import Settings from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer -from langchain.schema import Document from langchain.retrievers import ContextualCompressionRetriever from tei_rerank import TEIRerank from transformers import AutoTokenizer @@ -177,15 +175,20 @@ def load_documents(self, doc, num_docs=250): return documents def chunk_doc(self, pages, chunk_size=512, chunk_overlap=30): - tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5") + tokenizer = AutoTokenizer.from_pretrained( + "BAAI/bge-large-en-v1.5" + ) text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( - tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap + tokenizer, + chunk_size=chunk_size, chunk_overlap=chunk_overlap ) chunks = text_splitter.split_documents(pages) print("Document chunked") return chunks - def insert_embeddings(self, chunks, chroma_embedding_function, batch_size=32): + def insert_embeddings( + self, chunks, chroma_embedding_function, batch_size=32 + ): print( "Inserting embeddings into collection: {collection_name}".format( collection_name=self.collection_name @@ -209,90 +212,68 @@ def insert_embeddings(self, chunks, chroma_embedding_function, batch_size=32): print("Embeddings inserted\n") # return db - def query_docs(self, model, question, vector_store, prompt, chat_history): + def query_docs(self, model, question, vector_store, prompt, chat_history, use_reranker=False): retriever = vector_store.as_retriever( - search_type="similarity", search_kwargs={"k": 4} + search_type="similarity", search_kwargs={"k": 10} ) - pass_question = lambda input: input["question"] - rag_chain = ( - RunnablePassthrough.assign(context=pass_question | retriever | format_docs) - | prompt - | model - | StrOutputParser() - ) - answer = rag_chain.invoke({"question": question, "chat_history": chat_history}) - return answer + if use_reranker: + compressor = TEIRerank( + url="http://{host}:{port}".format( + host=os.getenv("RERANKER_HOST", "localhost"), + port=os.getenv("RERANKER_PORT", "8082"), + ), + top_n=4, + batch_size=10, + ) + retriever = ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=retriever + ) - def query_docs_with_reranker( - self, model, question, vector_store, prompt, chat_history - ): - retriever = vector_store.as_retriever( - search_type="similarity", search_kwargs={"k": 20} - ) - compressor = TEIRerank( - url="http://{host}:{port}".format( - host=os.getenv("RERANKER_HOST", "localhost"), - port=os.getenv("RERANKER_PORT", "8082"), - ), - top_n=4, - batch_size=10, - ) - compression_retriever = ContextualCompressionRetriever( - base_compressor=compressor, base_retriever=retriever - ) pass_question = lambda input: input["question"] rag_chain = ( - RunnablePassthrough.assign( - context=pass_question | compression_retriever | format_docs - ) + RunnablePassthrough.assign(context=pass_question | retriever | format_docs) | prompt | model | StrOutputParser() ) + answer = rag_chain.invoke({"question": question, "chat_history": chat_history}) return answer - def format_docs(docs): return "\n\n".join(doc.page_content for doc in docs) def create_retriever( - name, model, description, client, chroma_embedding_function, embedder + name, model, description, client, 
chroma_embedding_function, embedder, reranker=False ): rag = RAG(collection_name="Slack", db_client=client) - pages = rag.load_documents("spencer/software_slacks") + pages = rag.load_documents("spencer/software_slacks", num_docs=100) chunks = rag.chunk_doc(pages) vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder) - retriever = vector_store.as_retriever( - search_type="similarity", search_kwargs={"k": 10} - ) - info_retriever = create_retriever_tool(retriever, name, description) - return info_retriever + if reranker: + compressor = TEIRerank( + url="http://{host}:{port}".format( + host=os.getenv("RERANKER_HOST", "localhost"), + port=os.getenv("RERANKER_PORT", "8082"), + ), + top_n=10, + batch_size=16, + ) + retriever = vector_store.as_retriever( + search_type="similarity", search_kwargs={"k": 100} + ) + compression_retriever = ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=retriever + ) + info_retriever = create_retriever_tool(compression_retriever, name, description) + else: + retriever = vector_store.as_retriever( + search_type="similarity", search_kwargs={"k": 10} + ) + info_retriever = create_retriever_tool(retriever, name, description) -def create_reranker_retriever( - name, model, description, client, chroma_embedding_function, embedder -): - rag = RAG(collection_name="Slack", db_client=client) - pages = rag.load_documents("spencer/software_slacks", num_docs=100) - chunks = rag.chunk_doc(pages) - vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder) - compressor = TEIRerank( - url="http://{host}:{port}".format( - host=os.getenv("RERANKER_HOST", "localhost"), - port=os.getenv("RERANKER_PORT", "8082"), - ), - top_n=10, - batch_size=16, - ) - retriever = vector_store.as_retriever( - search_type="similarity", search_kwargs={"k": 100} - ) - compression_retriever = ContextualCompressionRetriever( - base_compressor=compressor, base_retriever=retriever - ) - info_retriever = create_retriever_tool(compression_retriever, name, description) return info_retriever @@ -302,34 +283,16 @@ def setup_tools(_model, _client, _chroma_embedding_function, _embedder): web_search_tool = TavilySearchResults(max_results=10, handle_tool_error=True) - # retriever = create_retriever( - # name="Slack conversations retriever", - # model=_model, - # description="Retrieves conversations from Slack for context.", - # client=_client, - # chroma_embedding_function=_chroma_embedding_function, - # embedder=_embedder, - # ) - - if os.getenv("USE_RERANKER", "False") == True: - retriever = create_reranker_retriever( - name="slack_conversations_retriever", - model=_model, - description="Useful for when you need to answer from Slack conversations.", - client=_client, - chroma_embedding_function=_chroma_embedding_function, - embedder=_embedder, - ) - else: - retriever = create_retriever( - name="slack_conversations_retriever", - model=_model, - description="Useful for when you need to answer from Slack conversations.", - client=_client, - chroma_embedding_function=_chroma_embedding_function, - embedder=_embedder, - ) - + use_reranker = os.getenv("USE_RERANKER", "False") == "True" + retriever = create_retriever( + name="slack_conversations_retriever", + model=_model, + description="Useful for when you need to answer from Slack conversations.", + client=_client, + chroma_embedding_function=_chroma_embedding_function, + embedder=_embedder, + reranker=use_reranker, + ) return [web_search_tool, stackexchange_tool, retriever] @@ -365,7 +328,7 @@ 
def main(): st.text("Made with ❤️ by InfraCloud Technologies") st.markdown( """ - InSightful is an AI assistant that helps you with your questions. + InSightful is an AI assistant that helps you with your questions. - It can browse past conversations with your colleagues/teammates and can search StackOverflow for technical questions. - With access to the web, InSightful can also conduct its own research for you.""" ) diff --git a/multi_tenant_rag.py b/multi_tenant_rag.py index a676154..fb4e6d1 100644 --- a/multi_tenant_rag.py +++ b/multi_tenant_rag.py @@ -1,29 +1,22 @@ -# from langchain.globals import set_verbose, set_debug - -# set_verbose(True) -# set_debug(True) - +import os +import tempfile +import yaml +from yaml.loader import SafeLoader import streamlit as st import streamlit_authenticator as stauth -from streamlit_authenticator.utilities import RegisterError, LoginError -import os +from streamlit_authenticator.utilities import RegisterError +from langchain_community.document_loaders import PyPDFLoader from langchain_community.vectorstores.chroma import Chroma +from unstructured.cleaners.core import clean_extra_whitespace, group_broken_paragraphs + from app import ( setup_chroma_client, hf_embedding_server, load_prompt_and_system_ins, + setup_huggingface_embeddings, + setup_huggingface_endpoint, + RAG, ) -from app import setup_huggingface_embeddings, setup_huggingface_endpoint -from app import RAG -from langchain import hub -import tempfile -from langchain_community.document_loaders import PyPDFLoader -from unstructured.cleaners.core import clean_extra_whitespace, group_broken_paragraphs -from langchain_core.documents import Document - -import yaml -from yaml.loader import SafeLoader - def configure_authenticator(): with open(".streamlit/config.yaml") as file: @@ -148,17 +141,19 @@ def main(): vector_store=vectorstore, prompt=prompt, chat_history=chat_history, + use_reranker=False, ) # print( # "####\n#### Answer received by querying docs: " + answer + "\n####" # ) - answer_with_reranker = rag.query_docs_with_reranker( + answer_with_reranker = rag.query_docs( model=llm, question=question, vector_store=vectorstore, prompt=prompt, chat_history=chat_history, + use_reranker=True, ) st.chat_message("assistant").markdown(answer) From 054edbc474c8666ce47bbecd4e64fda59a000eec Mon Sep 17 00:00:00 2001 From: Sanket Date: Thu, 22 Aug 2024 22:22:49 +0530 Subject: [PATCH 3/3] Fix app code Signed-off-by: Sanket --- app.py | 64 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/app.py b/app.py index c00ec23..bf836b5 100644 --- a/app.py +++ b/app.py @@ -15,6 +15,7 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_core.documents import Document from langchain_core.prompts import PromptTemplate +from langchain_community.vectorstores.chroma import Chroma import chromadb from chromadb.config import Settings from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer @@ -175,20 +176,15 @@ def load_documents(self, doc, num_docs=250): return documents def chunk_doc(self, pages, chunk_size=512, chunk_overlap=30): - tokenizer = AutoTokenizer.from_pretrained( - "BAAI/bge-large-en-v1.5" - ) + tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5") text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( - tokenizer, - chunk_size=chunk_size, chunk_overlap=chunk_overlap + tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap ) chunks = 
text_splitter.split_documents(pages) print("Document chunked") return chunks - def insert_embeddings( - self, chunks, chroma_embedding_function, batch_size=32 - ): + def insert_embeddings(self, chunks, chroma_embedding_function, batch_size=32): print( "Inserting embeddings into collection: {collection_name}".format( collection_name=self.collection_name @@ -212,9 +208,11 @@ def insert_embeddings( print("Embeddings inserted\n") # return db - def query_docs(self, model, question, vector_store, prompt, chat_history, use_reranker=False): + def query_docs( + self, model, question, vector_store, prompt, chat_history, use_reranker=False + ): retriever = vector_store.as_retriever( - search_type="similarity", search_kwargs={"k": 10} + search_type="similarity", search_kwargs={"k": 10} ) if use_reranker: compressor = TEIRerank( @@ -236,22 +234,28 @@ def query_docs(self, model, question, vector_store, prompt, chat_history, use_re | model | StrOutputParser() ) - + answer = rag_chain.invoke({"question": question, "chat_history": chat_history}) return answer + def format_docs(docs): return "\n\n".join(doc.page_content for doc in docs) def create_retriever( - name, model, description, client, chroma_embedding_function, embedder, reranker=False + name, description, client, chroma_embedding_function, embedding_svc, reranker=False ): - rag = RAG(collection_name="Slack", db_client=client) + collection_name = "software-slacks" + rag = RAG(collection_name=collection_name, db_client=client) pages = rag.load_documents("spencer/software_slacks", num_docs=100) chunks = rag.chunk_doc(pages) - vector_store = rag.insert_embeddings(chunks, chroma_embedding_function, embedder) - + rag.insert_embeddings(chunks, chroma_embedding_function) + vector_store = Chroma( + embedding_function=embedding_svc, + collection_name=collection_name, + client=client, + ) if reranker: compressor = TEIRerank( url="http://{host}:{port}".format( @@ -261,6 +265,7 @@ def create_retriever( top_n=10, batch_size=16, ) + retriever = vector_store.as_retriever( search_type="similarity", search_kwargs={"k": 100} ) @@ -278,22 +283,31 @@ def create_retriever( def setup_tools(_model, _client, _chroma_embedding_function, _embedder): - stackexchange_wrapper = StackExchangeAPIWrapper(max_results=3) - stackexchange_tool = StackExchangeTool(api_wrapper=stackexchange_wrapper) + tools = [] + if ( + os.getenv("STACK_OVERFLOW_API_KEY") + and os.getenv("STACK_OVERFLOW_API_KEY").strip() + ): + stackexchange_wrapper = StackExchangeAPIWrapper(max_results=3) + stackexchange_tool = StackExchangeTool(api_wrapper=stackexchange_wrapper) + tools.append(stackexchange_tool) - web_search_tool = TavilySearchResults(max_results=10, handle_tool_error=True) + if os.getenv("TAVILY_API_KEY") and os.getenv("TAVILY_API_KEY").strip(): + web_search_tool = TavilySearchResults(max_results=10, handle_tool_error=True) + tools.append(web_search_tool) use_reranker = os.getenv("USE_RERANKER", "False") == "True" retriever = create_retriever( - name="slack_conversations_retriever", - model=_model, - description="Useful for when you need to answer from Slack conversations.", - client=_client, - chroma_embedding_function=_chroma_embedding_function, - embedder=_embedder, + "slack_conversations_retriever", + "Useful for when you need to answer from Slack conversations.", + _client, + _chroma_embedding_function, + _embedder, reranker=use_reranker, ) - return [web_search_tool, stackexchange_tool, retriever] + tools.append(retriever) + + return tools @st.cache_resource