
Merge pull request #111 from arjbingly/sphinx
Update sphinx docs
sanchitvj authored Apr 30, 2024
2 parents e1464e7 + 0ab5695 commit 24a7c06
Showing 90 changed files with 1,645 additions and 718 deletions.
22 changes: 21 additions & 1 deletion cookbook/Basic-RAG/BasicRAG_CustomPrompt.py
@@ -1,9 +1,19 @@
 """Custom Prompts
 ====================
 This cookbook demonstrates how to use custom prompts with Basic RAG.
+`Note that this cookbook assumes that you already have the` ``Llama-2-13b-chat`` `LLM ready;`
+`for more details on how to quantize and run an LLM locally,`
+`refer to the LLM section under Getting Started.`
+`Note that this cookbook also assumes that you have already ingested documents into a DeepLake collection called 'grag';`
+`for more details on how to ingest documents, refer to the cookbook called` ``Document Ingestion``.
 """

+from grag.components.multivec_retriever import Retriever
 from grag.components.prompt import Prompt
+from grag.components.vectordb.deeplake_client import DeepLakeClient
 from grag.rag.basic_rag import BasicRAG

 custom_prompt = Prompt(
@@ -14,4 +24,14 @@
     answer:
     """,
 )
-rag = BasicRAG(doc_chain="stuff", custom_prompt=custom_prompt)
+
+client = DeepLakeClient(collection_name="grag")
+retriever = Retriever(vectordb=client)
+rag = BasicRAG(
+    model_name="Llama-2-13b-chat", custom_prompt=custom_prompt, retriever=retriever
+)
+
+if __name__ == "__main__":
+    while True:
+        query = input("Query:")
+        rag(query)
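
The body of the Prompt constructor is collapsed in the hunk above ("@@ -14,4 +24,14 @@"); only the tail of the template is visible. For orientation only, a complete custom prompt might look like the sketch below — the field names (input_keys, template) are assumptions inferred from the visible fragment, not confirmed by this diff:

    from grag.components.prompt import Prompt

    # Minimal sketch, assuming Prompt takes a set of input keys and a template
    # string with {context} and {question} placeholders (names assumed).
    custom_prompt = Prompt(
        input_keys={"context", "question"},
        template="""Answer the following question based on the given context.
    question: {question}
    context: {context}
    answer:
    """,
    )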
23 changes: 22 additions & 1 deletion cookbook/Basic-RAG/BasicRAG_FewShotPrompt.py
@@ -1,9 +1,18 @@
 """Custom Few-Shot Prompts
 ============================
 This cookbook demonstrates how to use custom few-shot prompts with Basic RAG.
+`Note that this cookbook assumes that you already have the` ``Llama-2-13b-chat`` `LLM ready;`
+`for more details on how to quantize and run an LLM locally,`
+`refer to the LLM section under Getting Started.`
+`Note that this cookbook also assumes that you have already ingested documents into a DeepLake collection called 'grag';`
+`for more details on how to ingest documents, refer to the cookbook called` ``Document Ingestion``.
 """

+from grag.components.multivec_retriever import Retriever
 from grag.components.prompt import FewShotPrompt
+from grag.components.vectordb.deeplake_client import DeepLakeClient
 from grag.rag.basic_rag import BasicRAG

 custom_few_shot_prompt = FewShotPrompt(
@@ -30,4 +39,16 @@
         },
     ],
 )
-rag = BasicRAG(doc_chain="stuff", custom_prompt=custom_few_shot_prompt)
+
+client = DeepLakeClient(collection_name="grag")
+retriever = Retriever(vectordb=client)
+rag = BasicRAG(
+    model_name="Llama-2-13b-chat",
+    custom_prompt=custom_few_shot_prompt,
+    retriever=retriever,
+)
+
+if __name__ == "__main__":
+    while True:
+        query = input("Query:")
+        rag(query)
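
Here too the FewShotPrompt constructor is collapsed ("@@ -30,4 +39,16 @@"); the visible fragment confirms only that it ends with an examples list of dicts. A sketch of what such a prompt could look like — every constructor field other than examples is an assumed name, not confirmed by this diff:

    from grag.components.prompt import FewShotPrompt

    # Sketch only: output_keys, prefix, and example_template are assumed names;
    # the visible fragment confirms only that examples is a list of dicts.
    custom_few_shot_prompt = FewShotPrompt(
        input_keys={"context", "question"},
        output_keys={"answer"},
        prefix="Answer the question from the context, following the examples.",
        example_template="question: {question}\nanswer: {answer}",
        examples=[
            {"question": "What is the capital of France?", "answer": "Paris."},
            {"question": "What is 2 + 2?", "answer": "4."},
        ],
    )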
21 changes: 10 additions & 11 deletions cookbook/Basic-RAG/BasicRAG_ingest.py
@@ -1,6 +1,6 @@
 """Document Ingestion
 =======================
-This cookbook demonstrates how to ingest documents into a vector database.
+This cookbook demonstrates how to ingest PDF documents into a vector database.
 """

 import asyncio
@@ -9,22 +9,21 @@
 from grag.components.multivec_retriever import Retriever
 from grag.components.vectordb.deeplake_client import DeepLakeClient

-client = DeepLakeClient(collection_name="your_collection_name")
+client = DeepLakeClient(collection_name="grag")

 ## Alternatively, to use Chroma:
 # from grag.components.vectordb.chroma_client import ChromaClient
-# client = ChromaClient(collection_name="ci_test")
+# client = ChromaClient(collection_name="grag")

-SYNC = True  # Run synchronously (slow)
-ASYNC = True  # Run asynchronously
+ASYNC = True  # whether to ingest asynchronously

-client = DeepLakeClient(collection_name="ci_test")
-# client = ChromaClient(collection_name="ci_test")
 retriever = Retriever(vectordb=client)

-dir_path = Path(__file__).parents[2] / "data/test/pdfs/new_papers"
+dir_path = (
+    Path(__file__).parents[2] / "data/pdf"
+)  # path to the folder containing the PDFs

-if SYNC:
-    retriever.ingest(dir_path)
-elif ASYNC:
+if ASYNC:
     asyncio.run(retriever.aingest(dir_path))
+else:
+    retriever.ingest(dir_path)
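
After ingestion it can be useful to sanity-check that documents are retrievable. The RAG-GUI app further down in this commit calls retriever.get_chunk(...) and reads doc.metadata["source"] and doc.page_content, so a quick check along those lines might be:

    # Sanity check: retrieve chunks for a test query (get_chunk and the
    # document fields below are the ones used by the RAG-GUI app in this commit).
    chunks = retriever.get_chunk("What are these papers about?")
    for doc in chunks:
        print(doc.metadata["source"])
        print(doc.page_content[:200])  # preview the chunk text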
8 changes: 6 additions & 2 deletions cookbook/Basic-RAG/BasicRAG_refine.py
@@ -11,15 +11,19 @@
 Illustration of refine chain (Source: LangChain)
+`Note that this cookbook assumes that you already have the` ``Llama-2-13b-chat`` `LLM ready;`
+`for more details on how to quantize and run an LLM locally,`
+`refer to the LLM section under Getting Started.`
 """

 from grag.components.multivec_retriever import Retriever
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from grag.rag.basic_rag import BasicRAG

-client = DeepLakeClient(collection_name="test")
+client = DeepLakeClient(collection_name="grag")
 retriever = Retriever(vectordb=client)
-rag = BasicRAG(doc_chain="refine")
+rag = BasicRAG(model_name="Llama-2-13b-chat", doc_chain="refine", retriever=retriever)

 if __name__ == "__main__":
     while True:
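
For readers new to document chains: a "refine" chain drafts an answer from the first retrieved chunk, then revises it once per remaining chunk. A conceptual sketch of that behaviour (llm, first_prompt, and refine_prompt are hypothetical names, not GRAG API):

    # Conceptual sketch of LangChain-style "refine" semantics, not GRAG API.
    def refine_answer(llm, query, docs, first_prompt, refine_prompt):
        # Draft an answer from the first retrieved document ...
        answer = llm(first_prompt.format(question=query, context=docs[0]))
        # ... then revise it against each remaining document in turn.
        for doc in docs[1:]:
            answer = llm(refine_prompt.format(
                question=query, existing_answer=answer, context=doc
            ))
        return answer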
10 changes: 8 additions & 2 deletions cookbook/Basic-RAG/BasicRAG_stuff.py
@@ -11,16 +11,22 @@
 Illustration of stuff chain (Source: LangChain)
+`Note that this cookbook assumes that you already have the` ``Llama-2-13b-chat`` `LLM ready;`
+`for more details on how to quantize and run an LLM locally,`
+`refer to the LLM section under Getting Started.`
 """

 from grag.components.multivec_retriever import Retriever
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from grag.rag.basic_rag import BasicRAG

-client = DeepLakeClient(collection_name="test")
+client = DeepLakeClient(collection_name="grag")
 retriever = Retriever(vectordb=client)

-rag = BasicRAG(doc_chain="stuff", retriever=retriever)
+rag = BasicRAG(model_name="Llama-2-13b-chat", retriever=retriever)
+# Note that doc_chain="stuff" is the default, hence it is not passed to the class explicitly.


 if __name__ == "__main__":
     while True:
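
By contrast with refine, the default "stuff" chain simply concatenates ("stuffs") all retrieved chunks into a single prompt and queries the LLM once. A conceptual sketch (llm and prompt are hypothetical names, not GRAG API):

    # Conceptual sketch of "stuff" semantics, not GRAG API.
    def stuff_answer(llm, query, docs, prompt):
        context = "\n\n".join(doc.page_content for doc in docs)
        return llm(prompt.format(question=query, context=context))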
4 changes: 4 additions & 0 deletions cookbook/RAG-GUI/README.rst
@@ -0,0 +1,4 @@
+RAG-GUI Cookbooks
+=================
+
+Below is an example to get you started with a GUI for our RAG implementation.
121 changes: 72 additions & 49 deletions cookbook/RAG-GUI/app.py
@@ -1,4 +1,10 @@
-"""A cookbook demonstrating how to run RAG app on streamlit."""
+"""
+RAG-GUI
+=======
+A cookbook demonstrating how to run a RAG app on Streamlit.
+"""

 import os
 import sys
@@ -12,11 +18,13 @@

 sys.path.insert(1, str(Path(os.getcwd()).parents[1]))

-st.set_page_config(page_title="GRAG",
-                   menu_items={
-                       "Get Help": "https://github.com/arjbingly/Capstone_5",
-                       "About": "This is a simple GUI for GRAG"
-                   })
+st.set_page_config(
+    page_title="GRAG",
+    menu_items={
+        "Get Help": "https://github.com/arjbingly/Capstone_5",
+        "About": "This is a simple GUI for GRAG",
+    },
+)


 def spinner(text):
@@ -87,95 +95,64 @@ def __init__(self, app, conf):
     def render_sidebar(self):
         """Renders the sidebar in the application interface with model selection and parameters."""
         with st.sidebar:
-            st.title('GRAG')
-            st.subheader('Models and parameters')
-            st.sidebar.selectbox('Choose a model',
-                                 ['Llama-2-13b-chat', 'Llama-2-7b-chat',
-                                  'Mixtral-8x7B-Instruct-v0.1', 'gemma-7b-it'],
-                                 key='selected_model')
-            st.sidebar.slider('Temperature',
-                              min_value=0.1,
-                              max_value=1.0,
-                              value=0.1,
-                              step=0.1,
-                              key='temperature')
-            st.sidebar.slider('Top-k',
-                              min_value=1,
-                              max_value=5,
-                              value=3,
-                              step=1,
-                              key='top_k')
-            st.button('Load Model', on_click=self.load_rag)
-            st.checkbox('Show sources', key='show_sources')
-
-    @spinner(text='Loading model...')
+            st.title("GRAG")
+            st.subheader("Models and parameters")
+            st.sidebar.selectbox(
+                "Choose a model",
+                [
+                    "Llama-2-13b-chat",
+                    "Llama-2-7b-chat",
+                    "Mixtral-8x7B-Instruct-v0.1",
+                    "gemma-7b-it",
+                ],
+                key="selected_model",
+            )
+            st.sidebar.slider(
+                "Temperature",
+                min_value=0.1,
+                max_value=1.0,
+                value=0.1,
+                step=0.1,
+                key="temperature",
+            )
+            st.sidebar.slider(
+                "Top-k", min_value=1, max_value=5, value=3, step=1, key="top_k"
+            )
+            st.button("Load Model", on_click=self.load_rag)
+            st.checkbox("Show sources", key="show_sources")
+
+    @spinner(text="Loading model...")
     def load_rag(self):
         """Loads the specified RAG model based on the user's selection and settings in the sidebar."""
-        if 'rag' in st.session_state:
-            del st.session_state['rag']
+        if "rag" in st.session_state:
+            del st.session_state["rag"]

-        llm_kwargs = {"temperature": st.session_state['temperature'], }
-        if st.session_state['selected_model'] == "Mixtral-8x7B-Instruct-v0.1":
-            llm_kwargs['n_gpu_layers'] = 16
-            llm_kwargs['quantization'] = 'Q4_K_M'
-        elif st.session_state['selected_model'] == "gemma-7b-it":
-            llm_kwargs['n_gpu_layers'] = 18
-            llm_kwargs['quantization'] = 'f16'
+        llm_kwargs = {
+            "temperature": st.session_state["temperature"],
+        }
+        if st.session_state["selected_model"] == "Mixtral-8x7B-Instruct-v0.1":
+            llm_kwargs["n_gpu_layers"] = 16
+            llm_kwargs["quantization"] = "Q4_K_M"
+        elif st.session_state["selected_model"] == "gemma-7b-it":
+            llm_kwargs["n_gpu_layers"] = 18
+            llm_kwargs["quantization"] = "f16"

         retriever_kwargs = {
-            "client_kwargs": {"read_only": True, },
-            "top_k": st.session_state['top_k']
+            "client_kwargs": {
+                "read_only": True,
+            },
+            "top_k": st.session_state["top_k"],
         }
         client = DeepLakeClient(collection_name="usc", read_only=True)
         retriever = Retriever(vectordb=client)

-        st.session_state['rag'] = BasicRAG(model_name=st.session_state['selected_model'], stream=True,
-                                           llm_kwargs=llm_kwargs, retriever=retriever,
-                                           retriever_kwargs=retriever_kwargs)
+        st.session_state["rag"] = BasicRAG(
+            model_name=st.session_state["selected_model"],
+            stream=True,
+            llm_kwargs=llm_kwargs,
+            retriever=retriever,
+            retriever_kwargs=retriever_kwargs,
+        )
         st.success(
             f"""Model Loaded !!!
@@ -147,7 +168,7 @@ def clear_cache(self):
     def render_main(self):
         """Renders the main chat interface for user interaction with the loaded RAG model."""
         st.title(":us: US Constitution Expert! :mortar_board:")
-        if 'rag' not in st.session_state:
+        if "rag" not in st.session_state:
             st.warning("You have not loaded any model")
         else:
             user_input = st.chat_input("Ask me anything about the US Constitution.")
@@ -156,14 +177,16 @@ def render_main(self):
                 with st.chat_message("user"):
                     st.write(user_input)
                 with st.chat_message("assistant"):
-                    _ = st.write_stream(
-                        st.session_state['rag'](user_input)[0]
-                    )
-                    if st.session_state['show_sources']:
-                        retrieved_docs = st.session_state['rag'].retriever.get_chunk(user_input)
+                    _ = st.write_stream(st.session_state["rag"](user_input)[0])
+                    if st.session_state["show_sources"]:
+                        retrieved_docs = st.session_state["rag"].retriever.get_chunk(
+                            user_input
+                        )
                         for index, doc in enumerate(retrieved_docs):
                             with st.expander(f"Source {index + 1}"):
-                                st.markdown(f"**{index + 1}. {doc.metadata['source']}**")
+                                st.markdown(
+                                    f"**{index + 1}. {doc.metadata['source']}**"
+                                )
                                 # if st.session_state['show_content']:
                                 st.text(f"**{doc.page_content}**")
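To try the GUI locally, launch the script with Streamlit's CLI (this assumes GRAG and its dependencies are installed, and that a DeepLake collection named "usc" has already been ingested, as the code above expects):

    streamlit run cookbook/RAG-GUI/app.py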
4 changes: 4 additions & 0 deletions cookbook/Retriever-GUI/README.rst
@@ -0,0 +1,4 @@
+Retriever-GUI Cookbooks
+-----------------------
+
+Below is an example demonstrating a simple GUI for the Retriever, built with Streamlit.
File renamed without changes.
4 changes: 0 additions & 4 deletions cookbook/Retriver-GUI/README.rst

This file was deleted.

