Fix #87: Set default run args #91

Merged (4 commits) on Sep 18, 2023
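This PR moves default handling for the bot's run arguments into `argparse` itself: each option now falls back to an environment variable (`REGINALD_MODEL`, `LLAMA_INDEX_MODE`, `LLAMA_INDEX_WHICH_INDEX`, and so on) and then to a built-in default, instead of being resolved by hand after parsing. A minimal sketch of the resulting precedence, using one of the options from the diff (not the project's actual parser setup):

```python
# Minimal sketch of the precedence this PR gives each run argument:
# command-line value > environment variable > built-in default.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument(
    "--mode",
    # If LLAMA_INDEX_MODE is unset or empty, fall back to "chat".
    default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
    choices=["chat", "query"],
)

args = parser.parse_args([])                    # nothing on the CLI -> env var or "chat"
args = parser.parse_args(["--mode", "query"])   # explicit CLI value always wins
print(args.mode)                                # query
```

Because the `or` fallback treats an empty environment variable the same as an unset one, an empty string still yields the built-in default.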
123 changes: 63 additions & 60 deletions slack_bot/run.py
@@ -21,7 +21,11 @@ async def main():
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", "-m", help="Select which model to use", default=None, choices=MODELS
"--model",
"-m",
help="Select which model to use",
default=os.environ.get("REGINALD_MODEL") or "hello",
choices=MODELS,
)
parser.add_argument(
"--model-name",
@@ -45,17 +49,18 @@
"(ignored if not using llama-index-llama-cpp or llama-index-hf). "
"Default is 'chat'."
),
default=None,
default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
choices=["chat", "query"],
)
parser.add_argument(
"--path",
"--is-path",
"-p",
help=(
"Whether or not the model_name passed is a path to the model "
"(ignored if not using llama-index-llama-cpp)"
),
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--max-input-size",
@@ -65,7 +70,7 @@
"Select maximum input size for LlamaCPP or HuggingFace model "
"(ignored if not using llama-index-llama-cpp or llama-index-hf)"
),
default=4096,
default=os.environ.get("LLAMA_INDEX_MAX_INPUT_SIZE") or 4096,
)
parser.add_argument(
"--n-gpu-layers",
@@ -75,7 +80,7 @@
"Select number of GPU layers for LlamaCPP model "
"(ignored if not using llama-index-llama-cpp)"
),
default=0,
default=os.environ.get("LLAMA_INDEX_N_GPU_LAYERS") or 0,
)
parser.add_argument(
"--device",
@@ -85,20 +90,22 @@
"Select device for HuggingFace model "
"(ignored if not using llama-index-hf model)"
),
default="auto",
default=os.environ.get("LLAMA_INDEX_DEVICE") or "auto",
)
parser.add_argument(
"--force-new-index",
"-f",
help="Recreate the index vector store or not",
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--data-dir",
"-d",
type=pathlib.Path,
help="Location for data",
default=None,
default=os.environ.get("LLAMA_INDEX_DATA_DIR")
or (pathlib.Path(__file__).parent.parent / "data").resolve(),
)
parser.add_argument(
"--which-index",
@@ -111,7 +118,7 @@
"files in the data directory, 'handbook' will "
"only use 'handbook.csv' file."
),
default=None,
default=os.environ.get("LLAMA_INDEX_WHICH_INDEX") or "all_data",
choices=["all_data", "public", "handbook"],
)

@@ -124,83 +131,79 @@
level=logging.INFO,
)

# Set model name
model_name = os.environ.get("REGINALD_MODEL")
if args.model:
model_name = args.model
if not model_name:
model_name = "hello"

# Set force new index
# Set force new index (by default, don't)
force_new_index = False
if os.environ.get("LLAMA_FORCE_NEW_INDEX"):
force_new_index = os.environ.get("LLAMA_FORCE_NEW_INDEX").lower() == "true"
if args.force_new_index:
force_new_index = True

# Set data directory
data_dir = os.environ.get("LLAMA_DATA_DIR")
if args.data_dir:
data_dir = args.data_dir
if not data_dir:
data_dir = pathlib.Path(__file__).parent.parent / "data"
data_dir = pathlib.Path(data_dir).resolve()

# Set which index
which_index = os.environ.get("LLAMA_WHICH_INDEX")
if args.which_index:
which_index = args.which_index
if not which_index:
which_index = "all_data"

# Set mode
mode = os.environ.get("LLAMA_MODE")
if args.mode:
mode = args.mode
if not mode:
mode = "chat"
# try to obtain force_new_index from env var
if os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX"):
force_new_index = (
os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true"
)
# if force_new_index is provided via command line, override env var
if args.force_new_index is not None:
force_new_index = args.force_new_index

# Set is_path bool (by default, False)
is_path = False
# try to obtain is_path from env var
if os.environ.get("LLAMA_INDEX_IS_PATH"):
is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
# if is_path bool is provided via command line, override env var
if args.is_path is not None:
is_path = args.is_path

# Initialise a new Slack bot with the requested model
try:
model = MODELS[model_name.lower()]
model = MODELS[args.model.lower()]
except KeyError:
logging.error(f"Model {model_name} was not recognised")
logging.error(f"Model {args.model} was not recognised")
sys.exit(1)

# Initialise LLM response model
logging.info(f"Initialising bot with model: {model_name}")
logging.info(f"Initialising bot with model: {args.model}")

# Set up any model args that are required
if model_name == "llama-index-llama-cpp":
if args.model_name is None:
args.model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL
if args.model == "llama-index-llama-cpp":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_LLAMA_CPP_GGUF_MODEL
if model_name is None:
model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL

model_args = {
"model_name": args.model_name,
"path": args.path,
"model_name": model_name,
"is_path": is_path,
"n_gpu_layers": args.n_gpu_layers,
"max_input_size": args.max_input_size,
}
elif model_name == "llama-index-hf":
if args.model_name is None:
args.model_name = DEFAULT_HF_MODEL
elif args.model == "llama-index-hf":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_HF_MODEL
if model_name is None:
model_name = DEFAULT_HF_MODEL

model_args = {
"model_name": args.model_name,
"model_name": model_name,
"device": args.device,
"max_input_size": args.max_input_size,
}
else:
model_args = {}

if model_name == "hello":
if model == "hello":
response_model = model()
else:
response_model = model(
force_new_index=force_new_index,
data_dir=data_dir,
which_index=which_index,
mode=mode,
data_dir=args.data_dir,
which_index=args.which_index,
mode=args.mode,
**model_args,
)

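The boolean flags `--is-path` (renamed from `--path`) and `--force-new-index` now use `argparse.BooleanOptionalAction` (available from Python 3.9) with `default=None`, giving each flag three states: explicitly enabled, explicitly disabled via the generated `--no-...` form, or not supplied at all. Only in the last case does the script consult the corresponding environment variable. A sketch of that resolution, mirroring the logic in `run.py`:

```python
# Sketch of the tri-state boolean flag used for --is-path and --force-new-index.
# BooleanOptionalAction also generates a --no-is-path form, and default=None lets
# the script tell "not given on the CLI" apart from an explicit True/False.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument(
    "--is-path",
    action=argparse.BooleanOptionalAction,  # accepts --is-path and --no-is-path
    default=None,                           # None == flag absent from the CLI
)
args = parser.parse_args([])

is_path = False
if os.environ.get("LLAMA_INDEX_IS_PATH"):
    # env var is read only as a fallback
    is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
if args.is_path is not None:
    # an explicit --is-path / --no-is-path always overrides the env var
    is_path = args.is_path
```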
31 changes: 18 additions & 13 deletions slack_bot/slack_bot/models/llama_index.py
@@ -68,7 +68,7 @@ def __init__(
The type of engine to use when interacting with the data, options of "chat" or "query".
Default is "chat".
k : int, optional
`similarity_top_k` to use in query engine, by default 3
`similarity_top_k` to use in chat or query engine, by default 3
chunk_overlap_ratio : float, optional
Chunk overlap as a ratio of chunk size, by default 0.1
force_new_index : bool, optional
@@ -79,6 +79,14 @@
"""
super().__init__(emoji="llama")
logging.info("Setting up Huggingface backend.")
if mode == "chat":
logging.info("Setting up chat engine.")
elif mode == "query":
logging.info("Setting up query engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)

self.max_input_size = max_input_size
self.model_name = model_name
self.num_output = num_output
@@ -138,17 +146,14 @@ def __init__(
storage_context=storage_context, service_context=service_context
)

if self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")
elif self.mode == "chat":
if self.mode == "chat":
self.chat_engine = self.index.as_chat_engine(
chat_mode="context", similarity_top_k=k
)
logging.info("Done setting up Huggingface backend for chat engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)
elif self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")

self.error_response_template = (
"Oh no! When I tried to get a response to your prompt, "
@@ -356,7 +361,7 @@ class LlamaIndexLlamaCPP(LlamaIndex):
def __init__(
self,
model_name: str,
path: bool,
is_path: bool,
n_gpu_layers: int = 0,
*args: Any,
**kwargs: Any,
@@ -369,14 +374,14 @@ def __init__(
----------
model_name : str
Either the path to the model or the URL to download the model from
path : bool, optional
is_path : bool, optional
If True, model_name is used as a path to the model file,
otherwise it should be the URL to download the model
n_gpu_layers : int, optional
Number of layers to offload to GPU.
If -1, all layers are offloaded, by default 0
"""
self.path = path
self.is_path = is_path
self.n_gpu_layers = n_gpu_layers
super().__init__(*args, model_name=model_name, **kwargs)

@@ -389,8 +394,8 @@ def _prep_llm(self) -> LLM:
)

return LlamaCPP(
model_url=self.model_name if not self.path else None,
model_path=self.model_name if self.path else None,
model_url=self.model_name if not self.is_path else None,
model_path=self.model_name if self.is_path else None,
temperature=0.1,
max_new_tokens=self.num_output,
context_window=self.max_input_size,
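In `LlamaIndexLlamaCPP`, the renamed `is_path` argument controls whether `model_name` is handed to `LlamaCPP` as a local `model_path` or as a `model_url` to download. A standalone sketch of that selection (the file name and URL below are placeholders, not values from this PR):

```python
# Illustrative version of the routing done in LlamaIndexLlamaCPP._prep_llm:
# is_path decides whether model_name is a local file path or a download URL.
def resolve_model_source(model_name: str, is_path: bool) -> dict:
    return {
        "model_path": model_name if is_path else None,
        "model_url": None if is_path else model_name,
    }

resolve_model_source("llama-2-7b-chat.Q4_K_M.gguf", is_path=True)
# -> {"model_path": "llama-2-7b-chat.Q4_K_M.gguf", "model_url": None}
resolve_model_source("https://example.com/model.gguf", is_path=False)
# -> {"model_path": None, "model_url": "https://example.com/model.gguf"}
```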