From 87ec9294bb8ee7245ccce20fc7975e467094d821 Mon Sep 17 00:00:00 2001 From: rchan Date: Thu, 14 Sep 2023 21:55:10 +0100 Subject: [PATCH 1/3] revert force-new-index default --- slack_bot/run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/slack_bot/run.py b/slack_bot/run.py index eed1e9ef..d62c5cb1 100755 --- a/slack_bot/run.py +++ b/slack_bot/run.py @@ -91,7 +91,8 @@ "--force-new-index", "-f", help="Recreate the index vector store or not", - action="store_true", + action=argparse.BooleanOptionalAction, + default=False, ) parser.add_argument( "--data-dir", From da8717bc0d9140ce5e28f18df7625da8bfa73b3e Mon Sep 17 00:00:00 2001 From: rchan Date: Fri, 15 Sep 2023 17:21:57 +0100 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=8E=A8=20restructure=20run.py=20defau?= =?UTF-8?q?lt=20args?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- slack_bot/run.py | 110 +++++++++++----------- slack_bot/slack_bot/models/llama_index.py | 10 +- 2 files changed, 59 insertions(+), 61 deletions(-) diff --git a/slack_bot/run.py b/slack_bot/run.py index d62c5cb1..4203f2c7 100755 --- a/slack_bot/run.py +++ b/slack_bot/run.py @@ -21,7 +21,11 @@ # Parse command line arguments parser = argparse.ArgumentParser() parser.add_argument( - "--model", "-m", help="Select which model to use", default=None, choices=MODELS + "--model", + "-m", + help="Select which model to use", + default=os.environ.get("REGINALD_MODEL") or "hello", + choices=MODELS, ) parser.add_argument( "--model-name", @@ -45,11 +49,11 @@ "(ignored if not using llama-index-llama-cpp or llama-index-hf). " "Default is 'chat'." 
), - default=None, + default=os.environ.get("LLAMA_INDEX_MODE") or "chat", choices=["chat", "query"], ) parser.add_argument( - "--path", + "--is-path", "-p", help=( "Whether or not the model_name passed is a path to the model " @@ -65,7 +69,7 @@ "Select maximum input size for LlamaCPP or HuggingFace model " "(ignored if not using llama-index-llama-cpp or llama-index-hf)" ), - default=4096, + default=os.environ.get("LLAMA_INDEX_MAX_INPUT_SIZE") or 4096, ) parser.add_argument( "--n-gpu-layers", @@ -75,7 +79,7 @@ "Select number of GPU layers for LlamaCPP model " "(ignored if not using llama-index-llama-cpp)" ), - default=0, + default=os.environ.get("LLAMA_INDEX_N_GPU_LAYERS") or 0, ) parser.add_argument( "--device", @@ -85,21 +89,21 @@ "Select device for HuggingFace model " "(ignored if not using llama-index-hf model)" ), - default="auto", + default=os.environ.get("LLAMA_INDEX_DEVICE") or "auto", ) parser.add_argument( "--force-new-index", "-f", help="Recreate the index vector store or not", - action=argparse.BooleanOptionalAction, - default=False, + action="store_true", ) parser.add_argument( "--data-dir", "-d", type=pathlib.Path, help="Location for data", - default=None, + default=os.environ.get("LLAMA_INDEX_DATA_DIR") + or (pathlib.Path(__file__).parent.parent / "data").resolve(), ) parser.add_argument( "--which-index", @@ -112,7 +116,7 @@ "files in the data directory, 'handbook' will " "only use 'handbook.csv' file." 
), - default=None, + default=os.environ.get("LLAMA_WHICH_INDEX") or "all_data", choices=["all_data", "public", "handbook"], ) @@ -125,66 +129,60 @@ level=logging.INFO, ) - # Set model name - model_name = os.environ.get("REGINALD_MODEL") - if args.model: - model_name = args.model - if not model_name: - model_name = "hello" - - # Set force new index + # Set force new index (by default, don't) force_new_index = False - if os.environ.get("LLAMA_FORCE_NEW_INDEX"): - force_new_index = os.environ.get("LLAMA_FORCE_NEW_INDEX").lower() == "true" + # try to obtain force_new_index from env var + if os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX"): + force_new_index = ( + os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true" + ) + # if force_new_index is provided via command line, override env var if args.force_new_index: force_new_index = True - # Set data directory - data_dir = os.environ.get("LLAMA_DATA_DIR") - if args.data_dir: - data_dir = args.data_dir - if not data_dir: - data_dir = pathlib.Path(__file__).parent.parent / "data" - data_dir = pathlib.Path(data_dir).resolve() - - # Set which index - which_index = os.environ.get("LLAMA_WHICH_INDEX") - if args.which_index: - which_index = args.which_index - if not which_index: - which_index = "all_data" - - # Set mode - mode = os.environ.get("LLAMA_MODE") - if args.mode: - mode = args.mode - if not mode: - mode = "chat" + # Set is_path bool (by default, False) + is_path = False + # try to obtain is_path from env var + if os.environ.get("LLAMA_INDEX_PATH_BOOL"): + is_path = os.environ.get("LLAMA_INDEX_PATH_BOOL").lower() == "true" + # if is_path bool is provided via command line, override env var + if args.is_path: + is_path = True # Initialise a new Slack bot with the requested model try: - model = MODELS[model_name.lower()] + model = MODELS[args.model.lower()] except KeyError: - logging.error(f"Model {model_name} was not recognised") + logging.error(f"Model {args.model} was not recognised") sys.exit(1) # Initialise 
LLM reponse model - logging.info(f"Initialising bot with model: {model_name}") + logging.info(f"Initialising bot with model: {args.model}") # Set up any model args that are required - if model_name == "llama-index-llama-cpp": - if args.model_name is None: - args.model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL + if model == "llama-index-llama-cpp": + # try to obtain model name from env var + # if model name is provided via command line, override env var + model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME") + # if no model name is provided by command line or env var, + # default to DEFAULT_LLAMA_CPP_GGUF_MODEL + if model_name is None: + model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL model_args = { - "model_name": args.model_name, - "path": args.path, + "model_name": model_name, + "is_path": is_path, "n_gpu_layers": args.n_gpu_layers, "max_input_size": args.max_input_size, } - elif model_name == "llama-index-hf": - if args.model_name is None: - args.model_name = DEFAULT_HF_MODEL + elif model == "llama-index-hf": + # try to obtain model name from env var + # if model name is provided via command line, override env var + model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME") + # if no model name is provided by command line or env var, + # default to DEFAULT_HF_MODEL + if model_name is None: + model_name = DEFAULT_HF_MODEL model_args = { "model_name": args.model_name, @@ -194,14 +192,14 @@ else: model_args = {} - if model_name == "hello": + if model == "hello": response_model = model() else: response_model = model( force_new_index=force_new_index, - data_dir=data_dir, - which_index=which_index, - mode=mode, + data_dir=args.data_dir, + which_index=args.which_index, + mode=args.mode, **model_args, ) diff --git a/slack_bot/slack_bot/models/llama_index.py b/slack_bot/slack_bot/models/llama_index.py index d5005e79..ac0edd10 100644 --- a/slack_bot/slack_bot/models/llama_index.py +++ b/slack_bot/slack_bot/models/llama_index.py @@ -356,7 +356,7 @@ 
class LlamaIndexLlamaCPP(LlamaIndex): def __init__( self, model_name: str, - path: bool, + is_path: bool, n_gpu_layers: int = 0, *args: Any, **kwargs: Any, @@ -369,14 +369,14 @@ def __init__( ---------- model_name : str Either the path to the model or the URL to download the model from - path : bool, optional + is_path : bool, optional If True, model_name is used as a path to the model file, otherwise it should be the URL to download the model n_gpu_layers : int, optional Number of layers to offload to GPU. If -1, all layers are offloaded, by default 0 """ - self.path = path + self.is_path = is_path self.n_gpu_layers = n_gpu_layers super().__init__(*args, model_name=model_name, **kwargs) @@ -389,8 +389,8 @@ def _prep_llm(self) -> LLM: ) return LlamaCPP( - model_url=self.model_name if not self.path else None, - model_path=self.model_name if self.path else None, + model_url=self.model_name if not self.is_path else None, + model_path=self.model_name if self.is_path else None, temperature=0.1, max_new_tokens=self.num_output, context_window=self.max_input_size, From b951b0247f09aa019da7ddbeb6480e8ec086dca6 Mon Sep 17 00:00:00 2001 From: rchan Date: Fri, 15 Sep 2023 18:55:11 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=8E=A8=20fix=20defaults=20for=20is=5F?= =?UTF-8?q?path=20and=20force=5Fnew=5Findex?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- slack_bot/run.py | 28 +++++++++++++---------- slack_bot/slack_bot/models/llama_index.py | 21 ++++++++++------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/slack_bot/run.py b/slack_bot/run.py index bdb331f7..19d8d623 100755 --- a/slack_bot/run.py +++ b/slack_bot/run.py @@ -59,7 +59,8 @@ async def main(): "Whether or not the model_name passed is a path to the model " "(ignored if not using llama-index-llama-cpp)" ), - action="store_true", + action=argparse.BooleanOptionalAction, + default=None, ) parser.add_argument( "--max-input-size", @@ -95,7 +96,8 @@ async def 
main(): "--force-new-index", "-f", help="Recreate the index vector store or not", - action="store_true", + action=argparse.BooleanOptionalAction, + default=None, ) parser.add_argument( "--data-dir", @@ -116,7 +118,7 @@ async def main(): "files in the data directory, 'handbook' will " "only use 'handbook.csv' file." ), - default=os.environ.get("LLAMA_WHICH_INDEX") or "all_data", + default=os.environ.get("LLAMA_INDEX_WHICH_INDEX") or "all_data", choices=["all_data", "public", "handbook"], ) @@ -137,17 +139,17 @@ async def main(): os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true" ) # if force_new_index is provided via command line, override env var - if args.force_new_index: - force_new_index = True + if args.force_new_index is not None: + force_new_index = args.force_new_index # Set is_path bool (by default, False) is_path = False # try to obtain is_path from env var - if os.environ.get("LLAMA_INDEX_PATH_BOOL"): - is_path = os.environ.get("LLAMA_INDEX_PATH_BOOL").lower() == "true" + if os.environ.get("LLAMA_INDEX_IS_PATH"): + is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true" # if is_path bool is provided via command line, override env var - if args.is_path: - is_path = True + if args.is_path is not None: + is_path = args.is_path # Initialise a new Slack bot with the requested model try: @@ -160,10 +162,11 @@ async def main(): logging.info(f"Initialising bot with model: {args.model}") # Set up any model args that are required - if model == "llama-index-llama-cpp": + if args.model == "llama-index-llama-cpp": # try to obtain model name from env var # if model name is provided via command line, override env var model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME") + # if no model name is provided by command line or env var, # default to DEFAULT_LLAMA_CPP_GGUF_MODEL if model_name is None: @@ -175,17 +178,18 @@ async def main(): "n_gpu_layers": args.n_gpu_layers, "max_input_size": args.max_input_size, } - elif model == 
"llama-index-hf": + elif args.model == "llama-index-hf": # try to obtain model name from env var # if model name is provided via command line, override env var model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME") + # if no model name is provided by command line or env var, # default to DEFAULT_HF_MODEL if model_name is None: model_name = DEFAULT_HF_MODEL model_args = { - "model_name": args.model_name, + "model_name": model_name, "device": args.device, "max_input_size": args.max_input_size, } diff --git a/slack_bot/slack_bot/models/llama_index.py b/slack_bot/slack_bot/models/llama_index.py index ac0edd10..83eae921 100644 --- a/slack_bot/slack_bot/models/llama_index.py +++ b/slack_bot/slack_bot/models/llama_index.py @@ -68,7 +68,7 @@ def __init__( The type of engine to use when interacting with the data, options of "chat" or "query". Default is "chat". k : int, optional - `similarity_top_k` to use in query engine, by default 3 + `similarity_top_k` to use in chat or query engine, by default 3 chunk_overlap_ratio : float, optional Chunk overlap as a ratio of chunk size, by default 0.1 force_new_index : bool, optional @@ -79,6 +79,14 @@ def __init__( """ super().__init__(emoji="llama") logging.info("Setting up Huggingface backend.") + if mode == "chat": + logging.info("Setting up chat engine.") + elif mode == "query": + logging.info("Setting up query engine.") + else: + logging.error("Mode must either be 'query' or 'chat'.") + sys.exit(1) + self.max_input_size = max_input_size self.model_name = model_name self.num_output = num_output @@ -138,17 +146,14 @@ def __init__( storage_context=storage_context, service_context=service_context ) - if self.mode == "query": - self.query_engine = self.index.as_query_engine(similarity_top_k=k) - logging.info("Done setting up Huggingface backend for query engine.") - elif self.mode == "chat": + if self.mode == "chat": self.chat_engine = self.index.as_chat_engine( chat_mode="context", similarity_top_k=k ) 
logging.info("Done setting up Huggingface backend for chat engine.") - else: - logging.error("Mode must either be 'query' or 'chat'.") - sys.exit(1) + elif self.mode == "query": + self.query_engine = self.index.as_query_engine(similarity_top_k=k) + logging.info("Done setting up Huggingface backend for query engine.") self.error_response_template = ( "Oh no! When I tried to get a response to your prompt, "