Fix #87: Set default run args #91

Merged (4 commits) on Sep 18, 2023
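This PR moves default handling for the bot's run arguments into `argparse` itself: each option now falls back to an environment variable (`REGINALD_MODEL`, `LLAMA_INDEX_MODE`, `LLAMA_INDEX_WHICH_INDEX`, and so on) and then to a built-in default, instead of being resolved by hand after parsing. A minimal sketch of the resulting precedence, using one of the options from the diff (not the project's actual parser setup):

```python
# Minimal sketch of the precedence this PR gives each run argument:
# command-line value > environment variable > built-in default.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument(
    "--mode",
    # If LLAMA_INDEX_MODE is unset or empty, fall back to "chat".
    default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
    choices=["chat", "query"],
)

args = parser.parse_args([])                    # nothing on the CLI -> env var or "chat"
args = parser.parse_args(["--mode", "query"])   # explicit CLI value always wins
print(args.mode)                                # query
```

Because the `or` fallback treats an empty environment variable the same as an unset one, an empty string still yields the built-in default.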
123 changes: 63 additions & 60 deletions slack_bot/run.py
@@ -21,7 +21,11 @@ async def main():
# Parse command line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", "-m", help="Select which model to use", default=None, choices=MODELS
"--model",
"-m",
help="Select which model to use",
default=os.environ.get("REGINALD_MODEL") or "hello",
choices=MODELS,
)
parser.add_argument(
"--model-name",
@@ -45,17 +49,18 @@
"(ignored if not using llama-index-llama-cpp or llama-index-hf). "
"Default is 'chat'."
),
default=None,
default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
choices=["chat", "query"],
)
parser.add_argument(
"--path",
"--is-path",
"-p",
help=(
"Whether or not the model_name passed is a path to the model "
"(ignored if not using llama-index-llama-cpp)"
),
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--max-input-size",
@@ -65,7 +70,7 @@
"Select maximum input size for LlamaCPP or HuggingFace model "
"(ignored if not using llama-index-llama-cpp or llama-index-hf)"
),
default=4096,
default=os.environ.get("LLAMA_INDEX_MAX_INPUT_SIZE") or 4096,
)
parser.add_argument(
"--n-gpu-layers",
@@ -75,7 +80,7 @@
"Select number of GPU layers for LlamaCPP model "
"(ignored if not using llama-index-llama-cpp)"
),
default=0,
default=os.environ.get("LLAMA_INDEX_N_GPU_LAYERS") or 0,
)
parser.add_argument(
"--device",
@@ -85,20 +90,22 @@
"Select device for HuggingFace model "
"(ignored if not using llama-index-hf model)"
),
default="auto",
default=os.environ.get("LLAMA_INDEX_DEVICE") or "auto",
)
parser.add_argument(
"--force-new-index",
"-f",
help="Recreate the index vector store or not",
action="store_true",
action=argparse.BooleanOptionalAction,
default=None,
)
parser.add_argument(
"--data-dir",
"-d",
type=pathlib.Path,
help="Location for data",
default=None,
default=os.environ.get("LLAMA_INDEX_DATA_DIR")
or (pathlib.Path(__file__).parent.parent / "data").resolve(),
)
parser.add_argument(
"--which-index",
@@ -111,7 +118,7 @@
"files in the data directory, 'handbook' will "
"only use 'handbook.csv' file."
),
default=None,
default=os.environ.get("LLAMA_INDEX_WHICH_INDEX") or "all_data",
choices=["all_data", "public", "handbook"],
)

@@ -124,83 +131,79 @@
level=logging.INFO,
)

# Set model name
model_name = os.environ.get("REGINALD_MODEL")
if args.model:
model_name = args.model
if not model_name:
model_name = "hello"

# Set force new index
# Set force new index (by default, don't)
force_new_index = False
if os.environ.get("LLAMA_FORCE_NEW_INDEX"):
force_new_index = os.environ.get("LLAMA_FORCE_NEW_INDEX").lower() == "true"
if args.force_new_index:
force_new_index = True

# Set data directory
data_dir = os.environ.get("LLAMA_DATA_DIR")
if args.data_dir:
data_dir = args.data_dir
if not data_dir:
data_dir = pathlib.Path(__file__).parent.parent / "data"
data_dir = pathlib.Path(data_dir).resolve()

# Set which index
which_index = os.environ.get("LLAMA_WHICH_INDEX")
if args.which_index:
which_index = args.which_index
if not which_index:
which_index = "all_data"

# Set mode
mode = os.environ.get("LLAMA_MODE")
if args.mode:
mode = args.mode
if not mode:
mode = "chat"
# try to obtain force_new_index from env var
if os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX"):
force_new_index = (
os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true"
)
# if force_new_index is provided via command line, override env var
if args.force_new_index is not None:
force_new_index = args.force_new_index

# Set is_path bool (by default, False)
is_path = False
# try to obtain is_path from env var
if os.environ.get("LLAMA_INDEX_IS_PATH"):
is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
# if is_path bool is provided via command line, override env var
if args.is_path is not None:
is_path = args.is_path

# Initialise a new Slack bot with the requested model
try:
model = MODELS[model_name.lower()]
model = MODELS[args.model.lower()]
except KeyError:
logging.error(f"Model {model_name} was not recognised")
logging.error(f"Model {args.model} was not recognised")
sys.exit(1)

# Initialise LLM response model
logging.info(f"Initialising bot with model: {model_name}")
logging.info(f"Initialising bot with model: {args.model}")

# Set up any model args that are required
if model_name == "llama-index-llama-cpp":
if args.model_name is None:
args.model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL
if args.model == "llama-index-llama-cpp":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_LLAMA_CPP_GGUF_MODEL
if model_name is None:
model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL

model_args = {
"model_name": args.model_name,
"path": args.path,
"model_name": model_name,
"is_path": is_path,
"n_gpu_layers": args.n_gpu_layers,
"max_input_size": args.max_input_size,
}
elif model_name == "llama-index-hf":
if args.model_name is None:
args.model_name = DEFAULT_HF_MODEL
elif args.model == "llama-index-hf":
# try to obtain model name from env var
# if model name is provided via command line, override env var
model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")

# if no model name is provided by command line or env var,
# default to DEFAULT_HF_MODEL
if model_name is None:
model_name = DEFAULT_HF_MODEL

model_args = {
"model_name": args.model_name,
"model_name": model_name,
"device": args.device,
"max_input_size": args.max_input_size,
}
else:
model_args = {}

if model_name == "hello":
if model == "hello":
response_model = model()
else:
response_model = model(
force_new_index=force_new_index,
data_dir=data_dir,
which_index=which_index,
mode=mode,
data_dir=args.data_dir,
which_index=args.which_index,
mode=args.mode,
**model_args,
)

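The boolean flags `--is-path` (renamed from `--path`) and `--force-new-index` now use `argparse.BooleanOptionalAction` (available from Python 3.9) with `default=None`, giving each flag three states: explicitly enabled, explicitly disabled via the generated `--no-...` form, or not supplied at all. Only in the last case does the script consult the corresponding environment variable. A sketch of that resolution, mirroring the logic in `run.py`:

```python
# Sketch of the tri-state boolean flag used for --is-path and --force-new-index.
# BooleanOptionalAction also generates a --no-is-path form, and default=None lets
# the script tell "not given on the CLI" apart from an explicit True/False.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument(
    "--is-path",
    action=argparse.BooleanOptionalAction,  # accepts --is-path and --no-is-path
    default=None,                           # None == flag absent from the CLI
)
args = parser.parse_args([])

is_path = False
if os.environ.get("LLAMA_INDEX_IS_PATH"):
    # env var is read only as a fallback
    is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
if args.is_path is not None:
    # an explicit --is-path / --no-is-path always overrides the env var
    is_path = args.is_path
```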
31 changes: 18 additions & 13 deletions slack_bot/slack_bot/models/llama_index.py
@@ -68,7 +68,7 @@ def __init__(
The type of engine to use when interacting with the data, options of "chat" or "query".
Default is "chat".
k : int, optional
`similarity_top_k` to use in query engine, by default 3
`similarity_top_k` to use in chat or query engine, by default 3
chunk_overlap_ratio : float, optional
Chunk overlap as a ratio of chunk size, by default 0.1
force_new_index : bool, optional
@@ -79,6 +79,14 @@
"""
super().__init__(emoji="llama")
logging.info("Setting up Huggingface backend.")
if mode == "chat":
logging.info("Setting up chat engine.")
elif mode == "query":
logging.info("Setting up query engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)

self.max_input_size = max_input_size
self.model_name = model_name
self.num_output = num_output
@@ -138,17 +146,14 @@ def __init__(
storage_context=storage_context, service_context=service_context
)

if self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")
elif self.mode == "chat":
if self.mode == "chat":
self.chat_engine = self.index.as_chat_engine(
chat_mode="context", similarity_top_k=k
)
logging.info("Done setting up Huggingface backend for chat engine.")
else:
logging.error("Mode must either be 'query' or 'chat'.")
sys.exit(1)
elif self.mode == "query":
self.query_engine = self.index.as_query_engine(similarity_top_k=k)
logging.info("Done setting up Huggingface backend for query engine.")

self.error_response_template = (
"Oh no! When I tried to get a response to your prompt, "
@@ -356,7 +361,7 @@ class LlamaIndexLlamaCPP(LlamaIndex):
def __init__(
self,
model_name: str,
path: bool,
is_path: bool,
n_gpu_layers: int = 0,
*args: Any,
**kwargs: Any,
@@ -369,14 +374,14 @@ def __init__(
----------
model_name : str
Either the path to the model or the URL to download the model from
path : bool, optional
is_path : bool, optional
If True, model_name is used as a path to the model file,
otherwise it should be the URL to download the model
n_gpu_layers : int, optional
Number of layers to offload to GPU.
If -1, all layers are offloaded, by default 0
"""
self.path = path
self.is_path = is_path
self.n_gpu_layers = n_gpu_layers
super().__init__(*args, model_name=model_name, **kwargs)

@@ -389,8 +394,8 @@ def _prep_llm(self) -> LLM:
)

return LlamaCPP(
model_url=self.model_name if not self.path else None,
model_path=self.model_name if self.path else None,
model_url=self.model_name if not self.is_path else None,
model_path=self.model_name if self.is_path else None,
temperature=0.1,
max_new_tokens=self.num_output,
context_window=self.max_input_size,
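In `LlamaIndexLlamaCPP`, the renamed `is_path` argument controls whether `model_name` is handed to `LlamaCPP` as a local `model_path` or as a `model_url` to download. A standalone sketch of that selection (the file name and URL below are placeholders, not values from this PR):

```python
# Illustrative version of the routing done in LlamaIndexLlamaCPP._prep_llm:
# is_path decides whether model_name is a local file path or a download URL.
def resolve_model_source(model_name: str, is_path: bool) -> dict:
    return {
        "model_path": model_name if is_path else None,
        "model_url": None if is_path else model_name,
    }

resolve_model_source("llama-2-7b-chat.Q4_K_M.gguf", is_path=True)
# -> {"model_path": "llama-2-7b-chat.Q4_K_M.gguf", "model_url": None}
resolve_model_source("https://example.com/model.gguf", is_path=False)
# -> {"model_path": None, "model_url": "https://example.com/model.gguf"}
```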