From 66c06d0fb8e53daf69b967c7c7c102976d5bfd48 Mon Sep 17 00:00:00 2001
From: sanchitvj
Date: Sun, 24 Mar 2024 17:05:47 -0400
Subject: [PATCH] modifications and corrections after testing

---
 src/grag/quantize/quantize.py |  5 ++--
 src/grag/quantize/utils.py    | 43 +++++++++++++++++++++++------------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py
index d05990c..68168a2 100644
--- a/src/grag/quantize/quantize.py
+++ b/src/grag/quantize/quantize.py
@@ -1,6 +1,6 @@
 from grag.components.utils import get_config
 from grag.quantize.utils import (
-    building_llama,
+    building_llamacpp,
     fetch_model_repo,
     get_llamacpp_repo,
     quantize_model,
@@ -15,14 +15,13 @@
 if user_input != "":
     root_path = user_input
 
-# noinspection PyNoneFunctionAssignment
 res = get_llamacpp_repo(root_path)
 
 if "Already up to date." in str(res.stdout):
     print("Repository is already up to date. Skipping build.")
 else:
     print("Updates found. Starting build...")
-    building_llama(root_path)
+    building_llamacpp(root_path)
 
 response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower()
 if response == "n":
diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py
index 7e2b92f..8d9d5bc 100644
--- a/src/grag/quantize/utils.py
+++ b/src/grag/quantize/utils.py
@@ -1,28 +1,34 @@
 import os
 import subprocess
 from pathlib import Path
+from typing import Optional, Union
 
+from grag.components.utils import get_config
 from huggingface_hub import snapshot_download
 
+config = get_config()
 
-def get_llamacpp_repo(root_path: str) -> None:
+
+def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess:
     """Clones or pulls the llama.cpp repository into the specified root path.
 
     Args:
-        root_path (str): The root directory where the llama.cpp repository will be cloned or updated.
+        root_path: The root directory where the llama.cpp repository will be cloned or updated.
+
+    Returns:
+        A subprocess.CompletedProcess instance containing the result of the git operation.
     """
     if os.path.exists(f"{root_path}/llama.cpp"):
         print(f"Repo exists at: {root_path}/llama.cpp")
-        res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True)
     else:
-        res = subprocess.run(
-            [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"],
-            check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"],
+                             check=True, capture_output=True)
 
     return res
 
 
-def building_llama(root_path: str) -> None:
+def building_llamacpp(root_path: str) -> None:
     """Attempts to build the llama.cpp project using make or cmake.
 
     Args:
@@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None:
     """
     local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
     os.makedirs(local_dir, exist_ok=True)
-    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True)
+    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True)
     print(f"Model downloaded in {local_dir}")
 
 
-def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None:
+def quantize_model(model_dir_path: str, quantization: str, root_path: str,
+                   output_dir: Optional[Union[str, Path]] = None) -> None:
     """Quantizes a specified model using a given quantization level.
 
     Args:
         model_dir_path (str): The directory path of the model to be quantized.
         quantization (str): The quantization level to apply.
         root_path (str): The root directory path of the project.
+        output_dir (Union[str, Path], optional): Directory to save the quantized model. Defaults to None, in which case config['llm']['base_dir'] is used.
     """
     os.chdir(f"{root_path}/llama.cpp/")
-    subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
-    model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
-    quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-    subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True)
-    print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
+    model_dir_path = Path(model_dir_path)
+    if output_dir is None:
+        output_dir = config['llm']['base_dir']
+
+    output_dir = Path(output_dir) / model_dir_path.name
+    os.makedirs(output_dir, exist_ok=True)
+
+    subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True)
+    model_file = model_dir_path / "ggml-model-f32.gguf"
+    quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf"
+    subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True)
+    print(f"Quantized model present at {output_dir}")
     os.chdir(Path(__file__).parent)  # Return to the root path after operation
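
Not part of the patch, but for reviewers: a minimal usage sketch of the updated helpers, assuming placeholder paths, a placeholder repo id and quantization level, and that llama.cpp's convert.py and quantize binary exist after the build step:

    from grag.quantize.utils import (
        building_llamacpp,
        fetch_model_repo,
        get_llamacpp_repo,
        quantize_model,
    )

    root_path = "/home/user"  # placeholder root directory

    # get_llamacpp_repo now returns a subprocess.CompletedProcess, so its
    # stdout can be inspected to decide whether a rebuild is needed.
    res = get_llamacpp_repo(root_path)
    if "Already up to date." not in str(res.stdout):
        building_llamacpp(root_path)

    # Downloads into {root_path}/llama.cpp/models/Llama-2-7b-chat-hf
    fetch_model_repo("meta-llama/Llama-2-7b-chat-hf", root_path)

    # model_dir_path is relative to {root_path}/llama.cpp/. If output_dir is
    # omitted, the quantized file lands under config['llm']['base_dir'].
    quantize_model("models/Llama-2-7b-chat-hf", "Q5_K_M", root_path,
                   output_dir="/home/user/quantized-models")

Replacing the shell=True command strings with argument lists ("git", "-C", ...) also sidesteps shell quoting problems when root_path contains spaces.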