From 66c06d0fb8e53daf69b967c7c7c102976d5bfd48 Mon Sep 17 00:00:00 2001
From: sanchitvj
Date: Sun, 24 Mar 2024 17:05:47 -0400
Subject: [PATCH] modifications and corrections after testing

---
 src/grag/quantize/quantize.py |  5 ++--
 src/grag/quantize/utils.py    | 43 +++++++++++++++++++++++------------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py
index d05990c..68168a2 100644
--- a/src/grag/quantize/quantize.py
+++ b/src/grag/quantize/quantize.py
@@ -1,6 +1,6 @@
 from grag.components.utils import get_config
 from grag.quantize.utils import (
-    building_llama,
+    building_llamacpp,
     fetch_model_repo,
     get_llamacpp_repo,
     quantize_model,
@@ -15,14 +15,13 @@
 if user_input != "":
     root_path = user_input
 
-# noinspection PyNoneFunctionAssignment
 res = get_llamacpp_repo(root_path)
 
 if "Already up to date." in str(res.stdout):
     print("Repository is already up to date. Skipping build.")
 else:
     print("Updates found. Starting build...")
-    building_llama(root_path)
+    building_llamacpp(root_path)
 
 response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower()
 if response == "n":
diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py
index 7e2b92f..8d9d5bc 100644
--- a/src/grag/quantize/utils.py
+++ b/src/grag/quantize/utils.py
@@ -1,28 +1,34 @@
 import os
 import subprocess
 from pathlib import Path
+from typing import Optional, Union
 
+from grag.components.utils import get_config
 from huggingface_hub import snapshot_download
 
+config = get_config()
 
-def get_llamacpp_repo(root_path: str) -> None:
+
+def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess:
     """Clones or pulls the llama.cpp repository into the specified root path.
 
     Args:
-        root_path (str): The root directory where the llama.cpp repository will be cloned or updated.
+        root_path: The root directory where the llama.cpp repository will be cloned or updated.
+
+    Returns:
+        A subprocess.CompletedProcess instance containing the result of the git operation.
     """
     if os.path.exists(f"{root_path}/llama.cpp"):
         print(f"Repo exists at: {root_path}/llama.cpp")
-        res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True)
     else:
-        res = subprocess.run(
-            [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"],
-            check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"],
+                             check=True, capture_output=True)
 
     return res
 
 
-def building_llama(root_path: str) -> None:
+def building_llamacpp(root_path: str) -> None:
     """Attempts to build the llama.cpp project using make or cmake.
 
     Args:
@@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None:
     """
     local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
     os.makedirs(local_dir, exist_ok=True)
-    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True)
+    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True)
     print(f"Model downloaded in {local_dir}")
 
 
-def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None:
+def quantize_model(model_dir_path: str, quantization: str, root_path: str,
+                   output_dir: Optional[Union[str, Path]] = None) -> None:
     """Quantizes a specified model using a given quantization level.
 
     Args:
         model_dir_path (str): The directory path of the model to be quantized.
         quantization (str): The quantization level to apply.
         root_path (str): The root directory path of the project.
+        output_dir (Union[str, Path], optional): Directory to save the quantized model. Defaults to None, in which case config['llm']['base_dir'] is used.
     """
     os.chdir(f"{root_path}/llama.cpp/")
-    subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
-    model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
-    quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-    subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True)
-    print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
+    model_dir_path = Path(model_dir_path)
+    if output_dir is None:
+        output_dir = config['llm']['base_dir']
+
+    output_dir = Path(output_dir) / model_dir_path.name
+    os.makedirs(output_dir, exist_ok=True)
+
+    subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True)
+    model_file = model_dir_path / "ggml-model-f32.gguf"
+    quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf"
+    subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True)
+    print(f"Quantized model present at {output_dir}")
     os.chdir(Path(__file__).parent)  # Return to the root path after operation
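
Not part of the patch, but for reviewers: a minimal usage sketch of the updated helpers, assuming placeholder paths, a placeholder repo id and quantization level, and that llama.cpp's convert.py and quantize binary exist after the build step:

    from grag.quantize.utils import (
        building_llamacpp,
        fetch_model_repo,
        get_llamacpp_repo,
        quantize_model,
    )

    root_path = "/home/user"  # placeholder root directory

    # get_llamacpp_repo now returns a subprocess.CompletedProcess, so its
    # stdout can be inspected to decide whether a rebuild is needed.
    res = get_llamacpp_repo(root_path)
    if "Already up to date." not in str(res.stdout):
        building_llamacpp(root_path)

    # Downloads into {root_path}/llama.cpp/models/Llama-2-7b-chat-hf
    fetch_model_repo("meta-llama/Llama-2-7b-chat-hf", root_path)

    # model_dir_path is relative to {root_path}/llama.cpp/. If output_dir is
    # omitted, the quantized file lands under config['llm']['base_dir'].
    quantize_model("models/Llama-2-7b-chat-hf", "Q5_K_M", root_path,
                   output_dir="/home/user/quantized-models")

Replacing the shell=True command strings with argument lists ("git", "-C", ...) also sidesteps shell quoting problems when root_path contains spaces.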