Skip to content

Commit

Permalink
rectified quantization, issue with llama.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
sanchitvj committed Mar 24, 2024
1 parent 11697c0 commit 1bb1216
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/grag/quantize/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

res = get_llamacpp_repo(root_path)

if "Already up to date." in res.stdout:
if "Already up to date." in str(res.stdout):
print("Repository is already up to date. Skipping build.")
else:
print("Updates found. Starting build...")
Expand Down
17 changes: 9 additions & 8 deletions src/grag/quantize/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def get_llamacpp_repo(root_path: str) -> None:
print(f"Repo exists at: {root_path}/llama.cpp")
res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True)
else:

subprocess.run(
res = subprocess.run(
[f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"],
check=True, shell=True)
check=True, shell=True, capture_output=True)

return res


def building_llama(root_path: str) -> None:
Expand Down Expand Up @@ -53,9 +54,9 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None:
repo_id (str): Repository ID of the model to download.
root_path (str): The root path where the model should be downloaded or copied.
"""
local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}"
os.mkdir(local_dir)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)
local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
os.makedirs(local_dir, exist_ok=True)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks="auto", resume_download=True)
print(f"Model downloaded in {local_dir}")


Expand All @@ -69,8 +70,8 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No
"""
os.chdir(f"{root_path}/llama.cpp/")
subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
model_file = f"models/{model_dir_path}/ggml-model-f16.gguf"
model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True)
subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True)
print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
os.chdir(Path(__file__).parent) # Return to the root path after operation
Empty file added src/tests/quantize/__init__.py
Empty file.
Empty file.

0 comments on commit 1bb1216

Please sign in to comment.