From 428c634e73c7134b4507f35d8df94fef6477902e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 18:02:54 -0400 Subject: [PATCH 1/9] quantization --- src/config.ini | 5 ++- src/grag/quantize/quantize.py | 76 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 src/grag/quantize/quantize.py diff --git a/src/config.ini b/src/config.ini index 452ac04..74ab6c4 100644 --- a/src/config.ini +++ b/src/config.ini @@ -51,4 +51,7 @@ table_as_html : True data_path : ${root:root_path}/data [root] -root_path : /home/ubuntu/volume_2k/Capstone_5 \ No newline at end of file +root_path : /home/ubuntu/volume_2k/Capstone_5 + +[quantize] +llama_cpp_path : ${root:root_path} \ No newline at end of file diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py new file mode 100644 index 0000000..2728e13 --- /dev/null +++ b/src/grag/quantize/quantize.py @@ -0,0 +1,76 @@ +import os +import subprocess + +from grag.components.utils import get_config +from huggingface_hub import snapshot_download + +original_dir = os.getcwd() +config = get_config() +root_path = config['quantize']['llama_cpp_path'] + + +def get_llamacpp_repo(): + if os.path.exists(f"{root_path}/llama.cpp"): + subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + else: + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(): + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + os.chdir(original_dir) + + +def fetch_model_repo(): + response = input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ").strip().lower() + if response == "no": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + elif response == "yes" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, + local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(quantization): + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(original_dir) + + +if __name__ == "__main__": + get_llamacpp_repo() + building_llama() + fetch_model_repo() + + quantization = input("Enter quantization: ") + quantize_model(quantization) + # if len(sys.argv) < 2 or len(sys.argv) > 3: + # print("Usage: python script.py []") + # sys.exit(1) + # model_dir_path = sys.argv[1] + # quantization = sys.argv[2] if len(sys.argv) == 3 else None + # execute_commands(model_dir_path, quantization) From aec73771f90cdeeaa9d4fe128fc04fb5befb786e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 22:23:39 +0000 Subject: [PATCH 2/9] style fixes by ruff --- src/grag/quantize/quantize.py | 64 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..773e987 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -6,47 +6,73 @@ original_dir = os.getcwd() config = get_config() -root_path = config['quantize']['llama_cpp_path'] +root_path = config["quantize"]["llama_cpp_path"] def get_llamacpp_repo(): if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + subprocess.run( + [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True + ) else: subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, + shell=True, + ) def building_llama(): os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + 
shell=True, + check=True, + ) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") os.chdir(original_dir) def fetch_model_repo(): - response = input("Do you want us to download the model? (yes/no) [Enter for yes]: ").strip().lower() + response = ( + input("Do you want us to download the model? (yes/no) [Enter for yes]: ") + .strip() + .lower() + ) if response == "no": print("Please copy the model folder to 'llama.cpp/models/' folder.") elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ) local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) + snapshot_download( + repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False + ) print(f"Model downloaded in {local_dir}") @@ -55,8 +81,12 @@ def quantize_model(quantization): subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + quantized_model_file = ( + f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + ) + subprocess.run( + ["llm_quantize", model_file, quantized_model_file, quantization], check=True + ) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(original_dir) From 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:49:45 -0400 Subject: [PATCH 3/9] quantize file --- src/grag/quantize/__init__.py | 0 src/grag/quantize/quantize.py | 104 +++++++++++----------------------- src/grag/quantize/utils.py | 76 +++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 src/grag/quantize/__init__.py create mode 100644 src/grag/quantize/utils.py diff --git a/src/grag/quantize/__init__.py b/src/grag/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,76 +1,38 @@ -import os -import subprocess - from grag.components.utils import get_config -from huggingface_hub import snapshot_download +from grag.quantize.utils import ( + building_llama, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) -original_dir = os.getcwd() config = get_config() root_path = config['quantize']['llama_cpp_path'] - -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) - - -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - try: - subprocess.run(['which', 'cmake'], 
check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) - subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) - - -def fetch_model_repo(): - response = input("Do you want us to download the model? (yes/no) [Enter for yes]: ").strip().lower() - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) +user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() + +if user_input != "": + root_path = user_input + +res = get_llamacpp_repo(root_path) + +if "Already up to date." in res.stdout: + print("Repository is already up to date. Skipping build.") +else: + print("Updates found. Starting build...") + building_llama(root_path) + +response = input("Do you want us to download the model? 
(y/n) [Enter for yes]: ").strip().lower() +if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = input("Enter the model directory name: ") +elif response == "y" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() + fetch_model_repo(repo_id, root_path) + model_dir = repo_id.split('/')[1] + +quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") +quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py new file mode 100644 index 0000000..3df3c1a --- /dev/null +++ b/src/grag/quantize/utils.py @@ -0,0 +1,76 @@ +import os +import subprocess +from pathlib import Path + +from huggingface_hub import snapshot_download + + +def get_llamacpp_repo(root_path: str) -> None: + """Clones or pulls the llama.cpp repository into the specified root path. + + Args: + root_path (str): The root directory where the llama.cpp repository will be cloned or updated. + """ + if os.path.exists(f"{root_path}/llama.cpp"): + print(f"Repo exists at: {root_path}/llama.cpp") + res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + else: + + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(root_path: str) -> None: + """Attempts to build the llama.cpp project using make or cmake. + + Args: + root_path (str): The root directory where the llama.cpp project is located. + """ + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + finally: + os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + + +def fetch_model_repo(repo_id: str, root_path: str) -> None: + """Download model from huggingface.co/models. + + Args: + repo_id (str): Repository ID of the model to download. + root_path (str): The root path where the model should be downloaded or copied. + """ + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: + """Quantizes a specified model using a given quantization level. + + Args: + model_dir_path (str): The directory path of the model to be quantized. + quantization (str): The quantization level to apply. + root_path (str): The root directory path of the project. 
+ """ + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(Path(__file__).parent) # Return to the root path after operation From 11697c006bac3865203cc242c6841a024ec1f52a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:55:11 -0400 Subject: [PATCH 4/9] Revert "Merge branch 'quantize' of https://github.com/arjbingly/Capstone_5 into quantize" This reverts commit 79ebf3ae4bbc634f075d51791b1442569b1cd03a, reversing changes made to 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5. --- src/grag/quantize/quantize.py | 102 +--------------------------------- 1 file changed, 1 insertion(+), 101 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index c013b07..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -7,70 +7,21 @@ ) config = get_config() -root_path = config["quantize"]["llama_cpp_path"] +root_path = config['quantize']['llama_cpp_path'] user_input = input( "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() -<<<<<<< HEAD if user_input != "": root_path = user_input -======= -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run( - [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True - ) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, - shell=True, - ) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e res = get_llamacpp_repo(root_path) -<<<<<<< HEAD if "Already up to date." in res.stdout: print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") building_llama(root_path) -======= -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - try: - subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["mkdir", "build"], check=True) - subprocess.run( - [ - "cd", - "build", - "&&", - "cmake", - "..", - "-DLLAMA_CUBLAS=ON", - "&&", - "cmake", - "--build", - ".", - "--config", - "Release", - ], - shell=True, - check=True, - ) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": @@ -82,57 +33,6 @@ def building_llama(): fetch_model_repo(repo_id, root_path) model_dir = repo_id.split('/')[1] -<<<<<<< HEAD quantization = input( "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") quantize_model(model_dir, quantization, root_path) -======= -def fetch_model_repo(): - response = ( - input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ") - .strip() - .lower() - ) - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input( - "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " - ) - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download( - repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False - ) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = ( - f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - ) - subprocess.run( - ["llm_quantize", model_file, quantized_model_file, quantization], check=True - ) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e From 1bb12163f584150286714344082d60280113f9d1 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 20:12:26 -0400 Subject: [PATCH 5/9] rectified quantization, issue with llama.cpp --- src/grag/quantize/quantize.py | 2 +- src/grag/quantize/utils.py | 17 +++++++++-------- src/tests/quantize/__init__.py | 0 src/tests/quantize/quantize_test.py | 0 4 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 src/tests/quantize/__init__.py create mode 100644 src/tests/quantize/quantize_test.py diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 02065a9..8e42117 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -17,7 +17,7 @@ res = get_llamacpp_repo(root_path) -if "Already up to date." in res.stdout: +if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 3df3c1a..661fb65 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -15,10 +15,11 @@ def get_llamacpp_repo(root_path: str) -> None: print(f"Repo exists at: {root_path}/llama.cpp") res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) else: - - subprocess.run( + res = subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, shell=True, capture_output=True) + + return res def building_llama(root_path: str) -> None: @@ -53,9 +54,9 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: repo_id (str): Repository ID of the model to download. root_path (str): The root path where the model should be downloaded or copied. 
""" - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" + os.makedirs(local_dir, exist_ok=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) print(f"Model downloaded in {local_dir}") @@ -69,8 +70,8 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No """ os.chdir(f"{root_path}/llama.cpp/") subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/__init__.py b/src/tests/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py new file mode 100644 index 0000000..e69de29 From a7354ee7be3dadeff6a596e88a4b16e36cccbb69 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 15:00:14 -0400 Subject: [PATCH 6/9] issue in llama.cpp --- llm_quantize/quantize.py | 4 ++-- src/grag/quantize/quantize.py | 1 + src/grag/quantize/utils.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llm_quantize/quantize.py b/llm_quantize/quantize.py index 7fb1c24..708b6c8 100644 --- a/llm_quantize/quantize.py +++ b/llm_quantize/quantize.py @@ -1,6 +1,6 @@ +import os import subprocess import sys -import os def execute_commands(model_dir_path, quantization=None): @@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None): if quantization: model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf" quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True) else: print("llama.cpp doesn't exist, check readme how to clone.") diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 8e42117..d05990c 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -15,6 +15,7 @@ if user_input != "": root_path = user_input +# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." 
in str(res.stdout): diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 661fb65..7e2b92f 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -72,6 +72,6 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation From 66c06d0fb8e53daf69b967c7c7c102976d5bfd48 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:05:47 -0400 Subject: [PATCH 7/9] modifications and corrections after testing --- src/grag/quantize/quantize.py | 5 ++-- src/grag/quantize/utils.py | 43 +++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index d05990c..68168a2 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,6 +1,6 @@ from grag.components.utils import get_config from grag.quantize.utils import ( - building_llama, + building_llamacpp, fetch_model_repo, get_llamacpp_repo, quantize_model, @@ -15,14 +15,13 @@ if user_input != "": root_path = user_input -# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") - building_llama(root_path) + building_llamacpp(root_path) response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 7e2b92f..8d9d5bc 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,28 +1,34 @@ import os import subprocess from pathlib import Path +from typing import Optional, Union +from grag.components.utils import get_config from huggingface_hub import snapshot_download +config = get_config() -def get_llamacpp_repo(root_path: str) -> None: + +def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. Args: - root_path (str): The root directory where the llama.cpp repository will be cloned or updated. + root_path: The root directory where the llama.cpp repository will be cloned or updated. + + Returns: + A subprocess.CompletedProcess instance containing the result of the git operation. 
""" if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) else: - res = subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], + check=True, capture_output=True) return res -def building_llama(root_path: str) -> None: +def building_llamacpp(root_path: str) -> None: """Attempts to build the llama.cpp project using make or cmake. Args: @@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: +def quantize_model(model_dir_path: str, quantization: str, root_path: str, + output_dir: Optional[Union[str, Path]] = None) -> None: """Quantizes a specified model using a given quantization level. Args: + output_dir (str, optional): Directory to save quantized model. Defaults to None model_dir_path (str): The directory path of the model to be quantized. quantization (str): The quantization level to apply. root_path (str): The root directory path of the project. 
""" os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + model_dir_path = Path(model_dir_path) + if output_dir is None: + output_dir = config['llm']['base_dir'] + + output_dir = Path(output_dir) / model_dir_path.name + os.makedirs(output_dir, exist_ok=True) + + subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) + model_file = model_dir_path / "ggml-model-f32.gguf" + quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" + subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation From b90a8823d39215226b123553802efff0e9dd26d5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:52:50 -0400 Subject: [PATCH 8/9] quantizations all tests passed --- src/grag/quantize/quantize.py | 72 +++++++++++++---------- src/grag/quantize/utils.py | 89 +++++++++++++++++++++-------- src/tests/quantize/quantize_test.py | 37 ++++++++++++ 3 files changed, 146 insertions(+), 52 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 68168a2..64fba47 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,3 +1,7 @@ +"""Interactive file for quantizing models.""" + +from pathlib import Path + from grag.components.utils import get_config from grag.quantize.utils import ( building_llamacpp, @@ -7,32 +11,42 @@ ) config = get_config() -root_path = config['quantize']['llama_cpp_path'] - -user_input = input( - "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() - -if user_input != "": - root_path = user_input - -res = get_llamacpp_repo(root_path) - -if "Already up to date." in str(res.stdout): - print("Repository is already up to date. Skipping build.") -else: - print("Updates found. Starting build...") - building_llamacpp(root_path) - -response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() -if response == "n": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - _ = input("Enter if you have already copied the model:") - model_dir = input("Enter the model directory name: ") -elif response == "y" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() - fetch_model_repo(repo_id, root_path) - model_dir = repo_id.split('/')[1] - -quantization = input( - "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") -quantize_model(model_dir, quantization, root_path) +root_path = Path(config["quantize"]["llama_cpp_path"]) + +if __name__ == "__main__": + user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. 
Press Enter to use the default config path: " + ).strip() + + if user_input != "": + root_path = Path(user_input) + + res = get_llamacpp_repo(root_path) + + if "Already up to date." in str(res.stdout): + print("Repository is already up to date. Skipping build.") + else: + print("Updates found. Starting build...") + building_llamacpp(root_path) + + response = ( + input("Do you want us to download the model? (y/n) [Enter for yes]: ") + .strip() + .lower() + ) + if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = Path(input("Enter the model directory name: ")) + elif response == "y" or response == "": + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ).strip() + fetch_model_repo(repo_id, root_path) + # model_dir = repo_id.split('/')[1] + model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1] + + quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : " + ) + quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 8d9d5bc..bc1d280 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,3 +1,5 @@ +"""Utility functions for quantization.""" + import os import subprocess from pathlib import Path @@ -9,7 +11,7 @@ config = get_config() -def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: +def get_llamacpp_repo(root_path: Union[str, Path]) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. Args: @@ -20,15 +22,27 @@ def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """ if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) + res = subprocess.run( + ["git", "-C", f"{root_path}/llama.cpp", "pull"], + check=True, + capture_output=True, + ) else: - res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], - check=True, capture_output=True) + res = subprocess.run( + [ + "git", + "clone", + "https://github.com/ggerganov/llama.cpp.git", + f"{root_path}/llama.cpp", + ], + check=True, + capture_output=True, + ) return res -def building_llamacpp(root_path: str) -> None: +def building_llamacpp(root_path: Union[str, Path]) -> None: """Attempts to build the llama.cpp project using make or cmake. 
Args: @@ -36,24 +50,41 @@ def building_llamacpp(root_path: str) -> None: """ os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successful.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successful.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + shell=True, + check=True, + ) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") finally: - os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + os.chdir( + Path(__file__).parent + ) # Assuming you want to return to the root path after operation -def fetch_model_repo(repo_id: str, root_path: str) -> None: +def fetch_model_repo(repo_id: str, root_path: Union[str, Path]) -> None: """Download model from huggingface.co/models. Args: @@ -62,24 +93,33 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) + snapshot_download( + repo_id=repo_id, + local_dir=local_dir, + local_dir_use_symlinks="auto", + resume_download=True, + ) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str, - output_dir: Optional[Union[str, Path]] = None) -> None: +def quantize_model( + model_dir_path: Union[str, Path], + quantization: str, + root_path: Union[str, Path], + output_dir: Optional[Union[str, Path]] = None, +) -> None: """Quantizes a specified model using a given quantization level. Args: - output_dir (str, optional): Directory to save quantized model. Defaults to None - model_dir_path (str): The directory path of the model to be quantized. + output_dir (str, Path, optional): Directory to save quantized model. Defaults to None + model_dir_path (str, Path): The directory path of the model to be quantized. quantization (str): The quantization level to apply. - root_path (str): The root directory path of the project. + root_path (str, Path): The root directory path of the project. 
""" os.chdir(f"{root_path}/llama.cpp/") model_dir_path = Path(model_dir_path) if output_dir is None: - output_dir = config['llm']['base_dir'] + output_dir = config["llm"]["base_dir"] output_dir = Path(output_dir) / model_dir_path.name os.makedirs(output_dir, exist_ok=True) @@ -87,6 +127,9 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str, subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) model_file = model_dir_path / "ggml-model-f32.gguf" quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + subprocess.run( + ["./quantize", str(model_file), str(quantized_model_file), quantization], + check=True, + ) print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index e69de29..f7b3c51 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -0,0 +1,37 @@ +import os +from pathlib import Path + +from grag.quantize.utils import ( + building_llamacpp, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) + +root_path = Path(__file__).parent / 'test_data' +os.makedirs(root_path, exist_ok=True) + + +def test_get_llamacpp_repo(): + get_llamacpp_repo(root_path) + repo_path = root_path / 'llama.cpp' / '.git' + assert os.path.exists(repo_path) + + +def test_build_llamacpp(): + building_llamacpp(root_path) + bin_path = root_path / 'llama.cpp' / 'quantize' + assert os.path.exists(bin_path) + + +def test_fetch_model_repo(): + fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + assert os.path.exists(model_dir_path) + + +def test_quantize_model(): + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" + assert os.path.exists(gguf_file_path) From 14ca30db4b806996307c0e1de482e482c06b2826 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 21:57:48 +0000 Subject: [PATCH 9/9] style fixes by ruff --- src/tests/quantize/quantize_test.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index f7b3c51..af0e9dd 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -8,30 +8,32 @@ quantize_model, ) -root_path = Path(__file__).parent / 'test_data' +root_path = Path(__file__).parent / "test_data" os.makedirs(root_path, exist_ok=True) def test_get_llamacpp_repo(): get_llamacpp_repo(root_path) - repo_path = root_path / 'llama.cpp' / '.git' + repo_path = root_path / "llama.cpp" / ".git" assert os.path.exists(repo_path) def test_build_llamacpp(): building_llamacpp(root_path) - bin_path = root_path / 'llama.cpp' / 'quantize' + bin_path = root_path / "llama.cpp" / "quantize" assert os.path.exists(bin_path) def test_fetch_model_repo(): - fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) - model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" assert os.path.exists(model_dir_path) def test_quantize_model(): - 
model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' - quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" + quantize_model( + model_dir_path, "Q3_K_M", root_path, output_dir=model_dir_path.parent + ) gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" assert os.path.exists(gguf_file_path)
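
End state of this series: src/grag/quantize/utils.py exposes get_llamacpp_repo, building_llamacpp, fetch_model_repo and quantize_model, and src/grag/quantize/quantize.py drives them interactively. For reviewers, a minimal non-interactive sketch of the same flow, assuming the root path from config.ini and the Llama-2-7b-chat repo id used in the tests; any other path or Hugging Face repo id would be handled the same way:

    from pathlib import Path

    from grag.quantize.utils import (
        building_llamacpp,
        fetch_model_repo,
        get_llamacpp_repo,
        quantize_model,
    )

    # Assumed values: the config.ini root path and the HF repo id from the tests.
    root_path = Path("/home/ubuntu/volume_2k/Capstone_5")
    repo_id = "meta-llama/Llama-2-7b-chat"

    res = get_llamacpp_repo(root_path)                 # clone or pull llama.cpp
    if "Already up to date." not in str(res.stdout):
        building_llamacpp(root_path)                   # make LLAMA_CUBLAS=1, with a cmake fallback
    fetch_model_repo(repo_id, root_path)               # snapshot_download into llama.cpp/models/<name>
    model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1]
    quantize_model(model_dir, "Q4_K_M", root_path)     # convert.py + ./quantize; output defaults to the llm base_dir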