Quantization module #48

Merged: 11 commits, Mar 24, 2024
4 changes: 2 additions & 2 deletions llm_quantize/quantize.py
@@ -1,6 +1,6 @@
-import os
 import subprocess
 import sys
+import os


 def execute_commands(model_dir_path, quantization=None):
@@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None):
     if quantization:
         model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf"
         quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-        subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True)
+        subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True)

     else:
         print("llama.cpp doesn't exist, check readme how to clone.")
5 changes: 4 additions & 1 deletion src/config.ini
@@ -65,4 +65,7 @@ table_as_html : True
 data_path : ${root:root_path}/data

 [root]
-root_path : /home/ubuntu/CapStone/Capstone_5
+root_path : /home/ubuntu/volume_2k/Capstone_5
+
+[quantize]
+llama_cpp_path : ${root:root_path}
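Note: the `${root:root_path}` reference is configparser-style extended interpolation. A minimal sketch of how the new [quantize] value resolves, assuming grag's get_config wraps ConfigParser with ExtendedInterpolation (an assumption about its internals, not confirmed by this diff):

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read("src/config.ini")
# ${root:root_path} resolves across sections, so this prints
# /home/ubuntu/volume_2k/Capstone_5
print(config["quantize"]["llama_cpp_path"])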
Empty file added src/grag/quantize/__init__.py
52 changes: 52 additions & 0 deletions src/grag/quantize/quantize.py
@@ -0,0 +1,52 @@
"""Interactive file for quantizing models."""

from pathlib import Path

from grag.components.utils import get_config
from grag.quantize.utils import (
building_llamacpp,
fetch_model_repo,
get_llamacpp_repo,
quantize_model,
)

config = get_config()
root_path = Path(config["quantize"]["llama_cpp_path"])

if __name__ == "__main__":
user_input = input(
"Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: "
).strip()

if user_input != "":
root_path = Path(user_input)

res = get_llamacpp_repo(root_path)

if "Already up to date." in str(res.stdout):
print("Repository is already up to date. Skipping build.")
else:
print("Updates found. Starting build...")
building_llamacpp(root_path)

response = (
input("Do you want us to download the model? (y/n) [Enter for yes]: ")
.strip()
.lower()
)
    if response == "n":
        print("Please copy the model folder into the 'llama.cpp/models/' directory.")
        _ = input("Press Enter once you have copied the model: ")
        model_dir = Path(input("Enter the model directory name: "))
    else:  # default to downloading: Enter (and anything other than "n") counts as yes
        repo_id = input(
            "Please enter the repo_id for the model (you can check on https://huggingface.co/models): "
        ).strip()
        fetch_model_repo(repo_id, root_path)
        model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1]

    quantization = input(
        "Enter quantization level (recommended: Q5_K_M or Q4_K_M; full list at https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19): "
    )
quantize_model(model_dir, quantization, root_path)
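Aside: the same pipeline can be driven non-interactively by calling the utilities directly. A minimal sketch, not part of this PR (the repo id is a placeholder taken from the tests below):

from pathlib import Path

from grag.quantize.utils import (
    building_llamacpp,
    fetch_model_repo,
    get_llamacpp_repo,
    quantize_model,
)

root = Path("/home/ubuntu/volume_2k/Capstone_5")
get_llamacpp_repo(root)  # clone or pull llama.cpp
building_llamacpp(root)  # build the quantize binary
fetch_model_repo("meta-llama/Llama-2-7b-chat", root)  # placeholder repo id
quantize_model(root / "llama.cpp" / "models" / "Llama-2-7b-chat", "Q5_K_M", root)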
135 changes: 135 additions & 0 deletions src/grag/quantize/utils.py
@@ -0,0 +1,135 @@
"""Utility functions for quantization."""

import os
import subprocess
from pathlib import Path
from typing import Optional, Union

from grag.components.utils import get_config
from huggingface_hub import snapshot_download

config = get_config()


def get_llamacpp_repo(root_path: Union[str, Path]) -> subprocess.CompletedProcess:
"""Clones or pulls the llama.cpp repository into the specified root path.

Args:
root_path: The root directory where the llama.cpp repository will be cloned or updated.

Returns:
A subprocess.CompletedProcess instance containing the result of the git operation.
"""
if os.path.exists(f"{root_path}/llama.cpp"):
print(f"Repo exists at: {root_path}/llama.cpp")
res = subprocess.run(
["git", "-C", f"{root_path}/llama.cpp", "pull"],
check=True,
capture_output=True,
)
else:
res = subprocess.run(
[
"git",
"clone",
"https://github.com/ggerganov/llama.cpp.git",
f"{root_path}/llama.cpp",
],
check=True,
capture_output=True,
)

return res
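Aside: with capture_output=True and no text=True, res.stdout is bytes; the caller's str(res.stdout) membership test still works because the bytes repr embeds the message. A quick illustration (run inside a git checkout), not part of the file:

import subprocess

res = subprocess.run(["git", "pull"], capture_output=True)
# res.stdout is bytes, e.g. b'Already up to date.\n';
# str(res.stdout) gives "b'Already up to date.\\n'", so the substring test matches
print("Already up to date." in str(res.stdout))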


def building_llamacpp(root_path: Union[str, Path]) -> None:
"""Attempts to build the llama.cpp project using make or cmake.

    Args:
        root_path (str | Path): The root directory where the llama.cpp project is located.
    """
    os.chdir(f"{root_path}/llama.cpp/")
    try:
        subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
        print("Llama.cpp build successful.")
    except subprocess.CalledProcessError:
        try:
            subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL)
            # mkdir -p semantics so a rerun does not fail on an existing build dir
            os.makedirs("build", exist_ok=True)
            # run the compound cmake command through the shell from inside build/
            subprocess.run(
                "cmake .. -DLLAMA_CUBLAS=ON && cmake --build . --config Release",
                shell=True,
                check=True,
                cwd="build",
            )
            print("Llama.cpp build successful.")
        except subprocess.CalledProcessError:
            print("Unable to build, cannot find make or cmake.")
    finally:
        os.chdir(Path(__file__).parent)  # return to this module's directory afterwards


def fetch_model_repo(repo_id: str, root_path: Union[str, Path]) -> None:
"""Download model from huggingface.co/models.

    Args:
        repo_id (str): Repository ID of the model to download, e.g. "meta-llama/Llama-2-7b-chat".
        root_path (str | Path): The root path where the model should be downloaded or copied.
"""
local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
os.makedirs(local_dir, exist_ok=True)
snapshot_download(
repo_id=repo_id,
local_dir=local_dir,
local_dir_use_symlinks="auto",
resume_download=True,
)
print(f"Model downloaded in {local_dir}")


def quantize_model(
model_dir_path: Union[str, Path],
quantization: str,
root_path: Union[str, Path],
output_dir: Optional[Union[str, Path]] = None,
) -> None:
"""Quantizes a specified model using a given quantization level.

    Args:
        model_dir_path (str | Path): The directory path of the model to be quantized.
        quantization (str): The quantization level to apply.
        root_path (str | Path): The root directory path of the project.
        output_dir (str | Path, optional): Directory to save the quantized model.
            Defaults to None, in which case config["llm"]["base_dir"] is used.
    """
os.chdir(f"{root_path}/llama.cpp/")
model_dir_path = Path(model_dir_path)
if output_dir is None:
output_dir = config["llm"]["base_dir"]

output_dir = Path(output_dir) / model_dir_path.name
os.makedirs(output_dir, exist_ok=True)

subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True)
model_file = model_dir_path / "ggml-model-f32.gguf"
quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf"
subprocess.run(
["./quantize", str(model_file), str(quantized_model_file), quantization],
check=True,
)
print(f"Quantized model present at {output_dir}")
os.chdir(Path(__file__).parent) # Return to the root path after operation
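For reference, a sketch of the two commands quantize_model wraps, written with cwd= instead of os.chdir so the process working directory is left untouched (paths are illustrative, not part of this PR):

import subprocess
from pathlib import Path

root = Path("/home/ubuntu/volume_2k/Capstone_5")
llama_cpp = root / "llama.cpp"
model = llama_cpp / "models" / "Llama-2-7b-chat"

# 1) convert the HF checkpoint to a GGUF file (convert.py ships with llama.cpp)
subprocess.run(["python3", "convert.py", f"{model}/"], check=True, cwd=llama_cpp)
# 2) quantize the converted file to the requested level
subprocess.run(
    ["./quantize", str(model / "ggml-model-f32.gguf"),
     str(model / "ggml-model-Q5_K_M.gguf"), "Q5_K_M"],
    check=True,
    cwd=llama_cpp,
)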
Empty file added src/tests/quantize/__init__.py
39 changes: 39 additions & 0 deletions src/tests/quantize/quantize_test.py
@@ -0,0 +1,39 @@
import os
from pathlib import Path

from grag.quantize.utils import (
building_llamacpp,
fetch_model_repo,
get_llamacpp_repo,
quantize_model,
)

root_path = Path(__file__).parent / "test_data"
os.makedirs(root_path, exist_ok=True)


def test_get_llamacpp_repo():
get_llamacpp_repo(root_path)
repo_path = root_path / "llama.cpp" / ".git"
assert os.path.exists(repo_path)


def test_build_llamacpp():
building_llamacpp(root_path)
bin_path = root_path / "llama.cpp" / "quantize"
assert os.path.exists(bin_path)


def test_fetch_model_repo():
fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path)
model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat"
assert os.path.exists(model_dir_path)


def test_quantize_model():
model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat"
quantize_model(
model_dir_path, "Q3_K_M", root_path, output_dir=model_dir_path.parent
)
gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf"
assert os.path.exists(gguf_file_path)
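Note: these are order-dependent integration tests; each consumes the artifacts of the previous one and they need network access plus a build toolchain. A hedged sketch of guarding the build test in this file when neither make nor cmake is available (pytest assumed, since these are pytest-style tests):

import shutil

import pytest

@pytest.mark.skipif(
    shutil.which("make") is None and shutil.which("cmake") is None,
    reason="building llama.cpp needs make or cmake",
)
def test_build_llamacpp():
    building_llamacpp(root_path)
    assert os.path.exists(root_path / "llama.cpp" / "quantize")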