Quantization module #48

Merged: 11 commits, Mar 24, 2024
4 changes: 2 additions & 2 deletions llm_quantize/quantize.py
@@ -1,6 +1,6 @@
-import os
 import subprocess
 import sys
+import os


 def execute_commands(model_dir_path, quantization=None):
@@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None):
     if quantization:
         model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf"
         quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-        subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True)
+        subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True)

     else:
         print("llama.cpp doesn't exist, check readme how to clone.")
5 changes: 4 additions & 1 deletion src/config.ini
@@ -65,4 +65,7 @@ table_as_html : True
 data_path : ${root:root_path}/data

 [root]
-root_path : /home/ubuntu/CapStone/Capstone_5
+root_path : /home/ubuntu/volume_2k/Capstone_5
+
+[quantize]
+llama_cpp_path : ${root:root_path}
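Note: the `${root:root_path}` reference is configparser-style extended interpolation. A minimal sketch of how the new [quantize] value resolves, assuming grag's get_config wraps ConfigParser with ExtendedInterpolation (an assumption about its internals, not confirmed by this diff):

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read("src/config.ini")
# ${root:root_path} resolves across sections, so this prints
# /home/ubuntu/volume_2k/Capstone_5
print(config["quantize"]["llama_cpp_path"])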
Empty file added src/grag/quantize/__init__.py
52 changes: 52 additions & 0 deletions src/grag/quantize/quantize.py
@@ -0,0 +1,52 @@
"""Interactive file for quantizing models."""

from pathlib import Path

from grag.components.utils import get_config
from grag.quantize.utils import (
building_llamacpp,
fetch_model_repo,
get_llamacpp_repo,
quantize_model,
)

config = get_config()
root_path = Path(config["quantize"]["llama_cpp_path"])

if __name__ == "__main__":
user_input = input(
"Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: "
).strip()

if user_input != "":
root_path = Path(user_input)

res = get_llamacpp_repo(root_path)

if "Already up to date." in str(res.stdout):
print("Repository is already up to date. Skipping build.")
else:
print("Updates found. Starting build...")
building_llamacpp(root_path)

response = (
input("Do you want us to download the model? (y/n) [Enter for yes]: ")
.strip()
.lower()
)
    if response == "n":
        print("Please copy the model folder into the 'llama.cpp/models/' directory.")
        _ = input("Press Enter once you have copied the model: ")
        model_dir = Path(input("Enter the model directory name: "))
    else:  # default to downloading: Enter (and anything other than "n") counts as yes
        repo_id = input(
            "Please enter the repo_id for the model (you can check on https://huggingface.co/models): "
        ).strip()
        fetch_model_repo(repo_id, root_path)
        model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1]

    quantization = input(
        "Enter quantization level (recommended: Q5_K_M or Q4_K_M; full list at https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19): "
    )
quantize_model(model_dir, quantization, root_path)
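Aside: the same pipeline can be driven non-interactively by calling the utilities directly. A minimal sketch, not part of this PR (the repo id is a placeholder taken from the tests below):

from pathlib import Path

from grag.quantize.utils import (
    building_llamacpp,
    fetch_model_repo,
    get_llamacpp_repo,
    quantize_model,
)

root = Path("/home/ubuntu/volume_2k/Capstone_5")
get_llamacpp_repo(root)  # clone or pull llama.cpp
building_llamacpp(root)  # build the quantize binary
fetch_model_repo("meta-llama/Llama-2-7b-chat", root)  # placeholder repo id
quantize_model(root / "llama.cpp" / "models" / "Llama-2-7b-chat", "Q5_K_M", root)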
135 changes: 135 additions & 0 deletions src/grag/quantize/utils.py
@@ -0,0 +1,135 @@
"""Utility functions for quantization."""

import os
import subprocess
from pathlib import Path
from typing import Optional, Union

from grag.components.utils import get_config
from huggingface_hub import snapshot_download

config = get_config()


def get_llamacpp_repo(root_path: Union[str, Path]) -> subprocess.CompletedProcess:
"""Clones or pulls the llama.cpp repository into the specified root path.

Args:
root_path: The root directory where the llama.cpp repository will be cloned or updated.

Returns:
A subprocess.CompletedProcess instance containing the result of the git operation.
"""
if os.path.exists(f"{root_path}/llama.cpp"):
print(f"Repo exists at: {root_path}/llama.cpp")
res = subprocess.run(
["git", "-C", f"{root_path}/llama.cpp", "pull"],
check=True,
capture_output=True,
)
else:
res = subprocess.run(
[
"git",
"clone",
"https://github.com/ggerganov/llama.cpp.git",
f"{root_path}/llama.cpp",
],
check=True,
capture_output=True,
)

return res
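Aside: with capture_output=True and no text=True, res.stdout is bytes; the caller's str(res.stdout) membership test still works because the bytes repr embeds the message. A quick illustration (run inside a git checkout), not part of the file:

import subprocess

res = subprocess.run(["git", "pull"], capture_output=True)
# res.stdout is bytes, e.g. b'Already up to date.\n';
# str(res.stdout) gives "b'Already up to date.\\n'", so the substring test matches
print("Already up to date." in str(res.stdout))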


def building_llamacpp(root_path: Union[str, Path]) -> None:
"""Attempts to build the llama.cpp project using make or cmake.

    Args:
        root_path (str | Path): The root directory where the llama.cpp project is located.
    """
    os.chdir(f"{root_path}/llama.cpp/")
    try:
        subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
        print("Llama.cpp build successful.")
    except subprocess.CalledProcessError:
        try:
            subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL)
            # mkdir -p semantics so a rerun does not fail on an existing build dir
            os.makedirs("build", exist_ok=True)
            # run the compound cmake command through the shell from inside build/
            subprocess.run(
                "cmake .. -DLLAMA_CUBLAS=ON && cmake --build . --config Release",
                shell=True,
                check=True,
                cwd="build",
            )
            print("Llama.cpp build successful.")
        except subprocess.CalledProcessError:
            print("Unable to build, cannot find make or cmake.")
    finally:
        os.chdir(Path(__file__).parent)  # return to this module's directory afterwards


def fetch_model_repo(repo_id: str, root_path: Union[str, Path]) -> None:
"""Download model from huggingface.co/models.

    Args:
        repo_id (str): Repository ID of the model to download, e.g. "meta-llama/Llama-2-7b-chat".
        root_path (str | Path): The root path where the model should be downloaded or copied.
"""
local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
os.makedirs(local_dir, exist_ok=True)
snapshot_download(
repo_id=repo_id,
local_dir=local_dir,
local_dir_use_symlinks="auto",
resume_download=True,
)
print(f"Model downloaded in {local_dir}")


def quantize_model(
model_dir_path: Union[str, Path],
quantization: str,
root_path: Union[str, Path],
output_dir: Optional[Union[str, Path]] = None,
) -> None:
"""Quantizes a specified model using a given quantization level.

    Args:
        model_dir_path (str | Path): The directory path of the model to be quantized.
        quantization (str): The quantization level to apply.
        root_path (str | Path): The root directory path of the project.
        output_dir (str | Path, optional): Directory to save the quantized model.
            Defaults to None, in which case config["llm"]["base_dir"] is used.
    """
os.chdir(f"{root_path}/llama.cpp/")
model_dir_path = Path(model_dir_path)
if output_dir is None:
output_dir = config["llm"]["base_dir"]

output_dir = Path(output_dir) / model_dir_path.name
os.makedirs(output_dir, exist_ok=True)

subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True)
model_file = model_dir_path / "ggml-model-f32.gguf"
quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf"
subprocess.run(
["./quantize", str(model_file), str(quantized_model_file), quantization],
check=True,
)
print(f"Quantized model present at {output_dir}")
os.chdir(Path(__file__).parent) # Return to the root path after operation
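For reference, a sketch of the two commands quantize_model wraps, written with cwd= instead of os.chdir so the process working directory is left untouched (paths are illustrative, not part of this PR):

import subprocess
from pathlib import Path

root = Path("/home/ubuntu/volume_2k/Capstone_5")
llama_cpp = root / "llama.cpp"
model = llama_cpp / "models" / "Llama-2-7b-chat"

# 1) convert the HF checkpoint to a GGUF file (convert.py ships with llama.cpp)
subprocess.run(["python3", "convert.py", f"{model}/"], check=True, cwd=llama_cpp)
# 2) quantize the converted file to the requested level
subprocess.run(
    ["./quantize", str(model / "ggml-model-f32.gguf"),
     str(model / "ggml-model-Q5_K_M.gguf"), "Q5_K_M"],
    check=True,
    cwd=llama_cpp,
)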
Empty file added src/tests/quantize/__init__.py
39 changes: 39 additions & 0 deletions src/tests/quantize/quantize_test.py
@@ -0,0 +1,39 @@
import os
from pathlib import Path

from grag.quantize.utils import (
building_llamacpp,
fetch_model_repo,
get_llamacpp_repo,
quantize_model,
)

root_path = Path(__file__).parent / "test_data"
os.makedirs(root_path, exist_ok=True)


def test_get_llamacpp_repo():
get_llamacpp_repo(root_path)
repo_path = root_path / "llama.cpp" / ".git"
assert os.path.exists(repo_path)


def test_build_llamacpp():
building_llamacpp(root_path)
bin_path = root_path / "llama.cpp" / "quantize"
assert os.path.exists(bin_path)


def test_fetch_model_repo():
fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path)
model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat"
assert os.path.exists(model_dir_path)


def test_quantize_model():
model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat"
quantize_model(
model_dir_path, "Q3_K_M", root_path, output_dir=model_dir_path.parent
)
gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf"
assert os.path.exists(gguf_file_path)
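Note: these are order-dependent integration tests; each consumes the artifacts of the previous one and they need network access plus a build toolchain. A hedged sketch of guarding the build test in this file when neither make nor cmake is available (pytest assumed, since these are pytest-style tests):

import shutil

import pytest

@pytest.mark.skipif(
    shutil.which("make") is None and shutil.which("cmake") is None,
    reason="building llama.cpp needs make or cmake",
)
def test_build_llamacpp():
    building_llamacpp(root_path)
    assert os.path.exists(root_path / "llama.cpp" / "quantize")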