modifications and corrections after testing
sanchitvj committed Mar 24, 2024
1 parent a7354ee commit 66c06d0
Showing 2 changed files with 31 additions and 17 deletions.
5 changes: 2 additions & 3 deletions src/grag/quantize/quantize.py
@@ -1,6 +1,6 @@
 from grag.components.utils import get_config
 from grag.quantize.utils import (
-    building_llama,
+    building_llamacpp,
     fetch_model_repo,
     get_llamacpp_repo,
     quantize_model,
@@ -15,14 +15,13 @@
 if user_input != "":
     root_path = user_input
 
-# noinspection PyNoneFunctionAssignment
 res = get_llamacpp_repo(root_path)
 
 if "Already up to date." in str(res.stdout):
     print("Repository is already up to date. Skipping build.")
 else:
     print("Updates found. Starting build...")
-    building_llama(root_path)
+    building_llamacpp(root_path)
 
 response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower()
 if response == "n":
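Taken together, the quantize.py changes wire the renamed build helper into the interactive flow: pull or clone llama.cpp, rebuild only when the pull brought new commits, then optionally download and quantize a model. A minimal non-interactive sketch of that same sequence using the imports above; the root path, repo id, and quantization level are illustrative, not taken from this commit:

from grag.quantize.utils import (
    building_llamacpp,
    fetch_model_repo,
    get_llamacpp_repo,
    quantize_model,
)

root_path = "/tmp/grag"  # illustrative root directory
res = get_llamacpp_repo(root_path)  # clones on first run, pulls afterwards
if "Already up to date." not in str(res.stdout):
    building_llamacpp(root_path)  # rebuild only when the pull fetched changes
fetch_model_repo("TheBloke/Llama-2-7B-fp16", root_path)  # illustrative repo id
quantize_model("models/Llama-2-7B-fp16", "Q4_K_M", root_path)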
43 changes: 29 additions & 14 deletions src/grag/quantize/utils.py
@@ -1,28 +1,34 @@
 import os
 import subprocess
+from pathlib import Path
+from typing import Optional, Union
 
 from grag.components.utils import get_config
 from huggingface_hub import snapshot_download
 
+config = get_config()
+
 
-def get_llamacpp_repo(root_path: str) -> None:
+def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess:
     """Clones or pulls the llama.cpp repository into the specified root path.
 
     Args:
-        root_path (str): The root directory where the llama.cpp repository will be cloned or updated.
+        root_path: The root directory where the llama.cpp repository will be cloned or updated.
+
+    Returns:
+        A subprocess.CompletedProcess instance containing the result of the git operation.
     """
     if os.path.exists(f"{root_path}/llama.cpp"):
         print(f"Repo exists at: {root_path}/llama.cpp")
-        res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True)
     else:
-        res = subprocess.run(
-            [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"],
-            check=True, shell=True, capture_output=True)
+        res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"],
+                             check=True, capture_output=True)
 
     return res
-def building_llama(root_path: str) -> None:
+def building_llamacpp(root_path: str) -> None:
     """Attempts to build the llama.cpp project using make or cmake.
 
     Args:
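Two corrections meet in this hunk: building_llama is renamed to the more accurate building_llamacpp, and get_llamacpp_repo now returns its CompletedProcess instead of None, invoking git as an argument list (git -C <dir> pull) rather than a shell=True command string, which sidesteps quoting problems in paths. A short sketch of consuming the return value, with an illustrative path:

from grag.quantize.utils import get_llamacpp_repo

res = get_llamacpp_repo("/tmp/grag")  # illustrative root directory
# capture_output=True without text=True yields bytes, so str() is applied
# before the substring test, mirroring the check in quantize.py above
if "Already up to date." in str(res.stdout):
    print("llama.cpp already current; skipping rebuild")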
@@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None:
"""
local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
os.makedirs(local_dir, exist_ok=True)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True)
print(f"Model downloaded in {local_dir}")


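The snapshot_download change is a genuine bug fix rather than a style edit: the bare name auto was an undefined identifier, so fetch_model_repo raised NameError as soon as it was called; the intended value is the string 'auto', which lets huggingface_hub decide between symlinks and copies for the local directory. The corrected call in isolation, with illustrative repo id and path:

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="TheBloke/Llama-2-7B-fp16",  # illustrative repo id
    local_dir="/tmp/grag/llama.cpp/models/Llama-2-7B-fp16",
    local_dir_use_symlinks='auto',  # was the undefined name auto before this commit
    resume_download=True,  # resume a partially downloaded snapshot
)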
-def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None:
+def quantize_model(model_dir_path: str, quantization: str, root_path: str,
+                   output_dir: Optional[Union[str, Path]] = None) -> None:
     """Quantizes a specified model using a given quantization level.
 
     Args:
+        output_dir (str, optional): Directory to save quantized model. Defaults to None
         model_dir_path (str): The directory path of the model to be quantized.
         quantization (str): The quantization level to apply.
         root_path (str): The root directory path of the project.
     """
     os.chdir(f"{root_path}/llama.cpp/")
-    subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
-    model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
-    quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-    subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True)
-    print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
+    model_dir_path = Path(model_dir_path)
+    if output_dir is None:
+        output_dir = config['llm']['base_dir']
+
+    output_dir = Path(output_dir) / model_dir_path.name
+    os.makedirs(output_dir, exist_ok=True)
+
+    subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True)
+    model_file = model_dir_path / "ggml-model-f32.gguf"
+    quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf"
+    subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True)
+    print(f"Quantized model present at {output_dir}")
     os.chdir(Path(__file__).parent)  # Return to the root path after operation

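The reworked quantize_model decouples the output location from the source model directory: when output_dir is omitted it falls back to the llm base_dir entry of the project config (read via the new module-level config = get_config()), and a subdirectory named after the model is created beneath it. A sketch of a call under those assumptions; every path here is illustrative:

from grag.quantize.utils import quantize_model

quantize_model(
    model_dir_path="models/Llama-2-7B-fp16",  # resolved relative to llama.cpp/ after the chdir
    quantization="Q4_K_M",
    root_path="/tmp/grag",  # illustrative root directory
    output_dir="/tmp/quantized",  # omit to fall back to config['llm']['base_dir']
)
# expected artifact: /tmp/quantized/Llama-2-7B-fp16/ggml-model-Q4_K_M.gguf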