Skip to content

Commit

Permalink
rectified quantization, issue with llama.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
sanchitvj committed Mar 24, 2024
1 parent 11697c0 commit 1bb1216
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/grag/quantize/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

res = get_llamacpp_repo(root_path)

if "Already up to date." in res.stdout:
if "Already up to date." in str(res.stdout):
print("Repository is already up to date. Skipping build.")
else:
print("Updates found. Starting build...")
Expand Down
17 changes: 9 additions & 8 deletions src/grag/quantize/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def get_llamacpp_repo(root_path: str) -> None:
print(f"Repo exists at: {root_path}/llama.cpp")
res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True)
else:

subprocess.run(
res = subprocess.run(
[f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"],
check=True, shell=True)
check=True, shell=True, capture_output=True)

return res


def building_llama(root_path: str) -> None:
Expand Down Expand Up @@ -53,9 +54,9 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None:
repo_id (str): Repository ID of the model to download.
root_path (str): The root path where the model should be downloaded or copied.
"""
local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}"
os.mkdir(local_dir)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)
local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}"
os.makedirs(local_dir, exist_ok=True)
snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks="auto", resume_download=True)
print(f"Model downloaded in {local_dir}")


Expand All @@ -69,8 +70,8 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No
"""
os.chdir(f"{root_path}/llama.cpp/")
subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
model_file = f"models/{model_dir_path}/ggml-model-f16.gguf"
model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True)
subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True)
print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
os.chdir(Path(__file__).parent) # Return to the root path after operation
Empty file added src/tests/quantize/__init__.py
Empty file.
Empty file.

0 comments on commit 1bb1216

Please sign in to comment.