diff --git a/llm_quantize/quantize.py b/llm_quantize/quantize.py
index 7fb1c24..708b6c8 100644
--- a/llm_quantize/quantize.py
+++ b/llm_quantize/quantize.py
@@ -1,6 +1,6 @@
+import os
 import subprocess
 import sys
-import os
 
 
 def execute_commands(model_dir_path, quantization=None):
@@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None):
     if quantization:
         model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf"
         quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-        subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True)
+        subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True)
     else:
         print("llama.cpp doesn't exist, check readme how to clone.")
 
diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py
index 8e42117..d05990c 100644
--- a/src/grag/quantize/quantize.py
+++ b/src/grag/quantize/quantize.py
@@ -15,6 +15,7 @@
 if user_input != "":
     root_path = user_input
 
+# noinspection PyNoneFunctionAssignment
 res = get_llamacpp_repo(root_path)
 
 if "Already up to date." in str(res.stdout):
diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py
index 661fb65..7e2b92f 100644
--- a/src/grag/quantize/utils.py
+++ b/src/grag/quantize/utils.py
@@ -72,6 +72,6 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No
     subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True)
     model_file = f"models/{model_dir_path}/ggml-model-f32.gguf"
     quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf"
-    subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True)
+    subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True)
     print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}")
     os.chdir(Path(__file__).parent)  # Return to the root path after operation