From 428c634e73c7134b4507f35d8df94fef6477902e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 18:02:54 -0400 Subject: [PATCH 1/9] quantization --- src/config.ini | 5 ++- src/grag/quantize/quantize.py | 76 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 src/grag/quantize/quantize.py diff --git a/src/config.ini b/src/config.ini index 452ac04..74ab6c4 100644 --- a/src/config.ini +++ b/src/config.ini @@ -51,4 +51,7 @@ table_as_html : True data_path : ${root:root_path}/data [root] -root_path : /home/ubuntu/volume_2k/Capstone_5 \ No newline at end of file +root_path : /home/ubuntu/volume_2k/Capstone_5 + +[quantize] +llama_cpp_path : ${root:root_path} \ No newline at end of file diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py new file mode 100644 index 0000000..2728e13 --- /dev/null +++ b/src/grag/quantize/quantize.py @@ -0,0 +1,76 @@ +import os +import subprocess + +from grag.components.utils import get_config +from huggingface_hub import snapshot_download + +original_dir = os.getcwd() +config = get_config() +root_path = config['quantize']['llama_cpp_path'] + + +def get_llamacpp_repo(): + if os.path.exists(f"{root_path}/llama.cpp"): + subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + else: + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(): + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + os.chdir(original_dir) + + +def fetch_model_repo(): + response = input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ").strip().lower() + if response == "no": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + elif response == "yes" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, + local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(quantization): + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(original_dir) + + +if __name__ == "__main__": + get_llamacpp_repo() + building_llama() + fetch_model_repo() + + quantization = input("Enter quantization: ") + quantize_model(quantization) + # if len(sys.argv) < 2 or len(sys.argv) > 3: + # print("Usage: python script.py []") + # sys.exit(1) + # model_dir_path = sys.argv[1] + # quantization = sys.argv[2] if len(sys.argv) == 3 else None + # execute_commands(model_dir_path, quantization) From aec73771f90cdeeaa9d4fe128fc04fb5befb786e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 22:23:39 +0000 Subject: [PATCH 2/9] style fixes by ruff --- src/grag/quantize/quantize.py | 64 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..773e987 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -6,47 +6,73 @@ original_dir = os.getcwd() config = get_config() -root_path = config['quantize']['llama_cpp_path'] +root_path = config["quantize"]["llama_cpp_path"] def get_llamacpp_repo(): if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + subprocess.run( + [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True + ) else: subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, + shell=True, + ) def building_llama(): os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + 
shell=True, + check=True, + ) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") os.chdir(original_dir) def fetch_model_repo(): - response = input("Do you want us to download the model? (yes/no) [Enter for yes]: ").strip().lower() + response = ( + input("Do you want us to download the model? (yes/no) [Enter for yes]: ") + .strip() + .lower() + ) if response == "no": print("Please copy the model folder to 'llama.cpp/models/' folder.") elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ) local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) + snapshot_download( + repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False + ) print(f"Model downloaded in {local_dir}") @@ -55,8 +81,12 @@ def quantize_model(quantization): subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + quantized_model_file = ( + f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + ) + subprocess.run( + ["llm_quantize", model_file, quantized_model_file, quantization], check=True + ) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(original_dir) From 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:49:45 -0400 Subject: [PATCH 3/9] quantize file --- src/grag/quantize/__init__.py | 0 src/grag/quantize/quantize.py | 104 +++++++++++----------------------- src/grag/quantize/utils.py | 76 +++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 src/grag/quantize/__init__.py create mode 100644 src/grag/quantize/utils.py diff --git a/src/grag/quantize/__init__.py b/src/grag/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,76 +1,38 @@ -import os -import subprocess - from grag.components.utils import get_config -from huggingface_hub import snapshot_download +from grag.quantize.utils import ( + building_llama, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) -original_dir = os.getcwd() config = get_config() root_path = config['quantize']['llama_cpp_path'] - -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) - - -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - try: - subprocess.run(['which', 'cmake'], 
check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) - subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) - - -def fetch_model_repo(): - response = input("Do you want us to download the model? (yes/no) [Enter for yes]: ").strip().lower() - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) +user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() + +if user_input != "": + root_path = user_input + +res = get_llamacpp_repo(root_path) + +if "Already up to date." in res.stdout: + print("Repository is already up to date. Skipping build.") +else: + print("Updates found. Starting build...") + building_llama(root_path) + +response = input("Do you want us to download the model? 
(y/n) [Enter for yes]: ").strip().lower() +if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = input("Enter the model directory name: ") +elif response == "y" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() + fetch_model_repo(repo_id, root_path) + model_dir = repo_id.split('/')[1] + +quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") +quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py new file mode 100644 index 0000000..3df3c1a --- /dev/null +++ b/src/grag/quantize/utils.py @@ -0,0 +1,76 @@ +import os +import subprocess +from pathlib import Path + +from huggingface_hub import snapshot_download + + +def get_llamacpp_repo(root_path: str) -> None: + """Clones or pulls the llama.cpp repository into the specified root path. + + Args: + root_path (str): The root directory where the llama.cpp repository will be cloned or updated. + """ + if os.path.exists(f"{root_path}/llama.cpp"): + print(f"Repo exists at: {root_path}/llama.cpp") + res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + else: + + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(root_path: str) -> None: + """Attempts to build the llama.cpp project using make or cmake. + + Args: + root_path (str): The root directory where the llama.cpp project is located. + """ + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + finally: + os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + + +def fetch_model_repo(repo_id: str, root_path: str) -> None: + """Download model from huggingface.co/models. + + Args: + repo_id (str): Repository ID of the model to download. + root_path (str): The root path where the model should be downloaded or copied. + """ + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: + """Quantizes a specified model using a given quantization level. + + Args: + model_dir_path (str): The directory path of the model to be quantized. + quantization (str): The quantization level to apply. + root_path (str): The root directory path of the project. 
+ """ + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(Path(__file__).parent) # Return to the root path after operation From 11697c006bac3865203cc242c6841a024ec1f52a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:55:11 -0400 Subject: [PATCH 4/9] Revert "Merge branch 'quantize' of https://github.com/arjbingly/Capstone_5 into quantize" This reverts commit 79ebf3ae4bbc634f075d51791b1442569b1cd03a, reversing changes made to 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5. --- src/grag/quantize/quantize.py | 102 +--------------------------------- 1 file changed, 1 insertion(+), 101 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index c013b07..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -7,70 +7,21 @@ ) config = get_config() -root_path = config["quantize"]["llama_cpp_path"] +root_path = config['quantize']['llama_cpp_path'] user_input = input( "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() -<<<<<<< HEAD if user_input != "": root_path = user_input -======= -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run( - [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True - ) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, - shell=True, - ) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e res = get_llamacpp_repo(root_path) -<<<<<<< HEAD if "Already up to date." in res.stdout: print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") building_llama(root_path) -======= -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - try: - subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["mkdir", "build"], check=True) - subprocess.run( - [ - "cd", - "build", - "&&", - "cmake", - "..", - "-DLLAMA_CUBLAS=ON", - "&&", - "cmake", - "--build", - ".", - "--config", - "Release", - ], - shell=True, - check=True, - ) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": @@ -82,57 +33,6 @@ def building_llama(): fetch_model_repo(repo_id, root_path) model_dir = repo_id.split('/')[1] -<<<<<<< HEAD quantization = input( "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") quantize_model(model_dir, quantization, root_path) -======= -def fetch_model_repo(): - response = ( - input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ") - .strip() - .lower() - ) - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input( - "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " - ) - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download( - repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False - ) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = ( - f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - ) - subprocess.run( - ["llm_quantize", model_file, quantized_model_file, quantization], check=True - ) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e From 1bb12163f584150286714344082d60280113f9d1 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 20:12:26 -0400 Subject: [PATCH 5/9] rectified quantization, issue with llama.cpp --- src/grag/quantize/quantize.py | 2 +- src/grag/quantize/utils.py | 17 +++++++++-------- src/tests/quantize/__init__.py | 0 src/tests/quantize/quantize_test.py | 0 4 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 src/tests/quantize/__init__.py create mode 100644 src/tests/quantize/quantize_test.py diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 02065a9..8e42117 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -17,7 +17,7 @@ res = get_llamacpp_repo(root_path) -if "Already up to date." in res.stdout: +if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 3df3c1a..661fb65 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -15,10 +15,11 @@ def get_llamacpp_repo(root_path: str) -> None: print(f"Repo exists at: {root_path}/llama.cpp") res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) else: - - subprocess.run( + res = subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, shell=True, capture_output=True) + + return res def building_llama(root_path: str) -> None: @@ -53,9 +54,9 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: repo_id (str): Repository ID of the model to download. root_path (str): The root path where the model should be downloaded or copied. 
""" - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" + os.makedirs(local_dir, exist_ok=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) print(f"Model downloaded in {local_dir}") @@ -69,8 +70,8 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No """ os.chdir(f"{root_path}/llama.cpp/") subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/__init__.py b/src/tests/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py new file mode 100644 index 0000000..e69de29 From a7354ee7be3dadeff6a596e88a4b16e36cccbb69 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 15:00:14 -0400 Subject: [PATCH 6/9] issue in llama.cpp --- llm_quantize/quantize.py | 4 ++-- src/grag/quantize/quantize.py | 1 + src/grag/quantize/utils.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llm_quantize/quantize.py b/llm_quantize/quantize.py index 7fb1c24..708b6c8 100644 --- a/llm_quantize/quantize.py +++ b/llm_quantize/quantize.py @@ -1,6 +1,6 @@ +import os import subprocess import sys -import os def execute_commands(model_dir_path, quantization=None): @@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None): if quantization: model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf" quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True) else: print("llama.cpp doesn't exist, check readme how to clone.") diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 8e42117..d05990c 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -15,6 +15,7 @@ if user_input != "": root_path = user_input +# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." 
in str(res.stdout): diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 661fb65..7e2b92f 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -72,6 +72,6 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation From 66c06d0fb8e53daf69b967c7c7c102976d5bfd48 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:05:47 -0400 Subject: [PATCH 7/9] modifications and corrections after testing --- src/grag/quantize/quantize.py | 5 ++-- src/grag/quantize/utils.py | 43 +++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index d05990c..68168a2 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,6 +1,6 @@ from grag.components.utils import get_config from grag.quantize.utils import ( - building_llama, + building_llamacpp, fetch_model_repo, get_llamacpp_repo, quantize_model, @@ -15,14 +15,13 @@ if user_input != "": root_path = user_input -# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") - building_llama(root_path) + building_llamacpp(root_path) response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 7e2b92f..8d9d5bc 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,28 +1,34 @@ import os import subprocess from pathlib import Path +from typing import Optional, Union +from grag.components.utils import get_config from huggingface_hub import snapshot_download +config = get_config() -def get_llamacpp_repo(root_path: str) -> None: + +def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. Args: - root_path (str): The root directory where the llama.cpp repository will be cloned or updated. + root_path: The root directory where the llama.cpp repository will be cloned or updated. + + Returns: + A subprocess.CompletedProcess instance containing the result of the git operation. 
""" if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) else: - res = subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], + check=True, capture_output=True) return res -def building_llama(root_path: str) -> None: +def building_llamacpp(root_path: str) -> None: """Attempts to build the llama.cpp project using make or cmake. Args: @@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: +def quantize_model(model_dir_path: str, quantization: str, root_path: str, + output_dir: Optional[Union[str, Path]] = None) -> None: """Quantizes a specified model using a given quantization level. Args: + output_dir (str, optional): Directory to save quantized model. Defaults to None model_dir_path (str): The directory path of the model to be quantized. quantization (str): The quantization level to apply. root_path (str): The root directory path of the project. 
""" os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + model_dir_path = Path(model_dir_path) + if output_dir is None: + output_dir = config['llm']['base_dir'] + + output_dir = Path(output_dir) / model_dir_path.name + os.makedirs(output_dir, exist_ok=True) + + subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) + model_file = model_dir_path / "ggml-model-f32.gguf" + quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" + subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation From b90a8823d39215226b123553802efff0e9dd26d5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:52:50 -0400 Subject: [PATCH 8/9] quantizations all tests passed --- src/grag/quantize/quantize.py | 72 +++++++++++++---------- src/grag/quantize/utils.py | 89 +++++++++++++++++++++-------- src/tests/quantize/quantize_test.py | 37 ++++++++++++ 3 files changed, 146 insertions(+), 52 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 68168a2..64fba47 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,3 +1,7 @@ +"""Interactive file for quantizing models.""" + +from pathlib import Path + from grag.components.utils import get_config from grag.quantize.utils import ( building_llamacpp, @@ -7,32 +11,42 @@ ) config = get_config() -root_path = config['quantize']['llama_cpp_path'] - -user_input = input( - "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() - -if user_input != "": - root_path = user_input - -res = get_llamacpp_repo(root_path) - -if "Already up to date." in str(res.stdout): - print("Repository is already up to date. Skipping build.") -else: - print("Updates found. Starting build...") - building_llamacpp(root_path) - -response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() -if response == "n": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - _ = input("Enter if you have already copied the model:") - model_dir = input("Enter the model directory name: ") -elif response == "y" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() - fetch_model_repo(repo_id, root_path) - model_dir = repo_id.split('/')[1] - -quantization = input( - "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") -quantize_model(model_dir, quantization, root_path) +root_path = Path(config["quantize"]["llama_cpp_path"]) + +if __name__ == "__main__": + user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. 
Press Enter to use the default config path: " + ).strip() + + if user_input != "": + root_path = Path(user_input) + + res = get_llamacpp_repo(root_path) + + if "Already up to date." in str(res.stdout): + print("Repository is already up to date. Skipping build.") + else: + print("Updates found. Starting build...") + building_llamacpp(root_path) + + response = ( + input("Do you want us to download the model? (y/n) [Enter for yes]: ") + .strip() + .lower() + ) + if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = Path(input("Enter the model directory name: ")) + elif response == "y" or response == "": + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ).strip() + fetch_model_repo(repo_id, root_path) + # model_dir = repo_id.split('/')[1] + model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1] + + quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : " + ) + quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 8d9d5bc..bc1d280 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,3 +1,5 @@ +"""Utility functions for quantization.""" + import os import subprocess from pathlib import Path @@ -9,7 +11,7 @@ config = get_config() -def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: +def get_llamacpp_repo(root_path: Union[str, Path]) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. Args: @@ -20,15 +22,27 @@ def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """ if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) + res = subprocess.run( + ["git", "-C", f"{root_path}/llama.cpp", "pull"], + check=True, + capture_output=True, + ) else: - res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], - check=True, capture_output=True) + res = subprocess.run( + [ + "git", + "clone", + "https://github.com/ggerganov/llama.cpp.git", + f"{root_path}/llama.cpp", + ], + check=True, + capture_output=True, + ) return res -def building_llamacpp(root_path: str) -> None: +def building_llamacpp(root_path: Union[str, Path]) -> None: """Attempts to build the llama.cpp project using make or cmake. 
Args: @@ -36,24 +50,41 @@ def building_llamacpp(root_path: str) -> None: """ os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successful.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successful.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + shell=True, + check=True, + ) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") finally: - os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + os.chdir( + Path(__file__).parent + ) # Assuming you want to return to the root path after operation -def fetch_model_repo(repo_id: str, root_path: str) -> None: +def fetch_model_repo(repo_id: str, root_path: Union[str, Path]) -> None: """Download model from huggingface.co/models. Args: @@ -62,24 +93,33 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) + snapshot_download( + repo_id=repo_id, + local_dir=local_dir, + local_dir_use_symlinks="auto", + resume_download=True, + ) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str, - output_dir: Optional[Union[str, Path]] = None) -> None: +def quantize_model( + model_dir_path: Union[str, Path], + quantization: str, + root_path: Union[str, Path], + output_dir: Optional[Union[str, Path]] = None, +) -> None: """Quantizes a specified model using a given quantization level. Args: - output_dir (str, optional): Directory to save quantized model. Defaults to None - model_dir_path (str): The directory path of the model to be quantized. + output_dir (str, Path, optional): Directory to save quantized model. Defaults to None + model_dir_path (str, Path): The directory path of the model to be quantized. quantization (str): The quantization level to apply. - root_path (str): The root directory path of the project. + root_path (str, Path): The root directory path of the project. 
""" os.chdir(f"{root_path}/llama.cpp/") model_dir_path = Path(model_dir_path) if output_dir is None: - output_dir = config['llm']['base_dir'] + output_dir = config["llm"]["base_dir"] output_dir = Path(output_dir) / model_dir_path.name os.makedirs(output_dir, exist_ok=True) @@ -87,6 +127,9 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str, subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) model_file = model_dir_path / "ggml-model-f32.gguf" quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + subprocess.run( + ["./quantize", str(model_file), str(quantized_model_file), quantization], + check=True, + ) print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index e69de29..f7b3c51 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -0,0 +1,37 @@ +import os +from pathlib import Path + +from grag.quantize.utils import ( + building_llamacpp, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) + +root_path = Path(__file__).parent / 'test_data' +os.makedirs(root_path, exist_ok=True) + + +def test_get_llamacpp_repo(): + get_llamacpp_repo(root_path) + repo_path = root_path / 'llama.cpp' / '.git' + assert os.path.exists(repo_path) + + +def test_build_llamacpp(): + building_llamacpp(root_path) + bin_path = root_path / 'llama.cpp' / 'quantize' + assert os.path.exists(bin_path) + + +def test_fetch_model_repo(): + fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + assert os.path.exists(model_dir_path) + + +def test_quantize_model(): + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" + assert os.path.exists(gguf_file_path) From 14ca30db4b806996307c0e1de482e482c06b2826 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 21:57:48 +0000 Subject: [PATCH 9/9] style fixes by ruff --- src/tests/quantize/quantize_test.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index f7b3c51..af0e9dd 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -8,30 +8,32 @@ quantize_model, ) -root_path = Path(__file__).parent / 'test_data' +root_path = Path(__file__).parent / "test_data" os.makedirs(root_path, exist_ok=True) def test_get_llamacpp_repo(): get_llamacpp_repo(root_path) - repo_path = root_path / 'llama.cpp' / '.git' + repo_path = root_path / "llama.cpp" / ".git" assert os.path.exists(repo_path) def test_build_llamacpp(): building_llamacpp(root_path) - bin_path = root_path / 'llama.cpp' / 'quantize' + bin_path = root_path / "llama.cpp" / "quantize" assert os.path.exists(bin_path) def test_fetch_model_repo(): - fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) - model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" assert os.path.exists(model_dir_path) def test_quantize_model(): - 
model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' - quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" + quantize_model( + model_dir_path, "Q3_K_M", root_path, output_dir=model_dir_path.parent + ) gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" assert os.path.exists(gguf_file_path)
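
End state of this series: src/grag/quantize/utils.py exposes get_llamacpp_repo, building_llamacpp, fetch_model_repo and quantize_model, and src/grag/quantize/quantize.py drives them interactively. For reviewers, a minimal non-interactive sketch of the same flow, assuming the root path from config.ini and the Llama-2-7b-chat repo id used in the tests; any other path or Hugging Face repo id would be handled the same way:

    from pathlib import Path

    from grag.quantize.utils import (
        building_llamacpp,
        fetch_model_repo,
        get_llamacpp_repo,
        quantize_model,
    )

    # Assumed values: the config.ini root path and the HF repo id from the tests.
    root_path = Path("/home/ubuntu/volume_2k/Capstone_5")
    repo_id = "meta-llama/Llama-2-7b-chat"

    res = get_llamacpp_repo(root_path)                 # clone or pull llama.cpp
    if "Already up to date." not in str(res.stdout):
        building_llamacpp(root_path)                   # make LLAMA_CUBLAS=1, with a cmake fallback
    fetch_model_repo(repo_id, root_path)               # snapshot_download into llama.cpp/models/<name>
    model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1]
    quantize_model(model_dir, "Q4_K_M", root_path)     # convert.py + ./quantize; output defaults to the llm base_dir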