diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile
index 56d3551..d816f96 100644
--- a/ci/Jenkinsfile
+++ b/ci/Jenkinsfile
@@ -6,6 +6,10 @@ pipeline {
     }
     environment {
         PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
+        CUDACXX = '/usr/local/cuda-12/bin/nvcc'
+        CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
+        PATH="/usr/local/cuda-12.3/bin:$PATH"
+        LD_LIBRARY_PATH="/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
     }
 
@@ -27,7 +31,7 @@ pipeline {
     stage('Install dependencies'){
         steps {
             withPythonEnv(PYTHONPATH){
-                sh "pip install -e ."
+                sh 'pip install -e .'
             }
         }
 
@@ -35,7 +39,6 @@ pipeline {
     stage('Config'){
         steps{
-            sh 'echo $env.JENKINS_HOME'
             withPythonEnv(PYTHONPATH){
                 sh 'python3 ci/modify_config.py'
                 sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
@@ -84,22 +87,21 @@ pipeline {
     stage('Tests'){
         steps{
-            sh 'echo $USER'
             sh 'docker pull chromadb/chroma'
             sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
             withPythonEnv(PYTHONPATH){
                 sh 'pip install pytest'
                 sh 'python3 ci/unlock_deeplake.py'
-                sh 'pytest src --junitxml=pytest-report.xml'
+                sh 'pytest src -vvv --junitxml=pytest-report.xml'
            }
         }
         post {
             always{
+                sh 'docker stop jenkins-chroma'
+                sh 'docker rm jenkins-chroma'
                 withChecks('Integration Tests'){
                     junit 'pytest-report.xml'
                 }
-                sh 'docker stop jenkins-chroma'
-                sh 'docker rm jenkins-chroma'
                 cleanWs(
                     cleanWhenNotBuilt: false,
diff --git a/ci/env_test.py b/ci/env_test.py
new file mode 100644
index 0000000..02b5df5
--- /dev/null
+++ b/ci/env_test.py
@@ -0,0 +1,7 @@
+import os
+
+from grag.components.utils import get_config
+
+get_config(load_env=True)
+
+print(os.environ['HF_TOKEN'])
diff --git a/ci/modify_config.py b/ci/modify_config.py
index 759e88d..f210f69 100644
--- a/ci/modify_config.py
+++ b/ci/modify_config.py
@@ -12,6 +12,7 @@
 config['root']['root_path'] = f'{workspace}'
 config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
 config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'
+config['env']['env_path'] = f'{jenkins_home}/env_file/.env'
 
 with open(f'{workspace}/src/config.ini', 'w') as configfile:
     config.write(configfile)
diff --git a/ci/modify_config_test.py b/ci/modify_config_test.py
deleted file mode 100644
index b93a40a..0000000
--- a/ci/modify_config_test.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from grag.components.utils import get_config
-
-config = get_config()
-print(f"{config['root']['root_path']=}")
-print(f"{config['data']['data_path'] = }")
-print(f"{config['llm']['base_dir'] = }")
diff --git a/pyproject.toml b/pyproject.toml
index 22da656..5fae3db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,9 @@ dependencies = [
     "huggingface_hub>=0.20.2",
     "pydantic>=2.5.0",
     "rouge-score>=0.1.2",
-    "deeplake>=3.8.27"
+    "deeplake>=3.8.27",
+    "bitsandbytes>=0.43.0",
+    "accelerate>=0.28.0"
 ]
 
 [project.optional-dependencies]
diff --git a/src/config.ini b/src/config.ini
index 1760277..18abdd9 100644
--- a/src/config.ini
+++ b/src/config.ini
@@ -9,8 +9,8 @@ max_new_tokens : 1024
 temperature : 0.1
 n_batch_gpu_cpp : 1024
 n_ctx_cpp : 6000
-n_gpu_layers_cpp : 16
-# The number of layers to put on the GPU. Mixtral-18
+n_gpu_layers_cpp : -1
+# The number of layers to put on the GPU. Mixtral-18, gemma-20
 std_out : True
 base_dir : ${root:root_path}/models
 
@@ -58,6 +58,9 @@ table_as_html : True
 [data]
 data_path : ${root:root_path}/data
 
+[env]
+env_path : ${root:root_path}/.env
+
 [root]
 root_path : /home/ubuntu/volume_2k/Capstone_5
diff --git a/src/grag/components/llm.py b/src/grag/components/llm.py
index 54f14e8..bf0665d 100644
--- a/src/grag/components/llm.py
+++ b/src/grag/components/llm.py
@@ -4,7 +4,6 @@
 from pathlib import Path
 
 import torch
-from dotenv import load_dotenv
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain_community.llms import LlamaCpp
@@ -18,7 +17,7 @@
 from .utils import get_config
 
-llm_conf = get_config()["llm"]
+llm_conf = get_config(load_env=True)["llm"]
 
 print("CUDA: ", torch.cuda.is_available())
 
@@ -117,9 +116,8 @@ def hf_pipeline(self, is_local=False):
             )
         except OSError:  # LocalTokenNotFoundError:
             # If loading fails due to an auth token error, then load the token and retry
-            load_dotenv()
-            auth_token = os.getenv("AUTH_TOKEN")
-            if not auth_token:
+            # load_dotenv()
+            if not os.getenv("HF_TOKEN"):
                 raise ValueError("Authentication token not provided.")
             tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
             model = AutoModelForCausalLM.from_pretrained(
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index 41bff2d..dd9d240 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -78,6 +78,7 @@ def __init__(
             byte_store=self.store,  # type: ignore
             id_key=self.id_key,
         )
+        self.docstore = self.retriever.docstore
         self.splitter = TextSplitter()
         self.top_k: int = top_k
         self.retriever.search_kwargs = {"k": self.top_k}
diff --git a/src/grag/components/utils.py b/src/grag/components/utils.py
index 491c9ec..958dc35 100644
--- a/src/grag/components/utils.py
+++ b/src/grag/components/utils.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 from typing import List
 
+from dotenv import load_dotenv
 from langchain_core.documents import Document
 
@@ -42,7 +43,7 @@ def find_config_path(current_path: Path) -> Path:
     Raises:
         FileNotFoundError: If 'config.ini' cannot be found in any of the parent directories.
     """
-    config_path = Path("src/config.ini")
+    config_path = Path("config.ini")
     while not (current_path / config_path).exists():
         current_path = current_path.parent
         if current_path == current_path.parent:
@@ -50,7 +51,7 @@
     return current_path / config_path
 
 
-def get_config() -> ConfigParser:
+def get_config(load_env=False) -> ConfigParser:
     """Retrieves and parses the configuration settings from the 'config.ini' file.
 
     This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.
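Reviewer note on `get_config`: the new `load_env` flag (implemented in the hunk below) reads the `[env]` section added to `src/config.ini` and loads the `.env` file via python-dotenv only when that file exists. A minimal usage sketch, mirroring `ci/env_test.py`; it assumes `HF_TOKEN` is defined in the configured `.env` file:

    import os

    from grag.components.utils import get_config

    # Opt in to .env loading: get_config resolves [env] env_path from
    # config.ini and calls load_dotenv only if that path exists.
    config = get_config(load_env=True)

    # Variables from the .env file (e.g. HF_TOKEN) are then available
    # through os.environ for Hugging Face authentication.
    print("HF_TOKEN set:", os.environ.get("HF_TOKEN") is not None)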
@@ -67,9 +68,15 @@ def get_config() -> ConfigParser:
     else:
         config_path = find_config_path(script_location)
         os.environ["CONFIG_PATH"] = str(config_path)
-    print(f"Loaded config from {config_path}.")
+    # Initialize parser and read config
     config = ConfigParser(interpolation=ExtendedInterpolation())
     config.read(config_path)
-
+    print(f"Loaded config from {config_path}.")
+    # load_dotenv(config['env']['env_path'])
+    if load_env:
+        env_path = Path(config['env']['env_path'])
+        if env_path.exists():
+            load_dotenv(env_path)
+            print(f"Loaded environment variables from {env_path}")
     return config
diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py
index bc1d280..0b90516 100644
--- a/src/grag/quantize/utils.py
+++ b/src/grag/quantize/utils.py
@@ -51,7 +51,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
     os.chdir(f"{root_path}/llama.cpp/")
     try:
         subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
-        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
+        subprocess.run(["make", "LLAMA_CUDA=1"], check=True)
         print("Llama.cpp build successful.")
     except subprocess.CalledProcessError:
         try:
@@ -64,7 +64,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
                     "&&",
                     "cmake",
                     "..",
-                    "-DLLAMA_CUBLAS=ON",
+                    "-DLLAMA_CUDA=ON",
                     "&&",
                     "cmake",
                     "--build",
diff --git a/src/tests/components/embedding_test.py b/src/tests/components/embedding_test.py
index 1eda90f..2aecc26 100644
--- a/src/tests/components/embedding_test.py
+++ b/src/tests/components/embedding_test.py
@@ -47,3 +47,4 @@ def test_embeddings(embedding_config):
         cosine_similarity(doc_vecs[0], doc_vecs[2]),
     ]
     assert similarity_scores[0] > similarity_scores[1]
+    del embedding
diff --git a/src/tests/components/llm_test.py b/src/tests/components/llm_test.py
index df0f4d9..66b51dd 100644
--- a/src/tests/components/llm_test.py
+++ b/src/tests/components/llm_test.py
@@ -2,21 +2,24 @@
 import pytest
 
 from grag.components.llm import LLM
+from grag.components.utils import get_config
+
+config = get_config(load_env=True)
 
 llama_models = [
     "Llama-2-7b-chat",
     "Llama-2-13b-chat",
-    "Mixtral-8x7B-Instruct-v0.1",
     "gemma-7b-it",
+    "Mixtral-8x7B-Instruct-v0.1",
 ]
 hf_models = [
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-chat-hf",
-    # 'mistralai/Mixtral-8x7B-Instruct-v0.1',
     "google/gemma-7b-it",
 ]
-cpp_quantization = ["Q5_K_M", "Q5_K_M", "Q4_K_M", "f16"]
-hf_quantization = ["Q8", "Q4", "Q4"]  # , 'Q4']
+cpp_quantization = ["Q5_K_M", "Q5_K_M", "f16", "Q4_K_M"]
+gpu_layers = ['-1', '-1', '18', '16']
+hf_quantization = ["Q8", "Q4", "Q4"]
 
 params = [(model, quant) for model, quant in zip(hf_models, hf_quantization)]
 
@@ -29,12 +32,12 @@ def test_hf_web_pipe(hf_models, quantization):
     del model
 
 
-params = [(model, quant) for model, quant in zip(llama_models, cpp_quantization)]
+params = [(model, gpu_layer, quant) for model, gpu_layer, quant in zip(llama_models, gpu_layers, cpp_quantization)]
 
 
-@pytest.mark.parametrize("model_name, quantization", params)
-def test_llamacpp_pipe(model_name, quantization):
-    llm_ = LLM(quantization=quantization, model_name=model_name, pipeline="llama_cpp")
+@pytest.mark.parametrize("model_name, gpu_layer, quantization", params)
+def test_llamacpp_pipe(model_name, gpu_layer, quantization):
+    llm_ = LLM(quantization=quantization, model_name=model_name, n_gpu_layers=gpu_layer, pipeline="llama_cpp")
     model = llm_.load_model()
     response = model.invoke("Who are you?")
     assert isinstance(response, Text)
diff --git a/src/tests/components/multivec_retriever_test.py b/src/tests/components/multivec_retriever_test.py
index 3f847bd..8211b3a 100644
--- a/src/tests/components/multivec_retriever_test.py
+++ b/src/tests/components/multivec_retriever_test.py
@@ -1,19 +1,31 @@
-import json
+import os
+import shutil
+from pathlib import Path
 
 from grag.components.multivec_retriever import Retriever
+from grag.components.utils import get_config
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from langchain_core.documents import Document
 
-client = DeepLakeClient(collection_name="ci_test")
-retriever = Retriever(vectordb=client)  # pass test collection
+config = get_config()
+
+test_path = Path(config['data']['data_path']) / 'vectordb/test_retriever'
+if os.path.exists(test_path):
+    shutil.rmtree(test_path)
+    print('Deleting test retriever: {}'.format(test_path))
+
+# client = DeepLakeClient(collection_name="test_retriever")
+# retriever = Retriever(vectordb=client)  # pass test collection
 
 doc = Document(page_content="Hello worlds", metadata={"source": "bars"})
 
 
-def test_retriver_id_gen():
+def test_retriever_id_gen():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     doc = Document(page_content="Hello world", metadata={"source": "bar"})
     id_ = retriever.id_gen(doc)
-    assert isinstance(id, str)
+    assert isinstance(id_, str)
     assert len(id_) == 32
     doc.page_content = doc.page_content + 'ABC'
     id_1 = retriever.id_gen(doc)
@@ -21,14 +33,18 @@
     doc.metadata["source"] = "bars"
     id_1 = retriever.id_gen(doc)
     assert id_ != id_1
+    del client, retriever
 
 
 def test_retriever_gen_doc_ids():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     docs = [Document(page_content="Hello world", metadata={"source": "bar"}),
             Document(page_content="Hello", metadata={"source": "foo"})]
     ids = retriever.gen_doc_ids(docs)
     assert len(ids) == len(docs)
     assert all(isinstance(id, str) for id in ids)
+    del client, retriever
 
 
 def test_retriever_split_docs():
@@ -36,6 +52,8 @@
 
 
 def test_retriever_add_docs():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     # small enough docs to not split.
     docs = [Document(page_content=
         """And so on this rainbow day, with storms all around them, and blue sky
@@ -75,11 +93,11 @@ def test_retriever_add_docs():
     ]
     ids = retriever.gen_doc_ids(docs)
     retriever.add_docs(docs)
-    retrieved = retriever.store.mget(ids)
+    retrieved = retriever.docstore.mget(ids)
     assert len(retrieved) == len(ids)
-    for i, doc in enumerate(docs):
-        retrieved_doc = json.loads(retrieved[i].decode())
-        assert doc.metadata == retrieved_doc.metadata
+    for ret, doc in zip(retrieved, docs):
+        assert ret.metadata == doc.metadata
+    del client, retriever
 
 
 def test_retriever_aadd_docs():
diff --git a/src/tests/components/utils_test.py b/src/tests/components/utils_test.py
new file mode 100644
index 0000000..ddbca0f
--- /dev/null
+++ b/src/tests/components/utils_test.py
@@ -0,0 +1,8 @@
+import os
+
+from grag.components.utils import get_config
+
+
+def test_get_config():
+    config = get_config(load_env=True)
+    assert os.environ["HF_TOKEN"]
diff --git a/src/tests/components/vectordb/chroma_client_test.py b/src/tests/components/vectordb/chroma_client_test.py
index c491dfd..f07908c 100644
--- a/src/tests/components/vectordb/chroma_client_test.py
+++ b/src/tests/components/vectordb/chroma_client_test.py
@@ -9,6 +9,7 @@ def test_chroma_connection():
     chroma_client = ChromaClient()
     response = chroma_client.test_connection()
     assert isinstance(response, int)
+    del chroma_client
 
 
 def test_chroma_add_docs():
@@ -52,6 +53,7 @@
     docs = [Document(page_content=doc) for doc in docs]
     chroma_client.add_docs(docs)
     assert len(chroma_client) == len(docs)
+    del chroma_client
 
 
 def test_chroma_aadd_docs():
@@ -96,6 +98,7 @@
     loop = asyncio.get_event_loop()
     loop.run_until_complete(chroma_client.aadd_docs(docs))
     assert len(chroma_client) == len(docs)
+    del chroma_client
 
 
 chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)]
@@ -122,6 +125,7 @@ def test_chroma_get_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
+    del chroma_client
 
 
 @pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params)
@@ -146,3 +150,4 @@ def test_chroma_aget_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
+    del chroma_client
diff --git a/src/tests/components/vectordb/deeplake_client_test.py b/src/tests/components/vectordb/deeplake_client_test.py
index cea5e61..70fcf31 100644
--- a/src/tests/components/vectordb/deeplake_client_test.py
+++ b/src/tests/components/vectordb/deeplake_client_test.py
@@ -1,9 +1,19 @@
 import asyncio
+import os
+import shutil
+from pathlib import Path
 
 import pytest
+from grag.components.utils import get_config
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from langchain_core.documents import Document
 
+config = get_config()
+test_path = Path(config['data']['data_path']) / 'vectordb/test_client'
+if os.path.exists(test_path):
+    shutil.rmtree(test_path)
+    print('Deleting test retriever: {}'.format(test_path))
+
 
 def test_deeplake_add_docs():
     docs = [
         """And so on this rainbow day, with storms all around them, and blue sky
         above, they rode only as far as the valley. But from there, before they
@@ -40,7 +50,7 @@ def test_deeplake_add_docs():
         storm-clouds was split to the blinding zigzag of lightning, and the
         thunder rolled and boomed, like the Colorado in flood.""",
     ]
-    deeplake_client = DeepLakeClient(collection_name="test")
+    deeplake_client = DeepLakeClient(collection_name="test_client")
     if len(deeplake_client) > 0:
         deeplake_client.delete()
     docs = [Document(page_content=doc) for doc in docs]
@@ -49,7 +59,7 @@
     del deeplake_client
 
 
-def test_chroma_aadd_docs():
+def test_deeplake_aadd_docs():
     docs = [
         """And so on this rainbow day, with storms all around them, and blue sky
         above, they rode only as far as the valley. But from there, before they
@@ -84,7 +94,7 @@ def test_deeplake_aadd_docs():
         storm-clouds was split to the blinding zigzag of lightning, and the
         thunder rolled and boomed, like the Colorado in flood.""",
     ]
-    deeplake_client = DeepLakeClient(collection_name="test")
+    deeplake_client = DeepLakeClient(collection_name="test_client")
     if len(deeplake_client) > 0:
         deeplake_client.delete()
     docs = [Document(page_content=doc) for doc in docs]
@@ -108,7 +118,7 @@ def test_deeplake_get_chunk(top_k, with_score):
         ankles from Joel Creech's lasso had never mended. The girl was
         unutterably happy, but it was possible that she would never race a
         horse again."""
-    deeplake_client = DeepLakeClient(collection_name="test", read_only=True)
+    deeplake_client = DeepLakeClient(collection_name="test_client", read_only=True)
     retrieved_chunks = deeplake_client.get_chunk(
         query=query, top_k=top_k, with_score=with_score
     )
@@ -132,7 +142,7 @@ def test_deeplake_aget_chunk(top_k, with_score):
         ankles from Joel Creech's lasso had never mended. The girl was
         unutterably happy, but it was possible that she would never race a
         horse again."""
-    deeplake_client = DeepLakeClient(collection_name="test", read_only=True)
+    deeplake_client = DeepLakeClient(collection_name="test_client", read_only=True)
     loop = asyncio.get_event_loop()
     retrieved_chunks = loop.run_until_complete(
         deeplake_client.aget_chunk(query=query, top_k=top_k, with_score=with_score)
diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py
index af0e9dd..68078fe 100644
--- a/src/tests/quantize/quantize_test.py
+++ b/src/tests/quantize/quantize_test.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 from pathlib import Path
 
 from grag.quantize.utils import (
@@ -9,6 +10,8 @@
 )
 
 root_path = Path(__file__).parent / "test_data"
+if os.path.exists(root_path):
+    shutil.rmtree(root_path)
 os.makedirs(root_path, exist_ok=True)
diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py
index 0b93643..1695e93 100644
--- a/src/tests/rag/basic_rag_test.py
+++ b/src/tests/rag/basic_rag_test.py
@@ -9,7 +9,8 @@ def test_rag_stuff():
-    rag = BasicRAG(doc_chain="stuff", retriever=retriever)
+    rag = BasicRAG(doc_chain="stuff", retriever=retriever,
+                   llm_kwargs={"model_name": "Llama-2-7b-chat", "n_gpu_layers": "-1"})
     response, sources = rag("What is Flash Attention?")
     assert isinstance(response, Text)
     assert isinstance(sources, List)
@@ -18,7 +19,8 @@ def test_rag_refine():
-    rag = BasicRAG(doc_chain="refine", retriever=retriever)
+    rag = BasicRAG(doc_chain="refine", retriever=retriever,
+                   llm_kwargs={"model_name": "Llama-2-7b-chat", "n_gpu_layers": "-1"})
     response, sources = rag("What is Flash Attention?")
     assert isinstance(response, List)
     assert all(isinstance(s, str) for s in response)
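A closing note on the GPU-offload defaults: `n_gpu_layers_cpp : -1` follows llama.cpp's convention that -1 offloads all model layers to the GPU, while the per-model `gpu_layers` list in `llm_test.py` (`-1, -1, 18, 16`) only partially offloads the f16 gemma and Mixtral runs, presumably to fit VRAM. A minimal sketch of how these settings reach the underlying `LlamaCpp` binding; the model path is hypothetical, and the numeric values mirror `src/config.ini`:

    from langchain_community.llms import LlamaCpp

    llm = LlamaCpp(
        model_path="models/Llama-2-7b-chat/ggml-model-Q5_K_M.gguf",  # hypothetical path
        n_gpu_layers=-1,  # -1 offloads every layer to the GPU
        n_ctx=6000,       # n_ctx_cpp
        n_batch=1024,     # n_batch_gpu_cpp
        temperature=0.1,
        max_tokens=1024,  # max_new_tokens
    )
    print(llm.invoke("Who are you?"))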