Revert "CI Pipeline" #67

Merged: 1 commit, Apr 4, 2024
ci/Jenkinsfile: 6 additions & 8 deletions

@@ -6,10 +6,6 @@ pipeline {
     }
     environment {
         PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
-        CUDACXX = '/usr/local/cuda-12/bin/nvcc'
-        CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
-        PATH="/usr/local/cuda-12.3/bin:$PATH"
-        LD_LIBRARY_PATH="/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
     }

@@ -31,14 +27,15 @@ pipeline {
     stage('Install dependencies'){
         steps {
             withPythonEnv(PYTHONPATH){
-                sh 'pip install -e .'
+                sh "pip install -e ."
             }
         }

     }

     stage('Config'){
         steps{
+            sh 'echo $env.JENKINS_HOME'
             withPythonEnv(PYTHONPATH){
                 sh 'python3 ci/modify_config.py'
                 sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'

@@ -87,21 +84,22 @@ pipeline {
     stage('Tests'){
         steps{
+            sh 'echo $USER'
             sh 'docker pull chromadb/chroma'
             sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
             withPythonEnv(PYTHONPATH){
                 sh 'pip install pytest'
                 sh 'python3 ci/unlock_deeplake.py'
-                sh 'pytest src -vvv --junitxml=pytest-report.xml'
+                sh 'pytest src --junitxml=pytest-report.xml'
             }
         }
         post {
             always{
-                sh 'docker stop jenkins-chroma'
-                sh 'docker rm jenkins-chroma'
                 withChecks('Integration Tests'){
                     junit 'pytest-report.xml'
                 }
+                sh 'docker stop jenkins-chroma'
+                sh 'docker rm jenkins-chroma'

                 cleanWs(
                     cleanWhenNotBuilt: false,
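The Tests stage starts a Chroma container with `docker run -d` and moves straight on to pytest. If the container takes a moment to accept connections, the first test that touches it can fail spuriously; a readiness probe between `docker run` and the pytest call avoids that. A minimal sketch, assuming Chroma's default heartbeat endpoint on port 8000 (this helper is not part of the PR):

```python
# Hypothetical readiness probe for the jenkins-chroma container.
import time
import urllib.request


def wait_for_chroma(url: str = "http://localhost:8000/api/v1/heartbeat",
                    timeout: float = 30.0) -> None:
    """Poll the heartbeat endpoint until it answers or the timeout elapses."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:
                    return
        except OSError:
            pass  # server not listening yet
        time.sleep(1.0)
    raise TimeoutError(f"Chroma did not respond at {url} within {timeout}s")


if __name__ == "__main__":
    wait_for_chroma()
```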
ci/env_test.py: 0 additions & 7 deletions

This file was deleted.

ci/modify_config.py: 0 additions & 1 deletion

@@ -12,7 +12,6 @@
 config['root']['root_path'] = f'{workspace}'
 config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
 config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'
-config['env']['env_path'] = f'{jenkins_home}/env_file/.env'

 with open(f'{workspace}/src/config.ini', 'w') as configfile:
     config.write(configfile)
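For context, the script this hunk touches rewrites `src/config.ini` so that CI paths point inside the Jenkins workspace. A sketch of its overall shape, reconstructed around the lines shown above; the imports and the way `workspace` and `jenkins_home` are obtained are assumptions, not the repository's exact code:

```python
import os
from configparser import ConfigParser, ExtendedInterpolation

# Assumed: Jenkins exposes these as environment variables.
workspace = os.environ["WORKSPACE"]
jenkins_home = os.environ["JENKINS_HOME"]

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read(f"{workspace}/src/config.ini")

# Verbatim from the diff: point all paths at CI locations.
config['root']['root_path'] = f'{workspace}'
config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'

# ConfigParser.write() emits raw values, so ${root:root_path}
# interpolations elsewhere in the file survive the round trip.
with open(f'{workspace}/src/config.ini', 'w') as configfile:
    config.write(configfile)
```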
ci/modify_config_test.py: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+from grag.components.utils import get_config
+
+config = get_config()
+print(f"{config['root']['root_path']=}")
+print(f"{config['data']['data_path'] = }")
+print(f"{config['llm']['base_dir'] = }")
pyproject.toml: 1 addition & 3 deletions

@@ -42,9 +42,7 @@ dependencies = [
     "huggingface_hub>=0.20.2",
     "pydantic>=2.5.0",
     "rouge-score>=0.1.2",
-    "deeplake>=3.8.27",
-    "bitsandbytes>=0.43.0",
-    "accelerate>=0.28.0"
+    "deeplake>=3.8.27"
 ]

 [project.optional-dependencies]
src/config.ini: 2 additions & 5 deletions

@@ -9,8 +9,8 @@ max_new_tokens : 1024
 temperature : 0.1
 n_batch_gpu_cpp : 1024
 n_ctx_cpp : 6000
-n_gpu_layers_cpp : -1
-# The number of layers to put on the GPU. Mixtral-18, gemma-20
+n_gpu_layers_cpp : 16
+# The number of layers to put on the GPU. Mixtral-18
 std_out : True
 base_dir : ${root:root_path}/models

@@ -58,9 +58,6 @@ table_as_html : True
 [data]
 data_path : ${root:root_path}/data
 
-[env]
-env_path : ${root:root_path}/.env
-
 [root]
 root_path : /home/ubuntu/volume_2k/Capstone_5
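`n_gpu_layers_cpp` is the number of transformer layers llama.cpp offloads to the GPU; -1 conventionally offloads every layer, while the restored 16 is a partial offload for smaller cards. A sketch of how such settings typically reach LangChain's `LlamaCpp` wrapper; the model path and the exact wiring are assumptions, not this repo's code:

```python
from grag.components.utils import get_config
from langchain_community.llms import LlamaCpp

llm_conf = get_config()["llm"]

llm = LlamaCpp(
    # Hypothetical GGUF layout under base_dir.
    model_path=f"{llm_conf['base_dir']}/Llama-2-7b-chat/ggml-model-Q5_K_M.gguf",
    n_gpu_layers=int(llm_conf["n_gpu_layers_cpp"]),  # 16 here; -1 means all layers
    n_ctx=int(llm_conf["n_ctx_cpp"]),
    n_batch=int(llm_conf["n_batch_gpu_cpp"]),
)
```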
src/grag/components/llm.py: 5 additions & 3 deletions

@@ -4,6 +4,7 @@
 from pathlib import Path
 
 import torch
+from dotenv import load_dotenv
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain_community.llms import LlamaCpp

@@ -17,7 +18,7 @@
 
 from .utils import get_config
 
-llm_conf = get_config(load_env=True)["llm"]
+llm_conf = get_config()["llm"]
 
 print("CUDA: ", torch.cuda.is_available())

@@ -116,8 +117,9 @@ def hf_pipeline(self, is_local=False):
             )
         except OSError:  # LocalTokenNotFoundError:
             # If loading fails due to an auth token error, then load the token and retry
-            # load_dotenv()
+            if not os.getenv("HF_TOKEN"):
+                load_dotenv()
             auth_token = os.getenv("AUTH_TOKEN")
             if not auth_token:
                 raise ValueError("Authentication token not provided.")
             tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
             model = AutoModelForCausalLM.from_pretrained(
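The restored fallback only loads a `.env` file when `HF_TOKEN` is absent, then insists on `AUTH_TOKEN` being set before retrying the download. A self-contained sketch of the pattern (the model id is illustrative):

```python
import os

from dotenv import load_dotenv
from transformers import AutoTokenizer

hf_model = "meta-llama/Llama-2-7b-chat-hf"  # illustrative model id
try:
    tokenizer = AutoTokenizer.from_pretrained(hf_model)
except OSError:
    # No usable token yet: pull variables from a .env file into os.environ.
    if not os.getenv("HF_TOKEN"):
        load_dotenv()
    auth_token = os.getenv("AUTH_TOKEN")
    if not auth_token:
        raise ValueError("Authentication token not provided.")
    # token=True makes transformers pick up the token loaded above.
    tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
```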
src/grag/components/multivec_retriever.py: 0 additions & 1 deletion

@@ -78,7 +78,6 @@ def __init__(
             byte_store=self.store,  # type: ignore
             id_key=self.id_key,
         )
-        self.docstore = self.retriever.docstore
         self.splitter = TextSplitter()
         self.top_k: int = top_k
         self.retriever.search_kwargs = {"k": self.top_k}
src/grag/components/utils.py: 4 additions & 11 deletions

@@ -12,7 +12,6 @@
 from pathlib import Path
 from typing import List
 
-from dotenv import load_dotenv
 from langchain_core.documents import Document

@@ -43,15 +42,15 @@ def find_config_path(current_path: Path) -> Path:
     Raises:
         FileNotFoundError: If 'config.ini' cannot be found in any of the parent directories.
     """
-    config_path = Path("config.ini")
+    config_path = Path("src/config.ini")
     while not (current_path / config_path).exists():
         current_path = current_path.parent
         if current_path == current_path.parent:
             raise FileNotFoundError(f"config.ini not found in {config_path}.")
     return current_path / config_path
 
 
-def get_config(load_env=False) -> ConfigParser:
+def get_config() -> ConfigParser:
     """Retrieves and parses the configuration settings from the 'config.ini' file.
 
     This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.

@@ -68,15 +67,9 @@ def get_config(load_env=False) -> ConfigParser:
     else:
         config_path = find_config_path(script_location)
     os.environ["CONFIG_PATH"] = str(config_path)
-
+    print(f"Loaded config from {config_path}.")
     # Initialize parser and read config
     config = ConfigParser(interpolation=ExtendedInterpolation())
     config.read(config_path)
-    print(f"Loaded config from {config_path}.")
-    # load_dotenv(config['env']['env_path'])
-    if load_env:
-        env_path = Path(config['env']['env_path'])
-        if env_path.exists():
-            load_dotenv(env_path)
-            print(f"Loaded environment variables from {env_path}")
 
     return config
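`find_config_path` climbs parent directories until `src/config.ini` appears, and `current_path == current_path.parent` only holds at the filesystem root, which is the stop condition. An illustration of the search order on a hypothetical checkout:

```python
from pathlib import Path

# Hypothetical layout: the walk starts at the directory of the calling script,
#   /home/ubuntu/Capstone_5/src/tests/components
# and probes, in order:
#   /home/ubuntu/Capstone_5/src/tests/components/src/config.ini
#   /home/ubuntu/Capstone_5/src/tests/src/config.ini
#   /home/ubuntu/Capstone_5/src/src/config.ini
#   /home/ubuntu/Capstone_5/src/config.ini   <- found, returned
start = Path("/home/ubuntu/Capstone_5/src/tests/components")
for ancestor in [start, *start.parents]:
    print(ancestor / "src/config.ini")
```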
src/grag/quantize/utils.py: 2 additions & 2 deletions

@@ -51,7 +51,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
     os.chdir(f"{root_path}/llama.cpp/")
     try:
         subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
-        subprocess.run(["make", "LLAMA_CUDA=1"], check=True)
+        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
         print("Llama.cpp build successful.")
     except subprocess.CalledProcessError:
         try:

@@ -64,7 +64,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
             "&&",
             "cmake",
             "..",
-            "-DLLAMA_CUDA=ON",
+            "-DLLAMA_CUBLAS=ON",
             "&&",
             "cmake",
             "--build",
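`LLAMA_CUBLAS` is the older llama.cpp CUDA switch; newer llama.cpp revisions renamed it to `LLAMA_CUDA`, so which flag works depends on the llama.cpp checkout being built. A build helper could tolerate both, a sketch rather than this repository's code:

```python
import subprocess


def build_llamacpp_cuda() -> None:
    """Try the newer LLAMA_CUDA flag first, then fall back to LLAMA_CUBLAS."""
    for flag in ("LLAMA_CUDA=1", "LLAMA_CUBLAS=1"):
        try:
            subprocess.run(["make", flag], check=True)
            print(f"Llama.cpp build successful with {flag}.")
            return
        except subprocess.CalledProcessError:
            continue
    raise RuntimeError("llama.cpp CUDA build failed with both flags.")
```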
src/tests/components/embedding_test.py: 0 additions & 1 deletion

@@ -47,4 +47,3 @@ def test_embeddings(embedding_config):
         cosine_similarity(doc_vecs[0], doc_vecs[2]),
     ]
     assert similarity_scores[0] > similarity_scores[1]
-    del embedding
src/tests/components/llm_test.py: 8 additions & 11 deletions

@@ -2,24 +2,21 @@
 
 import pytest
 from grag.components.llm import LLM
-from grag.components.utils import get_config
-
-config = get_config(load_env=True)
 
 llama_models = [
     "Llama-2-7b-chat",
     "Llama-2-13b-chat",
-    "gemma-7b-it",
     "Mixtral-8x7B-Instruct-v0.1",
+    "gemma-7b-it",
 ]
 hf_models = [
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-chat-hf",
     # 'mistralai/Mixtral-8x7B-Instruct-v0.1',
     "google/gemma-7b-it",
 ]
-cpp_quantization = ["Q5_K_M", "Q5_K_M", "f16", "Q4_K_M"]
-gpu_layers = ['-1', '-1', '18', '16']
-hf_quantization = ["Q8", "Q4", "Q4"]
+cpp_quantization = ["Q5_K_M", "Q5_K_M", "Q4_K_M", "f16"]
+hf_quantization = ["Q8", "Q4", "Q4"]  # , 'Q4']
 params = [(model, quant) for model, quant in zip(hf_models, hf_quantization)]

@@ -32,12 +29,12 @@ def test_hf_web_pipe(hf_models, quantization):
     del model
 
 
-params = [(model, gpu_layer, quant) for model, gpu_layer, quant in zip(llama_models, gpu_layers, cpp_quantization)]
+params = [(model, quant) for model, quant in zip(llama_models, cpp_quantization)]
 
 
-@pytest.mark.parametrize("model_name, gpu_layer, quantization", params)
-def test_llamacpp_pipe(model_name, gpu_layer, quantization):
-    llm_ = LLM(quantization=quantization, model_name=model_name, n_gpu_layers=gpu_layer, pipeline="llama_cpp")
+@pytest.mark.parametrize("model_name, quantization", params)
+def test_llamacpp_pipe(model_name, quantization):
    llm_ = LLM(quantization=quantization, model_name=model_name, pipeline="llama_cpp")
     model = llm_.load_model()
     response = model.invoke("Who are you?")
     assert isinstance(response, Text)
src/tests/components/multivec_retriever_test.py: 9 additions & 27 deletions

@@ -1,59 +1,41 @@
-import os
-import shutil
-from pathlib import Path
+import json
 
 from grag.components.multivec_retriever import Retriever
-from grag.components.utils import get_config
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from langchain_core.documents import Document
 
-config = get_config()
-
-test_path = Path(config['data']['data_path']) / 'vectordb/test_retriever'
-if os.path.exists(test_path):
-    shutil.rmtree(test_path)
-    print('Deleting test retriever: {}'.format(test_path))
-
+# client = DeepLakeClient(collection_name="test_retriever")
+# retriever = Retriever(vectordb=client)  # pass test collection
+client = DeepLakeClient(collection_name="ci_test")
+retriever = Retriever(vectordb=client)  # pass test collection
+
+doc = Document(page_content="Hello worlds", metadata={"source": "bars"})
 
 
-def test_retriever_id_gen():
-    client = DeepLakeClient(collection_name="test_retriever")
-    retriever = Retriever(vectordb=client)
-    doc = Document(page_content="Hello world", metadata={"source": "bar"})
+def test_retriver_id_gen():
     id_ = retriever.id_gen(doc)
-    assert isinstance(id_, str)
+    assert isinstance(id, str)
     assert len(id_) == 32
     doc.page_content = doc.page_content + 'ABC'
     id_1 = retriever.id_gen(doc)
     assert id_ == id_1
     doc.metadata["source"] = "bars"
     id_1 = retriever.id_gen(doc)
     assert id_ != id_1
-    del client, retriever
 
 
 def test_retriever_gen_doc_ids():
-    client = DeepLakeClient(collection_name="test_retriever")
-    retriever = Retriever(vectordb=client)
     docs = [Document(page_content="Hello world", metadata={"source": "bar"}),
             Document(page_content="Hello", metadata={"source": "foo"})]
     ids = retriever.gen_doc_ids(docs)
     assert len(ids) == len(docs)
     assert all(isinstance(id, str) for id in ids)
-    del client, retriever
 
 
 def test_retriever_split_docs():
     pass
 
 
 def test_retriever_add_docs():
-    client = DeepLakeClient(collection_name="test_retriever")
-    retriever = Retriever(vectordb=client)
     # small enough docs to not split.
     docs = [Document(page_content=
         """And so on this rainbow day, with storms all around them, and blue sky

@@ -93,11 +75,11 @@ def test_retriever_add_docs():
     ]
     ids = retriever.gen_doc_ids(docs)
     retriever.add_docs(docs)
-    retrieved = retriever.docstore.mget(ids)
+    retrieved = retriever.store.mget(ids)
     assert len(retrieved) == len(ids)
-    for ret, doc in zip(retrieved, docs):
-        assert ret.metadata == doc.metadata
-    del client, retriever
+    for i, doc in enumerate(docs):
+        retrieved_doc = json.loads(retrieved[i].decode())
+        assert doc.metadata == retrieved_doc.metadata
 
 
 def test_retriever_aadd_docs():
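A few details worth flagging in the restored test file: `test_retriver_id_gen` carries a misspelling of "retriever"; `assert isinstance(id, str)` tests the `id` builtin instead of the `id_` variable, which is always false; and `json.loads` returns a plain dict, so the attribute access `retrieved_doc.metadata` raises `AttributeError` at run time, while key lookup works. A minimal sketch of the byte-store round-trip with dict-style access, assuming documents are stored JSON-encoded (the encoding shown is an assumption):

```python
import json

from langchain_core.documents import Document

# Assumed storage format: the byte store holds JSON-encoded documents.
doc = Document(page_content="Hello world", metadata={"source": "bar"})
blob = json.dumps(
    {"page_content": doc.page_content, "metadata": doc.metadata}
).encode()

retrieved_doc = json.loads(blob.decode())
assert doc.metadata == retrieved_doc["metadata"]  # key lookup, not .metadata
```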
src/tests/components/utils_test.py: 0 additions & 8 deletions

This file was deleted.

src/tests/components/vectordb/chroma_client_test.py: 0 additions & 5 deletions

@@ -9,7 +9,6 @@ def test_chroma_connection():
     chroma_client = ChromaClient()
     response = chroma_client.test_connection()
     assert isinstance(response, int)
-    del chroma_client
 
 
 def test_chroma_add_docs():

@@ -53,7 +52,6 @@ def test_chroma_add_docs():
     docs = [Document(page_content=doc) for doc in docs]
     chroma_client.add_docs(docs)
     assert len(chroma_client) == len(docs)
-    del chroma_client
 
 
 def test_chroma_aadd_docs():

@@ -98,7 +96,6 @@ def test_chroma_aadd_docs():
     loop = asyncio.get_event_loop()
     loop.run_until_complete(chroma_client.aadd_docs(docs))
     assert len(chroma_client) == len(docs)
-    del chroma_client
 
 
 chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)]

@@ -125,7 +122,6 @@ def test_chroma_get_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
-    del chroma_client
 
 
 @pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params)

@@ -150,4 +146,3 @@ def test_chroma_aget_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
-    del chroma_client