diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile
index 56d3551..d816f96 100644
--- a/ci/Jenkinsfile
+++ b/ci/Jenkinsfile
@@ -6,6 +6,10 @@ pipeline {
     }
     environment {
         PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
+        CUDACXX = '/usr/local/cuda-12/bin/nvcc'
+        CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
+        PATH="/usr/local/cuda-12.3/bin:$PATH"
+        LD_LIBRARY_PATH="/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
     }
 
@@ -27,7 +31,7 @@ pipeline {
     stage('Install dependencies'){
         steps {
             withPythonEnv(PYTHONPATH){
-                sh "pip install -e ."
+                sh 'pip install -e .'
             }
         }
 
@@ -35,7 +39,6 @@ pipeline {
     stage('Config'){
         steps{
-            sh 'echo $env.JENKINS_HOME'
             withPythonEnv(PYTHONPATH){
                 sh 'python3 ci/modify_config.py'
                 sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
@@ -84,22 +87,21 @@ pipeline {
     stage('Tests'){
         steps{
-            sh 'echo $USER'
             sh 'docker pull chromadb/chroma'
             sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
             withPythonEnv(PYTHONPATH){
                 sh 'pip install pytest'
                 sh 'python3 ci/unlock_deeplake.py'
-                sh 'pytest src --junitxml=pytest-report.xml'
+                sh 'pytest src -vvv --junitxml=pytest-report.xml'
            }
         }
         post {
             always{
+                sh 'docker stop jenkins-chroma'
+                sh 'docker rm jenkins-chroma'
                 withChecks('Integration Tests'){
                     junit 'pytest-report.xml'
                 }
-                sh 'docker stop jenkins-chroma'
-                sh 'docker rm jenkins-chroma'
                 cleanWs(
                     cleanWhenNotBuilt: false,
diff --git a/ci/env_test.py b/ci/env_test.py
new file mode 100644
index 0000000..02b5df5
--- /dev/null
+++ b/ci/env_test.py
@@ -0,0 +1,7 @@
+import os
+
+from grag.components.utils import get_config
+
+get_config(load_env=True)
+
+print(os.environ['HF_TOKEN'])
diff --git a/ci/modify_config.py b/ci/modify_config.py
index 759e88d..f210f69 100644
--- a/ci/modify_config.py
+++ b/ci/modify_config.py
@@ -12,6 +12,7 @@
 config['root']['root_path'] = f'{workspace}'
 config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
 config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'
+config['env']['env_path'] = f'{jenkins_home}/env_file/.env'
 
 with open(f'{workspace}/src/config.ini', 'w') as configfile:
     config.write(configfile)
diff --git a/ci/modify_config_test.py b/ci/modify_config_test.py
deleted file mode 100644
index b93a40a..0000000
--- a/ci/modify_config_test.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from grag.components.utils import get_config
-
-config = get_config()
-print(f"{config['root']['root_path']=}")
-print(f"{config['data']['data_path'] = }")
-print(f"{config['llm']['base_dir'] = }")
diff --git a/pyproject.toml b/pyproject.toml
index 22da656..5fae3db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,9 @@ dependencies = [
     "huggingface_hub>=0.20.2",
     "pydantic>=2.5.0",
     "rouge-score>=0.1.2",
-    "deeplake>=3.8.27"
+    "deeplake>=3.8.27",
+    "bitsandbytes>=0.43.0",
+    "accelerate>=0.28.0"
 ]
 
 [project.optional-dependencies]
diff --git a/src/config.ini b/src/config.ini
index 1760277..18abdd9 100644
--- a/src/config.ini
+++ b/src/config.ini
@@ -9,8 +9,8 @@ max_new_tokens : 1024
 temperature : 0.1
 n_batch_gpu_cpp : 1024
 n_ctx_cpp : 6000
-n_gpu_layers_cpp : 16
-# The number of layers to put on the GPU. Mixtral-18
+n_gpu_layers_cpp : -1
+# The number of layers to put on the GPU. Mixtral-18, gemma-20
 std_out : True
 base_dir : ${root:root_path}/models
 
@@ -58,6 +58,9 @@ table_as_html : True
 [data]
 data_path : ${root:root_path}/data
 
+[env]
+env_path : ${root:root_path}/.env
+
 [root]
 root_path : /home/ubuntu/volume_2k/Capstone_5
diff --git a/src/grag/components/llm.py b/src/grag/components/llm.py
index 54f14e8..bf0665d 100644
--- a/src/grag/components/llm.py
+++ b/src/grag/components/llm.py
@@ -4,7 +4,6 @@
 from pathlib import Path
 
 import torch
-from dotenv import load_dotenv
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain_community.llms import LlamaCpp
@@ -18,7 +17,7 @@
 from .utils import get_config
 
-llm_conf = get_config()["llm"]
+llm_conf = get_config(load_env=True)["llm"]
 
 print("CUDA: ", torch.cuda.is_available())
 
@@ -117,9 +116,8 @@ def hf_pipeline(self, is_local=False):
             )
         except OSError:  # LocalTokenNotFoundError:
             # If loading fails due to an auth token error, then load the token and retry
-            load_dotenv()
-            auth_token = os.getenv("AUTH_TOKEN")
-            if not auth_token:
+            # load_dotenv()
+            if not os.getenv("HF_TOKEN"):
                 raise ValueError("Authentication token not provided.")
             tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
             model = AutoModelForCausalLM.from_pretrained(
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index 41bff2d..dd9d240 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -78,6 +78,7 @@ def __init__(
             byte_store=self.store,  # type: ignore
             id_key=self.id_key,
         )
+        self.docstore = self.retriever.docstore
         self.splitter = TextSplitter()
         self.top_k: int = top_k
         self.retriever.search_kwargs = {"k": self.top_k}
diff --git a/src/grag/components/utils.py b/src/grag/components/utils.py
index 491c9ec..958dc35 100644
--- a/src/grag/components/utils.py
+++ b/src/grag/components/utils.py
@@ -12,6 +12,7 @@
 from pathlib import Path
 from typing import List
 
+from dotenv import load_dotenv
 from langchain_core.documents import Document
 
@@ -42,7 +43,7 @@ def find_config_path(current_path: Path) -> Path:
     Raises:
         FileNotFoundError: If 'config.ini' cannot be found in any of the parent directories.
     """
-    config_path = Path("src/config.ini")
+    config_path = Path("config.ini")
     while not (current_path / config_path).exists():
         current_path = current_path.parent
         if current_path == current_path.parent:
@@ -50,7 +51,7 @@
     return current_path / config_path
 
 
-def get_config() -> ConfigParser:
+def get_config(load_env=False) -> ConfigParser:
     """Retrieves and parses the configuration settings from the 'config.ini' file.
 
     This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.
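Reviewer note on `get_config`: the new `load_env` flag (implemented in the hunk below) reads the `[env]` section added to `src/config.ini` and loads the `.env` file via python-dotenv only when that file exists. A minimal usage sketch, mirroring `ci/env_test.py`; it assumes `HF_TOKEN` is defined in the configured `.env` file:

    import os

    from grag.components.utils import get_config

    # Opt in to .env loading: get_config resolves [env] env_path from
    # config.ini and calls load_dotenv only if that path exists.
    config = get_config(load_env=True)

    # Variables from the .env file (e.g. HF_TOKEN) are then available
    # through os.environ for Hugging Face authentication.
    print("HF_TOKEN set:", os.environ.get("HF_TOKEN") is not None)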
@@ -67,9 +68,15 @@ def get_config() -> ConfigParser:
     else:
         config_path = find_config_path(script_location)
         os.environ["CONFIG_PATH"] = str(config_path)
-    print(f"Loaded config from {config_path}.")
+    # Initialize parser and read config
     config = ConfigParser(interpolation=ExtendedInterpolation())
     config.read(config_path)
-
+    print(f"Loaded config from {config_path}.")
+    # load_dotenv(config['env']['env_path'])
+    if load_env:
+        env_path = Path(config['env']['env_path'])
+        if env_path.exists():
+            load_dotenv(env_path)
+            print(f"Loaded environment variables from {env_path}")
     return config
diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py
index bc1d280..0b90516 100644
--- a/src/grag/quantize/utils.py
+++ b/src/grag/quantize/utils.py
@@ -51,7 +51,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
     os.chdir(f"{root_path}/llama.cpp/")
     try:
         subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
-        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
+        subprocess.run(["make", "LLAMA_CUDA=1"], check=True)
         print("Llama.cpp build successful.")
     except subprocess.CalledProcessError:
         try:
@@ -64,7 +64,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
                     "&&",
                     "cmake",
                     "..",
-                    "-DLLAMA_CUBLAS=ON",
+                    "-DLLAMA_CUDA=ON",
                     "&&",
                     "cmake",
                     "--build",
diff --git a/src/tests/components/embedding_test.py b/src/tests/components/embedding_test.py
index 1eda90f..2aecc26 100644
--- a/src/tests/components/embedding_test.py
+++ b/src/tests/components/embedding_test.py
@@ -47,3 +47,4 @@ def test_embeddings(embedding_config):
         cosine_similarity(doc_vecs[0], doc_vecs[2]),
     ]
     assert similarity_scores[0] > similarity_scores[1]
+    del embedding
diff --git a/src/tests/components/llm_test.py b/src/tests/components/llm_test.py
index df0f4d9..66b51dd 100644
--- a/src/tests/components/llm_test.py
+++ b/src/tests/components/llm_test.py
@@ -2,21 +2,24 @@
 import pytest
 
 from grag.components.llm import LLM
+from grag.components.utils import get_config
+
+config = get_config(load_env=True)
 
 llama_models = [
     "Llama-2-7b-chat",
     "Llama-2-13b-chat",
-    "Mixtral-8x7B-Instruct-v0.1",
     "gemma-7b-it",
+    "Mixtral-8x7B-Instruct-v0.1",
 ]
 hf_models = [
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-chat-hf",
-    # 'mistralai/Mixtral-8x7B-Instruct-v0.1',
     "google/gemma-7b-it",
 ]
-cpp_quantization = ["Q5_K_M", "Q5_K_M", "Q4_K_M", "f16"]
-hf_quantization = ["Q8", "Q4", "Q4"]  # , 'Q4']
+cpp_quantization = ["Q5_K_M", "Q5_K_M", "f16", "Q4_K_M"]
+gpu_layers = ['-1', '-1', '18', '16']
+hf_quantization = ["Q8", "Q4", "Q4"]
 
 params = [(model, quant) for model, quant in zip(hf_models, hf_quantization)]
 
@@ -29,12 +32,12 @@ def test_hf_web_pipe(hf_models, quantization):
     del model
 
 
-params = [(model, quant) for model, quant in zip(llama_models, cpp_quantization)]
+params = [(model, gpu_layer, quant) for model, gpu_layer, quant in zip(llama_models, gpu_layers, cpp_quantization)]
 
 
-@pytest.mark.parametrize("model_name, quantization", params)
-def test_llamacpp_pipe(model_name, quantization):
-    llm_ = LLM(quantization=quantization, model_name=model_name, pipeline="llama_cpp")
+@pytest.mark.parametrize("model_name, gpu_layer, quantization", params)
+def test_llamacpp_pipe(model_name, gpu_layer, quantization):
+    llm_ = LLM(quantization=quantization, model_name=model_name, n_gpu_layers=gpu_layer, pipeline="llama_cpp")
     model = llm_.load_model()
     response = model.invoke("Who are you?")
     assert isinstance(response, Text)
diff --git a/src/tests/components/multivec_retriever_test.py b/src/tests/components/multivec_retriever_test.py
index 3f847bd..8211b3a 100644
--- a/src/tests/components/multivec_retriever_test.py
+++ b/src/tests/components/multivec_retriever_test.py
@@ -1,19 +1,31 @@
-import json
+import os
+import shutil
+from pathlib import Path
 
 from grag.components.multivec_retriever import Retriever
+from grag.components.utils import get_config
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from langchain_core.documents import Document
 
-client = DeepLakeClient(collection_name="ci_test")
-retriever = Retriever(vectordb=client)  # pass test collection
+config = get_config()
+
+test_path = Path(config['data']['data_path']) / 'vectordb/test_retriever'
+if os.path.exists(test_path):
+    shutil.rmtree(test_path)
+    print('Deleting test retriever: {}'.format(test_path))
+
+# client = DeepLakeClient(collection_name="test_retriever")
+# retriever = Retriever(vectordb=client)  # pass test collection
 
 doc = Document(page_content="Hello worlds", metadata={"source": "bars"})
 
 
-def test_retriver_id_gen():
+def test_retriever_id_gen():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     doc = Document(page_content="Hello world", metadata={"source": "bar"})
     id_ = retriever.id_gen(doc)
-    assert isinstance(id, str)
+    assert isinstance(id_, str)
     assert len(id_) == 32
     doc.page_content = doc.page_content + 'ABC'
     id_1 = retriever.id_gen(doc)
@@ -21,14 +33,18 @@
     doc.metadata["source"] = "bars"
     id_1 = retriever.id_gen(doc)
     assert id_ != id_1
+    del client, retriever
 
 
 def test_retriever_gen_doc_ids():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     docs = [Document(page_content="Hello world", metadata={"source": "bar"}),
             Document(page_content="Hello", metadata={"source": "foo"})]
     ids = retriever.gen_doc_ids(docs)
     assert len(ids) == len(docs)
     assert all(isinstance(id, str) for id in ids)
+    del client, retriever
 
 
 def test_retriever_split_docs():
@@ -36,6 +52,8 @@
 
 
 def test_retriever_add_docs():
+    client = DeepLakeClient(collection_name="test_retriever")
+    retriever = Retriever(vectordb=client)
     # small enough docs to not split.
     docs = [Document(page_content=
         """And so on this rainbow day, with storms all around them, and blue sky
@@ -75,11 +93,11 @@ def test_retriever_add_docs():
     ]
     ids = retriever.gen_doc_ids(docs)
     retriever.add_docs(docs)
-    retrieved = retriever.store.mget(ids)
+    retrieved = retriever.docstore.mget(ids)
     assert len(retrieved) == len(ids)
-    for i, doc in enumerate(docs):
-        retrieved_doc = json.loads(retrieved[i].decode())
-        assert doc.metadata == retrieved_doc.metadata
+    for ret, doc in zip(retrieved, docs):
+        assert ret.metadata == doc.metadata
+    del client, retriever
 
 
 def test_retriever_aadd_docs():
diff --git a/src/tests/components/utils_test.py b/src/tests/components/utils_test.py
new file mode 100644
index 0000000..ddbca0f
--- /dev/null
+++ b/src/tests/components/utils_test.py
@@ -0,0 +1,8 @@
+import os
+
+from grag.components.utils import get_config
+
+
+def test_get_config():
+    config = get_config(load_env=True)
+    assert os.environ["HF_TOKEN"]
diff --git a/src/tests/components/vectordb/chroma_client_test.py b/src/tests/components/vectordb/chroma_client_test.py
index c491dfd..f07908c 100644
--- a/src/tests/components/vectordb/chroma_client_test.py
+++ b/src/tests/components/vectordb/chroma_client_test.py
@@ -9,6 +9,7 @@ def test_chroma_connection():
     chroma_client = ChromaClient()
     response = chroma_client.test_connection()
     assert isinstance(response, int)
+    del chroma_client
 
 
 def test_chroma_add_docs():
@@ -52,6 +53,7 @@
     docs = [Document(page_content=doc) for doc in docs]
     chroma_client.add_docs(docs)
     assert len(chroma_client) == len(docs)
+    del chroma_client
 
 
 def test_chroma_aadd_docs():
@@ -96,6 +98,7 @@
     loop = asyncio.get_event_loop()
     loop.run_until_complete(chroma_client.aadd_docs(docs))
     assert len(chroma_client) == len(docs)
+    del chroma_client
 
 
 chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)]
@@ -122,6 +125,7 @@ def test_chroma_get_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
+    del chroma_client
 
 
 @pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params)
@@ -146,3 +150,4 @@ def test_chroma_aget_chunk(top_k, with_score):
         assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
     else:
         assert all(isinstance(doc, Document) for doc in retrieved_chunks)
+    del chroma_client
diff --git a/src/tests/components/vectordb/deeplake_client_test.py b/src/tests/components/vectordb/deeplake_client_test.py
index cea5e61..70fcf31 100644
--- a/src/tests/components/vectordb/deeplake_client_test.py
+++ b/src/tests/components/vectordb/deeplake_client_test.py
@@ -1,9 +1,19 @@
 import asyncio
+import os
+import shutil
+from pathlib import Path
 
 import pytest
+from grag.components.utils import get_config
 from grag.components.vectordb.deeplake_client import DeepLakeClient
 from langchain_core.documents import Document
 
+config = get_config()
+test_path = Path(config['data']['data_path']) / 'vectordb/test_client'
+if os.path.exists(test_path):
+    shutil.rmtree(test_path)
+    print('Deleting test retriever: {}'.format(test_path))
+
 
 def test_deeplake_add_docs():
     docs = [
         """And so on this rainbow day, with storms all around them, and blue sky
         above, they rode only as far as the valley. But from there, before they
@@ -40,7 +50,7 @@ def test_deeplake_add_docs():
         storm-clouds was split to the blinding zigzag of lightning, and the
         thunder rolled and boomed, like the Colorado in flood.""",
     ]
-    deeplake_client = DeepLakeClient(collection_name="test")
+    deeplake_client = DeepLakeClient(collection_name="test_client")
     if len(deeplake_client) > 0:
         deeplake_client.delete()
     docs = [Document(page_content=doc) for doc in docs]
@@ -49,7 +59,7 @@
     del deeplake_client
 
 
-def test_chroma_aadd_docs():
+def test_deeplake_aadd_docs():
     docs = [
         """And so on this rainbow day, with storms all around them, and blue sky
         above, they rode only as far as the valley. But from there, before they
@@ -84,7 +94,7 @@ def test_deeplake_aadd_docs():
         storm-clouds was split to the blinding zigzag of lightning, and the
         thunder rolled and boomed, like the Colorado in flood.""",
     ]
-    deeplake_client = DeepLakeClient(collection_name="test")
+    deeplake_client = DeepLakeClient(collection_name="test_client")
     if len(deeplake_client) > 0:
         deeplake_client.delete()
     docs = [Document(page_content=doc) for doc in docs]
@@ -108,7 +118,7 @@ def test_deeplake_get_chunk(top_k, with_score):
         ankles from Joel Creech's lasso had never mended. The girl was
         unutterably happy, but it was possible that she would never race a
         horse again."""
-    deeplake_client = DeepLakeClient(collection_name="test", read_only=True)
+    deeplake_client = DeepLakeClient(collection_name="test_client", read_only=True)
     retrieved_chunks = deeplake_client.get_chunk(
         query=query, top_k=top_k, with_score=with_score
     )
@@ -132,7 +142,7 @@ def test_deeplake_aget_chunk(top_k, with_score):
         ankles from Joel Creech's lasso had never mended. The girl was
         unutterably happy, but it was possible that she would never race a
         horse again."""
-    deeplake_client = DeepLakeClient(collection_name="test", read_only=True)
+    deeplake_client = DeepLakeClient(collection_name="test_client", read_only=True)
     loop = asyncio.get_event_loop()
     retrieved_chunks = loop.run_until_complete(
         deeplake_client.aget_chunk(query=query, top_k=top_k, with_score=with_score)
diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py
index af0e9dd..68078fe 100644
--- a/src/tests/quantize/quantize_test.py
+++ b/src/tests/quantize/quantize_test.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 from pathlib import Path
 
 from grag.quantize.utils import (
@@ -9,6 +10,8 @@
 )
 
 root_path = Path(__file__).parent / "test_data"
+if os.path.exists(root_path):
+    shutil.rmtree(root_path)
 os.makedirs(root_path, exist_ok=True)
diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py
index 0b93643..1695e93 100644
--- a/src/tests/rag/basic_rag_test.py
+++ b/src/tests/rag/basic_rag_test.py
@@ -9,7 +9,8 @@ def test_rag_stuff():
-    rag = BasicRAG(doc_chain="stuff", retriever=retriever)
+    rag = BasicRAG(doc_chain="stuff", retriever=retriever,
+                   llm_kwargs={"model_name": "Llama-2-7b-chat", "n_gpu_layers": "-1"})
     response, sources = rag("What is Flash Attention?")
     assert isinstance(response, Text)
     assert isinstance(sources, List)
@@ -18,7 +19,8 @@ def test_rag_refine():
-    rag = BasicRAG(doc_chain="refine", retriever=retriever)
+    rag = BasicRAG(doc_chain="refine", retriever=retriever,
+                   llm_kwargs={"model_name": "Llama-2-7b-chat", "n_gpu_layers": "-1"})
     response, sources = rag("What is Flash Attention?")
     assert isinstance(response, List)
     assert all(isinstance(s, str) for s in response)
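A closing note on the GPU-offload defaults: `n_gpu_layers_cpp : -1` follows llama.cpp's convention that -1 offloads all model layers to the GPU, while the per-model `gpu_layers` list in `llm_test.py` (`-1, -1, 18, 16`) only partially offloads the f16 gemma and Mixtral runs, presumably to fit VRAM. A minimal sketch of how these settings reach the underlying `LlamaCpp` binding; the model path is hypothetical, and the numeric values mirror `src/config.ini`:

    from langchain_community.llms import LlamaCpp

    llm = LlamaCpp(
        model_path="models/Llama-2-7b-chat/ggml-model-Q5_K_M.gguf",  # hypothetical path
        n_gpu_layers=-1,  # -1 offloads every layer to the GPU
        n_ctx=6000,       # n_ctx_cpp
        n_batch=1024,     # n_batch_gpu_cpp
        temperature=0.1,
        max_tokens=1024,  # max_new_tokens
    )
    print(llm.invoke("Who are you?"))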