Commit 555e39c
Jenkins CI
arjbingly committed Apr 4, 2024
1 parent 03a421b commit 555e39c
Showing 20 changed files with 267 additions and 47 deletions.
119 changes: 119 additions & 0 deletions ci/Jenkinsfile
@@ -0,0 +1,119 @@
pipeline {
    agent any

    options {
        skipDefaultCheckout(true)
    }

    environment {
        PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
        CUDACXX = '/usr/local/cuda-12/bin/nvcc'
        CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
        PATH = "/usr/local/cuda-12.3/bin:$PATH"
        LD_LIBRARY_PATH = "/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
    }

    stages {

        stage('Checkout') {
            steps {
                cleanWs()
                checkout scm
            }
        }

        stage('Create venv') {
            steps {
                sh 'python3 -m venv .venv'
            }
        }

        stage('Install dependencies') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install -e .'
                }
            }
        }

        stage('Config') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'python3 ci/modify_config.py'
                    sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
                    sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $JENKINS_HOME/ci_test_data/data/vectordb'
                }
            }
        }

        stage('Linting') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install ruff'
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                        sh 'ruff check . --exclude .pyenv-var-lib-jenkins-workspace-capstone_5-.venv-bin --output-format junit -o ruff-report.xml'
                        sh 'ruff format .'
                    }
                }
            }
            post {
                always {
                    withChecks('Lint Checks') {
                        junit 'ruff-report.xml'
                    }
                }
            }
        }

        stage('Static type check') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install mypy'
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                        sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml'
                    }
                }
            }
            post {
                always {
                    withChecks('Static Type Checks') {
                        junit 'mypy-report.xml'
                    }
                }
            }
        }

        stage('Tests') {
            steps {
                sh 'docker pull chromadb/chroma'
                sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install pytest'
                    sh 'python3 ci/unlock_deeplake.py'
                    sh 'pytest src -vvv --junitxml=pytest-report.xml'
                }
            }
            post {
                always {
                    sh 'docker stop jenkins-chroma'
                    sh 'docker rm jenkins-chroma'
                    withChecks('Integration Tests') {
                        junit 'pytest-report.xml'
                    }
                    cleanWs(
                        cleanWhenNotBuilt: false,
                        deleteDirs: true,
                        disableDeferredWipeout: true,
                        notFailBuild: true,
                        patterns: [[pattern: '.gitignore', type: 'INCLUDE'],
                                   [pattern: '.propsfile', type: 'EXCLUDE']]
                    )
                }
            }
        }
    }
}
7 changes: 7 additions & 0 deletions ci/env_test.py
@@ -0,0 +1,7 @@
import os

from grag.components.utils import get_config

get_config(load_env=True)

print(os.environ['HF_TOKEN'])
18 changes: 18 additions & 0 deletions ci/modify_config.py
@@ -0,0 +1,18 @@
import os

from grag.components.utils import get_config

workspace = os.getenv('WORKSPACE')
jenkins_home = os.getenv('JENKINS_HOME')

# Load the existing config and point all paths at the Jenkins workspace
config = get_config()
config['root']['root_path'] = f'{workspace}'
config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'
config['env']['env_path'] = f'{jenkins_home}/env_file/.env'

with open(f'{workspace}/src/config.ini', 'w') as configfile:
    config.write(configfile)
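
Note: get_config() parses config.ini with ExtendedInterpolation, so ${section:key} values like the ones this script rewrites are stored raw and resolved when read. A minimal stand-alone sketch of that behavior (the paths are illustrative, not from this commit):

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_string("""
[root]
root_path : /var/lib/jenkins/workspace/capstone_5

[data]
data_path : ${root:root_path}/data
""")

# Interpolation happens on access, not on write:
print(config['data']['data_path'])  # /var/lib/jenkins/workspace/capstone_5/data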
11 changes: 11 additions & 0 deletions ci/unlock_deeplake.py
@@ -0,0 +1,11 @@
import os
import shutil
from pathlib import Path

jenkins_home = os.getenv('JENKINS_HOME')

lock_path = Path(jenkins_home) / 'ci_test_data/data/vectordb/ci_test/dataset_lock.lock'

if lock_path.exists():
    print(f'Deleting lock: {lock_path}')
    shutil.rmtree(lock_path)
8 changes: 5 additions & 3 deletions cookbook/Basic-RAG/BasicRAG_ingest.py
@@ -5,9 +5,11 @@
from grag.components.multivec_retriever import Retriever
from grag.components.vectordb.deeplake_client import DeepLakeClient

-client = DeepLakeClient(collection_name="test")
-retriever = Retriever(vectordb=client)
+# from grag.components.vectordb.chroma_client import ChromaClient

-dir_path = Path(__file__).parents[2] / "data/client_test/test/"
+client = DeepLakeClient(collection_name="ci_test")
+# client = ChromaClient(collection_name="ci_test")
+retriever = Retriever(vectordb=client)
+
+dir_path = Path(__file__).parents[2] / "data/test/pdfs/new_papers"
retriever.ingest(dir_path)
8 changes: 5 additions & 3 deletions pyproject.toml
@@ -35,14 +35,16 @@ dependencies = [
"sentence-transformers==2.2.2",
"instructorembedding>=1.0.1",
"streamlit>=1.31.1",
"unstructured>=0.12.3",
"unstructured[pdf]>=0.12.3",
"pdfplumber>=0.10.3",
"llama-cpp-python>=0.2.43",
"tqdm>=4.65.0",
"huggingface_hub>=0.20.2",
"pydantic>=2.5.0",
"rouge-score>=0.1.2",
"deeplake>=3.8.27"
"deeplake>=3.8.27",
"bitsandbytes>=0.43.0",
"accelerate>=0.28.0"
]

[project.optional-dependencies]
@@ -112,7 +114,7 @@ exclude_lines = [
[tool.ruff]
line-length = 88
indent-width = 4
-extend-exclude = ["tests", "others"]
+extend-exclude = ["tests", "others", "docs", "ci"]

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "I", "D"]
7 changes: 5 additions & 2 deletions src/config.ini
@@ -9,8 +9,8 @@ max_new_tokens : 1024
temperature : 0.1
n_batch_gpu_cpp : 1024
n_ctx_cpp : 6000
-n_gpu_layers_cpp : 16
-# The number of layers to put on the GPU. Mixtral-18
+n_gpu_layers_cpp : -1
+# The number of layers to put on the GPU. Mixtral-18, gemma-20
std_out : True
base_dir : ${root:root_path}/models

@@ -58,6 +58,9 @@ table_as_html : True
[data]
data_path : ${root:root_path}/data

+[env]
+env_path : ${root:root_path}/.env

[root]
root_path : /home/ubuntu/volume_2k/Capstone_5

8 changes: 3 additions & 5 deletions src/grag/components/llm.py
@@ -4,7 +4,6 @@
from pathlib import Path

import torch
-from dotenv import load_dotenv
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
@@ -18,7 +17,7 @@

from .utils import get_config

-llm_conf = get_config()["llm"]
+llm_conf = get_config(load_env=True)["llm"]

print("CUDA: ", torch.cuda.is_available())

@@ -117,9 +116,8 @@ def hf_pipeline(self, is_local=False):
            )
        except OSError:  # LocalTokenNotFoundError:
            # If loading fails due to an auth token error, then load the token and retry
-            load_dotenv()
-            auth_token = os.getenv("AUTH_TOKEN")
-            if not auth_token:
+            # load_dotenv()
+            if not os.getenv("HF_TOKEN"):
                raise ValueError("Authentication token not provided.")
            tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
            model = AutoModelForCausalLM.from_pretrained(
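Note: with the switch from AUTH_TOKEN to HF_TOKEN, the token loaded from the .env file is the one huggingface_hub and transformers already look for by convention. A minimal sketch of the resulting flow (the model id is taken from the test matrix below; this is illustrative, not part of the commit):

import os

from grag.components.utils import get_config
from transformers import AutoTokenizer

get_config(load_env=True)  # loads HF_TOKEN from the configured .env file

if not os.getenv("HF_TOKEN"):
    raise ValueError("Authentication token not provided.")

# token=True makes transformers use the ambient token (HF_TOKEN / cached login).
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=True)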
3 changes: 2 additions & 1 deletion src/grag/components/multivec_retriever.py
@@ -75,9 +75,10 @@ def __init__(
        self.store = LocalFileStore(self.store_path)
        self.retriever = MultiVectorRetriever(
            vectorstore=self.vectordb.langchain_client,
-            docstore=self.store,  # type: ignore
+            byte_store=self.store,  # type: ignore
            id_key=self.id_key,
        )
+        self.docstore = self.retriever.docstore
        self.splitter = TextSplitter()
        self.top_k: int = top_k
        self.retriever.search_kwargs = {"k": self.top_k}
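Note: in LangChain's MultiVectorRetriever, docstore expects a document store, while byte_store takes a raw key-value ByteStore (LocalFileStore here) and wraps it into a docstore internally; that wrapper is then reachable as retriever.docstore, which is what the new self.docstore line re-exposes. A minimal sketch under those assumptions (the vector store choice and path are illustrative):

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import LocalFileStore
from langchain_community.vectorstores import Chroma

byte_store = LocalFileStore("/tmp/docstore")  # persists raw bytes on disk
retriever = MultiVectorRetriever(
    vectorstore=Chroma(collection_name="demo"),
    byte_store=byte_store,  # wrapped into a Document-aware docstore internally
    id_key="doc_id",
)
print(type(retriever.docstore))  # the wrapped store, not the raw ByteStore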
15 changes: 11 additions & 4 deletions src/grag/components/utils.py
@@ -12,6 +12,7 @@
from pathlib import Path
from typing import List

+from dotenv import load_dotenv
from langchain_core.documents import Document


@@ -42,15 +43,15 @@ def find_config_path(current_path: Path) -> Path:
    Raises:
        FileNotFoundError: If 'config.ini' cannot be found in any of the parent directories.
    """
-    config_path = Path("src/config.ini")
+    config_path = Path("config.ini")
    while not (current_path / config_path).exists():
        current_path = current_path.parent
        if current_path == current_path.parent:
            raise FileNotFoundError(f"config.ini not found in {config_path}.")
    return current_path / config_path


-def get_config() -> ConfigParser:
+def get_config(load_env=False) -> ConfigParser:
"""Retrieves and parses the configuration settings from the 'config.ini' file.
This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.
@@ -67,9 +68,15 @@ def get_config() -> ConfigParser:
    else:
        config_path = find_config_path(script_location)
    os.environ["CONFIG_PATH"] = str(config_path)
+    print(f"Loaded config from {config_path}.")

    # Initialize parser and read config
    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read(config_path)

-    print(f"Loaded config from {config_path}.")
+    # load_dotenv(config['env']['env_path'])
+    if load_env:
+        env_path = Path(config['env']['env_path'])
+        if env_path.exists():
+            load_dotenv(env_path)
+            print(f"Loaded environment variables from {env_path}")
    return config
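
Note: a short usage sketch of the new load_env flag, using the [env] section added to src/config.ini above:

from grag.components.utils import get_config

# Parses config.ini; with load_env=True it also loads the .env file
# at config['env']['env_path'] into the process environment.
config = get_config(load_env=True)

print(config['data']['data_path'])  # interpolated from ${root:root_path}/data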
6 changes: 3 additions & 3 deletions src/grag/components/vectordb/chroma_client.py
@@ -44,8 +44,8 @@ class ChromaClient(VectorDB):

    def __init__(
        self,
-        host: int = chroma_conf["host"],
-        port: int = chroma_conf["port"],
+        host: str = chroma_conf["host"],
+        port: str = chroma_conf["port"],
        collection_name: str = chroma_conf["collection_name"],
        embedding_type: str = chroma_conf["embedding_type"],
        embedding_model: str = chroma_conf["embedding_model"],
@@ -69,7 +69,7 @@ def __init__(
            embedding_model=self.embedding_model, embedding_type=self.embedding_type
        ).embedding_function

-        self.client = chromadb.HttpClient(host=self.host, port=self.port)
+        self.client = chromadb.HttpClient(host=self.host, port=self.port)  # type: ignore
        self.collection = self.client.get_or_create_collection(
            name=self.collection_name
        )
4 changes: 2 additions & 2 deletions src/grag/quantize/utils.py
@@ -51,7 +51,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
    os.chdir(f"{root_path}/llama.cpp/")
    try:
        subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
-        subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
+        subprocess.run(["make", "LLAMA_CUDA=1"], check=True)
        print("Llama.cpp build successful.")
    except subprocess.CalledProcessError:
        try:
@@ -64,7 +64,7 @@
"&&",
"cmake",
"..",
"-DLLAMA_CUBLAS=ON",
"-DLLAMA_CUDA=ON",
"&&",
"cmake",
"--build",
1 change: 1 addition & 0 deletions src/tests/components/embedding_test.py
@@ -47,3 +47,4 @@ def test_embeddings(embedding_config):
        cosine_similarity(doc_vecs[0], doc_vecs[2]),
    ]
    assert similarity_scores[0] > similarity_scores[1]
+    del embedding
19 changes: 11 additions & 8 deletions src/tests/components/llm_test.py
@@ -2,21 +2,24 @@

import pytest
from grag.components.llm import LLM
+from grag.components.utils import get_config

+config = get_config(load_env=True)
+
llama_models = [
    "Llama-2-7b-chat",
    "Llama-2-13b-chat",
-    "Mixtral-8x7B-Instruct-v0.1",
    "gemma-7b-it",
+    "Mixtral-8x7B-Instruct-v0.1",
]
hf_models = [
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    # 'mistralai/Mixtral-8x7B-Instruct-v0.1',
    "google/gemma-7b-it",
]
-cpp_quantization = ["Q5_K_M", "Q5_K_M", "Q4_K_M", "f16"]
-hf_quantization = ["Q8", "Q4", "Q4"]  # , 'Q4']
+cpp_quantization = ["Q5_K_M", "Q5_K_M", "f16", "Q4_K_M"]
+gpu_layers = ['-1', '-1', '18', '16']
+hf_quantization = ["Q8", "Q4", "Q4"]
params = [(model, quant) for model, quant in zip(hf_models, hf_quantization)]


@@ -29,12 +32,12 @@ def test_hf_web_pipe(hf_models, quantization):
    del model


-params = [(model, quant) for model, quant in zip(llama_models, cpp_quantization)]
+params = [(model, gpu_layer, quant) for model, gpu_layer, quant in zip(llama_models, gpu_layers, cpp_quantization)]


-@pytest.mark.parametrize("model_name, quantization", params)
-def test_llamacpp_pipe(model_name, quantization):
-    llm_ = LLM(quantization=quantization, model_name=model_name, pipeline="llama_cpp")
+@pytest.mark.parametrize("model_name, gpu_layer, quantization", params)
+def test_llamacpp_pipe(model_name, gpu_layer, quantization):
+    llm_ = LLM(quantization=quantization, model_name=model_name, n_gpu_layers=gpu_layer, pipeline="llama_cpp")
    model = llm_.load_model()
    response = model.invoke("Who are you?")
    assert isinstance(response, Text)