
Commit

Merge pull request #67 from arjbingly/revert-66-tests
Revert "CI Pipeline"
arjbingly authored Apr 4, 2024
2 parents 73af8cd + 68d31be commit c74e0bd
Showing 18 changed files with 50 additions and 115 deletions.
14 changes: 6 additions & 8 deletions ci/Jenkinsfile
@@ -6,10 +6,6 @@ pipeline {
}
environment {
PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
CUDACXX = '/usr/local/cuda-12/bin/nvcc'
CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
PATH="/usr/local/cuda-12.3/bin:$PATH"
LD_LIBRARY_PATH="/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH"
}


@@ -31,14 +27,15 @@ pipeline {
stage('Install dependencies'){
steps {
withPythonEnv(PYTHONPATH){
sh 'pip install -e .'
sh "pip install -e ."
}
}

}

stage('Config'){
steps{
sh 'echo $env.JENKINS_HOME'
withPythonEnv(PYTHONPATH){
sh 'python3 ci/modify_config.py'
sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
@@ -87,21 +84,22 @@ pipeline {

stage('Tests'){
steps{
sh 'echo $USER'
sh 'docker pull chromadb/chroma'
sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
withPythonEnv(PYTHONPATH){
sh 'pip install pytest'
sh 'python3 ci/unlock_deeplake.py'
sh 'pytest src -vvv --junitxml=pytest-report.xml'
sh 'pytest src --junitxml=pytest-report.xml'
}
}
post {
always{
sh 'docker stop jenkins-chroma'
sh 'docker rm jenkins-chroma'
withChecks('Integration Tests'){
junit 'pytest-report.xml'
}
sh 'docker stop jenkins-chroma'
sh 'docker rm jenkins-chroma'

cleanWs(
cleanWhenNotBuilt: false,
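The deleted environment block is what let pip compile llama-cpp-python against CUDA: CMAKE_ARGS and CUDACXX are read at build time when the wheel is built from source, so dropping them yields a CPU-only build. A minimal sketch of reproducing that build outside Jenkins (the pip flags are assumptions, not from this repo; the env values are copied from the deleted block):

import os
import subprocess

env = dict(os.environ)
env["CUDACXX"] = "/usr/local/cuda-12/bin/nvcc"   # from the deleted block
env["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"          # cuBLAS-enabled build

# --no-cache-dir forces a fresh source build instead of a cached wheel
subprocess.run(
    ["pip", "install", "--force-reinstall", "--no-cache-dir", "llama-cpp-python"],
    env=env,
    check=True,
)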
7 changes: 0 additions & 7 deletions ci/env_test.py

This file was deleted.

1 change: 0 additions & 1 deletion ci/modify_config.py
@@ -12,7 +12,6 @@
config['root']['root_path'] = f'{workspace}'
config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'
config['env']['env_path'] = f'{jenkins_home}/env_file/.env'

with open(f'{workspace}/src/config.ini', 'w') as configfile:
config.write(configfile)
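For context, the deleted line sits in a short script that points config.ini at CI paths. A sketch of the post-revert script, assuming WORKSPACE and JENKINS_HOME come from the Jenkins environment (only the assignment and write lines appear in the diff; the rest is an assumption):

import os
from configparser import ConfigParser, ExtendedInterpolation

workspace = os.environ["WORKSPACE"]        # assumed: provided by Jenkins
jenkins_home = os.environ["JENKINS_HOME"]  # assumed: provided by Jenkins

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read(f"{workspace}/src/config.ini")

config["root"]["root_path"] = f"{workspace}"
config["data"]["data_path"] = f"{jenkins_home}/ci_test_data/data"
config["llm"]["base_dir"] = f"{jenkins_home}/ci_test_models/models"
# the env_path rewrite removed by this revert no longer appears here

with open(f"{workspace}/src/config.ini", "w") as configfile:
    config.write(configfile)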
6 changes: 6 additions & 0 deletions ci/modify_config_test.py
@@ -0,0 +1,6 @@
from grag.components.utils import get_config

config = get_config()
print(f"{config['root']['root_path']=}")
print(f"{config['data']['data_path'] = }")
print(f"{config['llm']['base_dir'] = }")
4 changes: 1 addition & 3 deletions pyproject.toml
@@ -42,9 +42,7 @@ dependencies = [
"huggingface_hub>=0.20.2",
"pydantic>=2.5.0",
"rouge-score>=0.1.2",
"deeplake>=3.8.27",
"bitsandbytes>=0.43.0",
"accelerate>=0.28.0"
"deeplake>=3.8.27"
]

[project.optional-dependencies]
7 changes: 2 additions & 5 deletions src/config.ini
@@ -9,8 +9,8 @@ max_new_tokens : 1024
temperature : 0.1
n_batch_gpu_cpp : 1024
n_ctx_cpp : 6000
n_gpu_layers_cpp : -1
# The number of layers to put on the GPU. Mixtral-18, gemma-20
n_gpu_layers_cpp : 16
# The number of layers to put on the GPU. Mixtral-18
std_out : True
base_dir : ${root:root_path}/models

@@ -58,9 +58,6 @@ table_as_html : True
[data]
data_path : ${root:root_path}/data

[env]
env_path : ${root:root_path}/.env

[root]
root_path : /home/ubuntu/volume_2k/Capstone_5

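The n_gpu_layers change is the performance-relevant bit: -1 offloads every layer to the GPU, while 16 offloads only the first 16. A hedged sketch of where these config values end up (parameter names follow langchain's LlamaCpp wrapper; the model path is hypothetical):

from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="models/Llama-2-7b-chat/ggml-model-Q5_K_M.gguf",  # hypothetical
    n_gpu_layers=16,   # post-revert value; -1 (offload all layers) was reverted
    n_ctx=6000,        # n_ctx_cpp from config.ini
    n_batch=1024,      # n_batch_gpu_cpp
    temperature=0.1,
    max_tokens=1024,   # max_new_tokens
)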
8 changes: 5 additions & 3 deletions src/grag/components/llm.py
@@ -4,6 +4,7 @@
from pathlib import Path

import torch
from dotenv import load_dotenv
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
@@ -17,7 +18,7 @@

from .utils import get_config

llm_conf = get_config(load_env=True)["llm"]
llm_conf = get_config()["llm"]

print("CUDA: ", torch.cuda.is_available())

@@ -116,8 +117,9 @@ def hf_pipeline(self, is_local=False):
)
except OSError: # LocalTokenNotFoundError:
# If loading fails due to an auth token error, then load the token and retry
# load_dotenv()
if not os.getenv("HF_TOKEN"):
load_dotenv()
auth_token = os.getenv("AUTH_TOKEN")
if not auth_token:
raise ValueError("Authentication token not provided.")
tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
model = AutoModelForCausalLM.from_pretrained(
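The restored except branch is a load-and-retry pattern: if a gated model cannot be fetched, pull credentials from a .env file and try again. A self-contained sketch of the same flow (the helper name is illustrative, the logic follows the diff):

import os

from dotenv import load_dotenv
from transformers import AutoTokenizer

def load_tokenizer(hf_model: str):
    """Illustrative helper mirroring the restored fallback in llm.py."""
    try:
        return AutoTokenizer.from_pretrained(hf_model)
    except OSError:
        # Auth failure: read the token from a .env file, then retry.
        load_dotenv()
        if not os.getenv("AUTH_TOKEN"):
            raise ValueError("Authentication token not provided.")
        # token=True tells huggingface_hub to use the locally stored token
        return AutoTokenizer.from_pretrained(hf_model, token=True)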
1 change: 0 additions & 1 deletion src/grag/components/multivec_retriever.py
@@ -78,7 +78,6 @@ def __init__(
byte_store=self.store, # type: ignore
id_key=self.id_key,
)
self.docstore = self.retriever.docstore
self.splitter = TextSplitter()
self.top_k: int = top_k
self.retriever.search_kwargs = {"k": self.top_k}
15 changes: 4 additions & 11 deletions src/grag/components/utils.py
@@ -12,7 +12,6 @@
from pathlib import Path
from typing import List

from dotenv import load_dotenv
from langchain_core.documents import Document


@@ -43,15 +42,15 @@ def find_config_path(current_path: Path) -> Path:
Raises:
FileNotFoundError: If 'config.ini' cannot be found in any of the parent directories.
"""
config_path = Path("config.ini")
config_path = Path("src/config.ini")
while not (current_path / config_path).exists():
current_path = current_path.parent
if current_path == current_path.parent:
raise FileNotFoundError(f"config.ini not found in {config_path}.")
return current_path / config_path


def get_config(load_env=False) -> ConfigParser:
def get_config() -> ConfigParser:
"""Retrieves and parses the configuration settings from the 'config.ini' file.
This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.
@@ -68,15 +67,9 @@ def get_config(load_env=False) -> ConfigParser:
else:
config_path = find_config_path(script_location)
os.environ["CONFIG_PATH"] = str(config_path)

print(f"Loaded config from {config_path}.")
# Initialize parser and read config
config = ConfigParser(interpolation=ExtendedInterpolation())
config.read(config_path)
print(f"Loaded config from {config_path}.")
# load_dotenv(config['env']['env_path'])
if load_env:
env_path = Path(config['env']['env_path'])
if env_path.exists():
load_dotenv(env_path)
print(f"Loaded environment variables from {env_path}")

return config
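After the revert, get_config() does one job: walk up from the calling file until a directory containing src/config.ini is found, parse it with ExtendedInterpolation, and return it; any .env loading now happens at the call sites (see llm.py above). Usage stays a one-liner:

from grag.components.utils import get_config

config = get_config()                      # walks parents to find src/config.ini
print(config["llm"]["n_gpu_layers_cpp"])   # -> '16' with the config.ini above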
4 changes: 2 additions & 2 deletions src/grag/quantize/utils.py
@@ -51,7 +51,7 @@ def building_llamacpp(root_path: Union[str, Path]) -> None:
os.chdir(f"{root_path}/llama.cpp/")
try:
subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL)
subprocess.run(["make", "LLAMA_CUDA=1"], check=True)
subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True)
print("Llama.cpp build successful.")
except subprocess.CalledProcessError:
try:
@@ -64,7 +64,7 @@
"&&",
"cmake",
"..",
"-DLLAMA_CUDA=ON",
"-DLLAMA_CUBLAS=ON",
"&&",
"cmake",
"--build",
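LLAMA_CUBLAS is the older llama.cpp spelling of the CUDA build switch; newer llama.cpp revisions renamed it to LLAMA_CUDA, which is what the reverted branch had moved to. Which spelling a given checkout accepts depends on its revision; a quick assumption-level check, not from the repo:

from pathlib import Path

# Inspect the Makefile of the llama.cpp checkout to see which CUDA flag
# this revision understands before invoking make.
makefile = Path("llama.cpp/Makefile").read_text()
flag = "LLAMA_CUDA" if "LLAMA_CUDA" in makefile else "LLAMA_CUBLAS"
print(f"build with: make {flag}=1")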
1 change: 0 additions & 1 deletion src/tests/components/embedding_test.py
@@ -47,4 +47,3 @@ def test_embeddings(embedding_config):
cosine_similarity(doc_vecs[0], doc_vecs[2]),
]
assert similarity_scores[0] > similarity_scores[1]
del embedding
19 changes: 8 additions & 11 deletions src/tests/components/llm_test.py
@@ -2,24 +2,21 @@

import pytest
from grag.components.llm import LLM
from grag.components.utils import get_config

config = get_config(load_env=True)

llama_models = [
"Llama-2-7b-chat",
"Llama-2-13b-chat",
"gemma-7b-it",
"Mixtral-8x7B-Instruct-v0.1",
"gemma-7b-it",
]
hf_models = [
"meta-llama/Llama-2-7b-chat-hf",
"meta-llama/Llama-2-13b-chat-hf",
# 'mistralai/Mixtral-8x7B-Instruct-v0.1',
"google/gemma-7b-it",
]
cpp_quantization = ["Q5_K_M", "Q5_K_M", "f16", "Q4_K_M"]
gpu_layers = ['-1', '-1', '18', '16']
hf_quantization = ["Q8", "Q4", "Q4"]
cpp_quantization = ["Q5_K_M", "Q5_K_M", "Q4_K_M", "f16"]
hf_quantization = ["Q8", "Q4", "Q4"] # , 'Q4']
params = [(model, quant) for model, quant in zip(hf_models, hf_quantization)]


@@ -32,12 +29,12 @@ def test_hf_web_pipe(hf_models, quantization):
del model


params = [(model, gpu_layer, quant) for model, gpu_layer, quant in zip(llama_models, gpu_layers, cpp_quantization)]
params = [(model, quant) for model, quant in zip(llama_models, cpp_quantization)]


@pytest.mark.parametrize("model_name, gpu_layer, quantization", params)
def test_llamacpp_pipe(model_name, gpu_layer, quantization):
llm_ = LLM(quantization=quantization, model_name=model_name, n_gpu_layers=gpu_layer, pipeline="llama_cpp")
@pytest.mark.parametrize("model_name, quantization", params)
def test_llamacpp_pipe(model_name, quantization):
llm_ = LLM(quantization=quantization, model_name=model_name, pipeline="llama_cpp")
model = llm_.load_model()
response = model.invoke("Who are you?")
assert isinstance(response, Text)
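One property of the zip-based pairing above worth keeping in mind: zip stops at the shorter list, so a model with no matching quantization entry is silently dropped rather than reported as a missing test case. A minimal illustration:

models = ["Llama-2-7b-chat", "Llama-2-13b-chat", "gemma-7b-it"]
quants = ["Q5_K_M", "Q5_K_M"]  # deliberately one entry short
params = [(m, q) for m, q in zip(models, quants)]
print(params)  # gemma-7b-it never becomes a test case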
36 changes: 9 additions & 27 deletions src/tests/components/multivec_retriever_test.py
@@ -1,59 +1,41 @@
import os
import shutil
from pathlib import Path
import json

from grag.components.multivec_retriever import Retriever
from grag.components.utils import get_config
from grag.components.vectordb.deeplake_client import DeepLakeClient
from langchain_core.documents import Document

config = get_config()

test_path = Path(config['data']['data_path']) / 'vectordb/test_retriever'
if os.path.exists(test_path):
shutil.rmtree(test_path)
print('Deleting test retriever: {}'.format(test_path))

# client = DeepLakeClient(collection_name="test_retriever")
# retriever = Retriever(vectordb=client) # pass test collection
client = DeepLakeClient(collection_name="ci_test")
retriever = Retriever(vectordb=client) # pass test collection

doc = Document(page_content="Hello worlds", metadata={"source": "bars"})


def test_retriever_id_gen():
client = DeepLakeClient(collection_name="test_retriever")
retriever = Retriever(vectordb=client)
def test_retriver_id_gen():
doc = Document(page_content="Hello world", metadata={"source": "bar"})
id_ = retriever.id_gen(doc)
assert isinstance(id_, str)
assert isinstance(id, str)
assert len(id_) == 32
doc.page_content = doc.page_content + 'ABC'
id_1 = retriever.id_gen(doc)
assert id_ == id_1
doc.metadata["source"] = "bars"
id_1 = retriever.id_gen(doc)
assert id_ != id_1
del client, retriever


def test_retriever_gen_doc_ids():
client = DeepLakeClient(collection_name="test_retriever")
retriever = Retriever(vectordb=client)
docs = [Document(page_content="Hello world", metadata={"source": "bar"}),
Document(page_content="Hello", metadata={"source": "foo"})]
ids = retriever.gen_doc_ids(docs)
assert len(ids) == len(docs)
assert all(isinstance(id, str) for id in ids)
del client, retriever


def test_retriever_split_docs():
pass


def test_retriever_add_docs():
client = DeepLakeClient(collection_name="test_retriever")
retriever = Retriever(vectordb=client)
# small enough docs to not split.
docs = [Document(page_content=
"""And so on this rainbow day, with storms all around them, and blue sky
@@ -93,11 +75,11 @@ def test_retriever_add_docs():
]
ids = retriever.gen_doc_ids(docs)
retriever.add_docs(docs)
retrieved = retriever.docstore.mget(ids)
retrieved = retriever.store.mget(ids)
assert len(retrieved) == len(ids)
for ret, doc in zip(retrieved, docs):
assert ret.metadata == doc.metadata
del client, retriever
for i, doc in enumerate(docs):
retrieved_doc = json.loads(retrieved[i].decode())
assert doc.metadata == retrieved_doc.metadata


def test_retriever_aadd_docs():
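Read together, the assertions above pin down the contract of Retriever.id_gen: a 32-character hex id (MD5-sized) that depends only on the source metadata, not on page_content. A hedged reimplementation of that contract, not the repo's actual code:

import hashlib

from langchain_core.documents import Document

def id_gen(doc: Document) -> str:
    # assumption: hash the source path only, so content edits keep the id
    return hashlib.md5(doc.metadata["source"].encode()).hexdigest()

doc = Document(page_content="Hello world", metadata={"source": "bar"})
assert len(id_gen(doc)) == 32                          # 32 hex chars
doc.page_content += "ABC"
assert id_gen(doc) == hashlib.md5(b"bar").hexdigest()  # content-independent
doc.metadata["source"] = "bars"
assert id_gen(doc) != hashlib.md5(b"bar").hexdigest()  # source-dependent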
8 changes: 0 additions & 8 deletions src/tests/components/utils_test.py

This file was deleted.

5 changes: 0 additions & 5 deletions src/tests/components/vectordb/chroma_client_test.py
@@ -9,7 +9,6 @@ def test_chroma_connection():
chroma_client = ChromaClient()
response = chroma_client.test_connection()
assert isinstance(response, int)
del chroma_client


def test_chroma_add_docs():
@@ -53,7 +52,6 @@ def test_chroma_add_docs():
docs = [Document(page_content=doc) for doc in docs]
chroma_client.add_docs(docs)
assert len(chroma_client) == len(docs)
del chroma_client


def test_chroma_aadd_docs():
@@ -98,7 +96,6 @@ def test_chroma_aadd_docs():
loop = asyncio.get_event_loop()
loop.run_until_complete(chroma_client.aadd_docs(docs))
assert len(chroma_client) == len(docs)
del chroma_client


chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)]
@@ -125,7 +122,6 @@ def test_chroma_get_chunk(top_k, with_score):
assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
else:
assert all(isinstance(doc, Document) for doc in retrieved_chunks)
del chroma_client


@pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params)
@@ -150,4 +146,3 @@ def test_chroma_aget_chunk(top_k, with_score):
assert all(isinstance(doc[1], float) for doc in retrieved_chunks)
else:
assert all(isinstance(doc, Document) for doc in retrieved_chunks)
del chroma_client
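The async variants above drive coroutines from synchronous tests with get_event_loop/run_until_complete rather than pytest-asyncio. A self-contained sketch of that pattern (the coroutine is a stand-in, not ChromaClient):

import asyncio

async def aadd_docs(docs):
    await asyncio.sleep(0)  # stand-in for async calls to the Chroma server
    return len(docs)

loop = asyncio.get_event_loop()  # asyncio.run(...) is the modern equivalent
assert loop.run_until_complete(aadd_docs(["doc a", "doc b"])) == 2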
(Diffs for the remaining changed files did not load.)
