Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI, JenkinsFile #65

Merged
merged 48 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
0216f36
mypy modifications
sanchitvj Mar 31, 2024
33d553f
Jenkisfile, CI
sanchitvj Mar 31, 2024
24f22da
issue with ci
sanchitvj Mar 31, 2024
15e904f
jenkinsfile error
sanchitvj Mar 31, 2024
1b58f91
virtual environment issue
sanchitvj Mar 31, 2024
6eb78ee
grag installation
sanchitvj Mar 31, 2024
e683a1a
rewriting config.ini
sanchitvj Mar 31, 2024
8ad88fb
model path modified
sanchitvj Mar 31, 2024
c8e48e6
.
sanchitvj Mar 31, 2024
676169a
installations
sanchitvj Mar 31, 2024
88b3e60
.
sanchitvj Mar 31, 2024
43122a1
modified jenkisfile
sanchitvj Mar 31, 2024
c7be5f1
Jenkins declarative checkout
arjbingly Apr 1, 2024
0a3f43c
Unstructured deps
arjbingly Apr 1, 2024
749e02f
Declarative checkout scm
arjbingly Apr 1, 2024
ed5d893
Workspace cleanup
arjbingly Apr 1, 2024
d3611c4
Python env jenkins
arjbingly Apr 1, 2024
974a9fb
Python env jenkins
arjbingly Apr 1, 2024
076ebad
failing build because of environment
sanchitvj Apr 1, 2024
5d0caa4
.
sanchitvj Apr 1, 2024
1b6ebe6
.
sanchitvj Apr 1, 2024
06bb9e5
.
sanchitvj Apr 1, 2024
31ef1fb
.
sanchitvj Apr 1, 2024
75152d8
.
sanchitvj Apr 1, 2024
bc64eaa
.
sanchitvj Apr 1, 2024
aa63b16
changed environment, clean added
sanchitvj Apr 1, 2024
ce0fd6c
resolved multivector retriever, added chroma as docker to Jenkins
sanchitvj Apr 1, 2024
8011aba
excluded ci, and docs from ruff check
sanchitvj Apr 1, 2024
b597720
added .venv to exclude form ruff
sanchitvj Apr 1, 2024
6d7a5bc
.
sanchitvj Apr 1, 2024
c1e43db
.
sanchitvj Apr 1, 2024
4f45713
docker
sanchitvj Apr 1, 2024
ddcd752
-detach before -port
sanchitvj Apr 1, 2024
fcc4b68
toml issue
sanchitvj Apr 1, 2024
54d011e
.
sanchitvj Apr 1, 2024
c862020
.
sanchitvj Apr 1, 2024
7b63ef9
.
sanchitvj Apr 1, 2024
7fc8850
.
sanchitvj Apr 1, 2024
0f113cb
.
sanchitvj Apr 1, 2024
be1991c
.
sanchitvj Apr 1, 2024
7e60085
.
sanchitvj Apr 1, 2024
d4c3249
.
sanchitvj Apr 1, 2024
ca7be65
.
sanchitvj Apr 1, 2024
b0d0455
Jenkisfile, CI
sanchitvj Apr 1, 2024
a5952c3
Merge branch 'tests' of https://github.com/arjbingly/Capstone_5 into …
sanchitvj Apr 1, 2024
20c325d
Merge branch 'tests' of https://github.com/arjbingly/Capstone_5 into …
sanchitvj Apr 1, 2024
b350478
Jenkins pipeline working
sanchitvj Apr 1, 2024
bfa94ff
Merge branch 'tests' of https://github.com/arjbingly/Capstone_5 into …
sanchitvj Apr 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions ci/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Declarative CI pipeline: checks out the repository, builds a Python virtual
// environment, rewrites the project config for the CI host, then runs linting
// (ruff), static type checks (mypy) and the test suite (pytest) against a
// ChromaDB container. Each check publishes a JUnit report.
pipeline {
    agent any

    options {
        // The 'Checkout' stage wipes the workspace and checks out explicitly.
        skipDefaultCheckout(true)
    }
    environment {
        // Location handed to every withPythonEnv(...) call below.
        // NOTE(review): PYTHONPATH is also the variable the Python interpreter
        // reads for its module search path; a dedicated name would be clearer.
        // Confirm nothing depends on this name before renaming it.
        PYTHONPATH = "${env.WORKSPACE}/.venv/bin"
    }

    stages {

        stage('Checkout') {
            steps {
                cleanWs()
                checkout scm
            }
        }

        stage('Create venv') {
            steps {
                sh 'python3 -m venv .venv'
            }
        }

        stage('Install dependencies') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh "pip install -e ."
                }
            }
        }

        stage('Config') {
            steps {
                // Inside a single-quoted sh string the shell does the expansion,
                // so the variable must be written as $JENKINS_HOME.
                // '$env.JENKINS_HOME' expands the (unset) shell variable 'env'
                // and prints the literal text '.JENKINS_HOME'.
                sh 'echo $JENKINS_HOME'
                withPythonEnv(PYTHONPATH) {
                    sh 'python3 ci/modify_config.py'
                    // Replace the test vector store with the backup copy.
                    sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test'
                    sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $JENKINS_HOME/ci_test_data/data/vectordb'
                }
            }
        }

        stage('Linting') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install ruff'
                    // Lint findings mark the stage as failed but not the build.
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                        // NOTE(review): the excluded directory name is tied to
                        // this job's workspace path; verify it if the job or
                        // workspace is renamed.
                        sh 'ruff check . --exclude .pyenv-var-lib-jenkins-workspace-capstone_5-.venv-bin --output-format junit -o ruff-report.xml'
                        // NOTE(review): this rewrites files in the workspace;
                        // 'ruff format --check .' would report instead. Confirm intent.
                        sh 'ruff format .'
                    }
                }
            }
            post {
                always {
                    withChecks('Lint Checks') {
                        junit 'ruff-report.xml'
                    }
                }
            }
        }

        stage('Static type check') {
            steps {
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install mypy'
                    // Type errors mark the stage as failed but not the build.
                    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                        sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml'
                    }
                }
            }
            post {
                always {
                    withChecks('Static Type Checks') {
                        junit 'mypy-report.xml'
                    }
                }
            }
        }

        stage('Tests') {
            steps {
                sh 'echo $USER'
                sh 'docker pull chromadb/chroma'
                // A container left behind by an aborted build would make
                // 'docker run --name jenkins-chroma' fail with a name conflict.
                sh 'docker rm -f jenkins-chroma || true'
                sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma'
                withPythonEnv(PYTHONPATH) {
                    sh 'pip install pytest'
                    sh 'python3 ci/unlock_deeplake.py'
                    sh 'pytest src --junitxml=pytest-report.xml'
                }
            }
            post {
                always {
                    // Tear the container down first and tolerate it being
                    // absent, so a failure here or in the report step below
                    // cannot leave it running for the next build.
                    sh 'docker stop jenkins-chroma || true'
                    sh 'docker rm jenkins-chroma || true'

                    withChecks('Integration Tests') {
                        junit 'pytest-report.xml'
                    }

                    cleanWs(
                        cleanWhenNotBuilt: false,
                        deleteDirs: true,
                        disableDeferredWipeout: true,
                        notFailBuild: true,
                        patterns: [[pattern: '.gitignore', type: 'INCLUDE'],
                                   [pattern: '.propsfile', type: 'EXCLUDE']]
                    )
                }
            }
        }

    }
}
17 changes: 17 additions & 0 deletions ci/modify_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import configparser
import os

from grag.components.utils import get_config

# Point the project configuration at the CI host: the repository root is the
# Jenkins workspace, while data and models are read from directories under
# JENKINS_HOME. The result is written to <workspace>/src/config.ini.
workspace = os.getenv('WORKSPACE')
jenkins_home = os.getenv('JENKINS_HOME')

# Fail early with a clear message: an unset variable would otherwise be
# formatted as the literal string "None" and end up inside the paths below.
missing = [
    name
    for name, value in (('WORKSPACE', workspace), ('JENKINS_HOME', jenkins_home))
    if not value
]
if missing:
    raise SystemExit(
        f"Required environment variable(s) not set: {', '.join(missing)}"
    )

# get_config() supplies the parser object that is modified and written back;
# no separate configparser.ConfigParser() instance is needed.
config = get_config()
config['root']['root_path'] = f'{workspace}'
config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data'
config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models'

with open(f'{workspace}/src/config.ini', 'w') as configfile:
    config.write(configfile)
6 changes: 6 additions & 0 deletions ci/modify_config_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from grag.components.utils import get_config

# Print three path settings from the loaded configuration so they can be
# inspected in the console output. Each label is the exact text the original
# f-string "=" specifier emitted, followed by the repr() of the value.
settings = get_config()
for label, section, key in (
    ("config['root']['root_path']=", 'root', 'root_path'),
    ("config['data']['data_path'] = ", 'data', 'data_path'),
    ("config['llm']['base_dir'] = ", 'llm', 'base_dir'),
):
    print(label + repr(settings[section][key]))
11 changes: 11 additions & 0 deletions ci/unlock_deeplake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os
import shutil
from pathlib import Path

def remove_lock(lock_path):
    """Delete a leftover lock at *lock_path*, if one exists.

    ``shutil.rmtree`` only accepts directories and raises
    ``NotADirectoryError`` for a regular file, so regular files and symlinks
    are unlinked and only real directories are removed recursively.

    Args:
        lock_path: Location of the lock, as a ``str`` or ``Path``.

    Returns:
        True if something was deleted, False if nothing was there.
    """
    lock_path = Path(lock_path)
    is_link = lock_path.is_symlink()
    if not is_link and not lock_path.exists():
        return False

    print('Deleting lock file: {}'.format(lock_path))
    if lock_path.is_dir() and not is_link:
        shutil.rmtree(lock_path)
    else:
        lock_path.unlink()
    return True


if __name__ == '__main__':
    jenkins_home = os.getenv('JENKINS_HOME')
    if not jenkins_home:
        # Path(None) would raise an opaque TypeError; name the real problem.
        raise SystemExit('JENKINS_HOME is not set; cannot locate the lock file')

    lock_path = (
        Path(jenkins_home) / 'ci_test_data/data/vectordb/ci_test/dataset_lock.lock'
    )
    remove_lock(lock_path)
8 changes: 5 additions & 3 deletions cookbook/Basic-RAG/BasicRAG_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from grag.components.multivec_retriever import Retriever
from grag.components.vectordb.deeplake_client import DeepLakeClient

client = DeepLakeClient(collection_name="test")
retriever = Retriever(vectordb=client)
# from grag.components.vectordb.chroma_client import ChromaClient

dir_path = Path(__file__).parents[2] / "data/client_test/test/"
client = DeepLakeClient(collection_name="ci_test")
# client = ChromaClient(collection_name="ci_test")
retriever = Retriever(vectordb=client)

dir_path = Path(__file__).parents[2] / "data/test/pdfs/new_papers"
retriever.ingest(dir_path)
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dependencies = [
"sentence-transformers==2.2.2",
"instructorembedding>=1.0.1",
"streamlit>=1.31.1",
"unstructured>=0.12.3",
"unstructured[pdf]>=0.12.3",
"pdfplumber>=0.10.3",
"llama-cpp-python>=0.2.43",
"tqdm>=4.65.0",
Expand Down Expand Up @@ -102,7 +102,7 @@ exclude_lines = [
[tool.ruff]
line-length = 88
indent-width = 4
extend-exclude = ["tests", "others"]
extend-exclude = ["tests", "others", "docs", "ci"]

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "I", "D"]
Expand Down
2 changes: 1 addition & 1 deletion src/grag/components/multivec_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
self.store = LocalFileStore(self.store_path)
self.retriever = MultiVectorRetriever(
vectorstore=self.vectordb.langchain_client,
docstore=self.store, # type: ignore
byte_store=self.store, # type: ignore
id_key=self.id_key,
)
self.splitter = TextSplitter()
Expand Down
6 changes: 3 additions & 3 deletions src/grag/components/vectordb/chroma_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class ChromaClient(VectorDB):

def __init__(
self,
host: int = chroma_conf["host"],
port: int = chroma_conf["port"],
host: str = chroma_conf["host"],
port: str = chroma_conf["port"],
collection_name: str = chroma_conf["collection_name"],
embedding_type: str = chroma_conf["embedding_type"],
embedding_model: str = chroma_conf["embedding_model"],
Expand All @@ -69,7 +69,7 @@ def __init__(
embedding_model=self.embedding_model, embedding_type=self.embedding_type
).embedding_function

self.client = chromadb.HttpClient(host=self.host, port=self.port)
self.client = chromadb.HttpClient(host=self.host, port=self.port) # type: ignore
self.collection = self.client.get_or_create_collection(
name=self.collection_name
)
Expand Down
4 changes: 3 additions & 1 deletion src/tests/components/multivec_retriever_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json

from grag.components.multivec_retriever import Retriever
from grag.components.vectordb.deeplake_client import DeepLakeClient
from langchain_core.documents import Document

retriever = Retriever() # pass test collection
client = DeepLakeClient(collection_name="ci_test")
retriever = Retriever(vectordb=client) # pass test collection

doc = Document(page_content="Hello worlds", metadata={"source": "bars"})

Expand Down
2 changes: 1 addition & 1 deletion src/tests/rag/basic_rag_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from grag.components.vectordb.deeplake_client import DeepLakeClient
from grag.rag.basic_rag import BasicRAG

client = DeepLakeClient(collection_name="test")
client = DeepLakeClient(collection_name="ci_test")
retriever = Retriever(vectordb=client)


Expand Down