From 0216f36d8ae4bdb523408440c2f61aefd86a7213 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 17:43:04 -0400 Subject: [PATCH 01/45] mypy modifications --- src/grag/components/vectordb/chroma_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index d33e27c..9db9985 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -44,8 +44,8 @@ class ChromaClient(VectorDB): def __init__( self, - host: int = chroma_conf["host"], - port: int = chroma_conf["port"], + host: str = chroma_conf["host"], + port: str = chroma_conf["port"], collection_name: str = chroma_conf["collection_name"], embedding_type: str = chroma_conf["embedding_type"], embedding_model: str = chroma_conf["embedding_model"], From 33d553fa89fa52cb2a76ee3b88c7ae989b47ba60 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:03:12 -0400 Subject: [PATCH 02/45] Jenkisfile, CI --- ci/Jenkinsfile | 91 ++++++++++++++++++++++++++++++++++++++++ ci/modify_config.py | 13 ++++++ ci/modify_config_test.py | 6 +++ 3 files changed, 110 insertions(+) create mode 100644 ci/Jenkinsfile create mode 100644 ci/modify_config.py create mode 100644 ci/modify_config_test.py diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile new file mode 100644 index 0000000..98575d0 --- /dev/null +++ b/ci/Jenkinsfile @@ -0,0 +1,91 @@ +Jenkinsfile (Declarative Pipeline) +pipeline { + agent any + + stages { + + stage('Checkout') { + steps { + checkout scmGit( + branches: [[name: 'tests']], + extensions: [], + userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] + ) + } + } + + stage('Create venv'){ + steps { + sh 'python3 -m venv .venv' + sh 'source .venv/bin/activate' + } + + } + + stage('Install dependencies'){ + steps { + sh "pip install ." + } + + } + + stage('Config'){ + steps{ + sh 'source .venv/bin/activate' + sh 'python3 ci/modify_config.py' + sh 'python3 ci/modify_config_test.py' + } + } + + stage('Linting'){ + steps { + sh 'source .venv/bin/activate' + sh 'pip install ruff' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh '$JENKINS_HOME/.local/bin/ruff check --fix --output-format junit -o ruff-report.xml' + sh '$JENKINS_HOME/.local/bin/ruff format' + } + } + post { + always{ + withChecks('Lint Checks'){ + junit 'ruff-report.xml' + } + } + } + } + + stage('Static type check'){ + steps { + sh 'source .venv/bin/activate' + sh 'pip install mypy' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml' + } + } + post { + always{ + withChecks('Static Type Checks'){ + junit 'mypy-report.xml' + } + + } + } + } + + stage('Tests'){ + steps{ + sh 'source .venv/bin/activate' + sh 'pip install pytest' + sh '$JENKINS_HOME/.local/bin/pytest src --junitxml=pytest-report.xml' + } + post { + always{ + withChecks('Integration Tests'){ + junit 'pytest-report.xml' + } + } + } + } + } +} diff --git a/ci/modify_config.py b/ci/modify_config.py new file mode 100644 index 0000000..66c9b46 --- /dev/null +++ b/ci/modify_config.py @@ -0,0 +1,13 @@ +import os + +from grag.components.utils import get_config + +workspace = os.getenv('WORKSPACE') +jenkins_home = os.getenv('JENKINS_HOME') + +config = get_config() +config['root']['root_path'] = f'{workspace}' +config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data' +config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models' + +config.write(f'{workspace}/src/config.ini') diff --git a/ci/modify_config_test.py b/ci/modify_config_test.py new file mode 100644 index 0000000..b93a40a --- /dev/null +++ b/ci/modify_config_test.py @@ -0,0 +1,6 @@ +from grag.components.utils import get_config + +config = get_config() +print(f"{config['root']['root_path']=}") +print(f"{config['data']['data_path'] = }") +print(f"{config['llm']['base_dir'] = }") From 24f22da3ded5061a1be531b03f5592c9745d238c Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:04:22 -0400 Subject: [PATCH 03/45] issue with ci --- ci/Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 98575d0..c6d466e 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -1,4 +1,3 @@ -Jenkinsfile (Declarative Pipeline) pipeline { agent any From 15e904f7ced40d4c5359e21ba19d58dd6e97b554 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:06:35 -0400 Subject: [PATCH 04/45] jenkinsfile error --- ci/Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index c6d466e..548284d 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -31,8 +31,9 @@ pipeline { stage('Config'){ steps{ sh 'source .venv/bin/activate' - sh 'python3 ci/modify_config.py' + sh 'pwd' sh 'python3 ci/modify_config_test.py' + sh 'python3 ci/modify_config.py' } } From 1b58f91a16f761c96339ad40afb408daf5f48bcd Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:13:20 -0400 Subject: [PATCH 05/45] virtual environment issue --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 548284d..ea182eb 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -23,6 +23,7 @@ pipeline { stage('Install dependencies'){ steps { + sh 'source .venv/bin/activate' sh "pip install ." } From 6eb78ee304813167100d42480fa333b7fa12d549 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:22:50 -0400 Subject: [PATCH 06/45] grag installation --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ea182eb..82d3e2d 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -24,7 +24,7 @@ pipeline { stage('Install dependencies'){ steps { sh 'source .venv/bin/activate' - sh "pip install ." + sh "pip install -e ." } } From e683a1a8016b486aa449d674a8cc6d788c187dc5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:33:56 -0400 Subject: [PATCH 07/45] rewriting config.ini --- ci/modify_config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/modify_config.py b/ci/modify_config.py index 66c9b46..ba7927f 100644 --- a/ci/modify_config.py +++ b/ci/modify_config.py @@ -1,7 +1,10 @@ +import configparser import os from grag.components.utils import get_config +config = configparser.ConfigParser() + workspace = os.getenv('WORKSPACE') jenkins_home = os.getenv('JENKINS_HOME') @@ -10,4 +13,6 @@ config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data' config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models' -config.write(f'{workspace}/src/config.ini') +# config.write(f'{workspace}/src/config.ini') +with open(f'{workspace}/src/config.ini', 'w') as configfile: + config.write(configfile) From 8ad88fb3fc1ae1ae96fb7623b87515c421452d42 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:35:53 -0400 Subject: [PATCH 08/45] model path modified --- ci/modify_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/modify_config.py b/ci/modify_config.py index ba7927f..41efc35 100644 --- a/ci/modify_config.py +++ b/ci/modify_config.py @@ -11,7 +11,7 @@ config = get_config() config['root']['root_path'] = f'{workspace}' config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data' -config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models' +config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models' # config.write(f'{workspace}/src/config.ini') with open(f'{workspace}/src/config.ini', 'w') as configfile: From c8e48e66aade359edd09b2929f62fd8c558c61ec Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:54:20 -0400 Subject: [PATCH 09/45] . --- ci/Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 82d3e2d..830d2a9 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -17,6 +17,8 @@ pipeline { steps { sh 'python3 -m venv .venv' sh 'source .venv/bin/activate' + sh 'python3 -m pip install pip --upgrade' + sh 'pip install pyopenssl --upgrade' } } From 676169ad2819953acc5201e7fbaf17122d763404 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 18:59:21 -0400 Subject: [PATCH 10/45] installations --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 830d2a9..4b0d937 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -19,6 +19,7 @@ pipeline { sh 'source .venv/bin/activate' sh 'python3 -m pip install pip --upgrade' sh 'pip install pyopenssl --upgrade' + sh 'pip install pdf2image' } } From 88b3e60ce48ffa6be4aa82117b980b2fc9a42e16 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 19:01:51 -0400 Subject: [PATCH 11/45] . --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 4b0d937..9e702fa 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -20,6 +20,7 @@ pipeline { sh 'python3 -m pip install pip --upgrade' sh 'pip install pyopenssl --upgrade' sh 'pip install pdf2image' + sh 'pip install pillow-heif' } } From 43122a128897e77fd73fb24f2590f5fc63c16a5a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 31 Mar 2024 19:10:53 -0400 Subject: [PATCH 12/45] modified jenkisfile --- ci/Jenkinsfile | 4 ++-- ci/modify_config.py | 1 - src/grag/components/vectordb/chroma_client.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 9e702fa..8b6a489 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -47,8 +47,8 @@ pipeline { sh 'source .venv/bin/activate' sh 'pip install ruff' catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh '$JENKINS_HOME/.local/bin/ruff check --fix --output-format junit -o ruff-report.xml' - sh '$JENKINS_HOME/.local/bin/ruff format' + sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' + sh '$JENKINS_HOME/.local/bin/ruff format src' } } post { diff --git a/ci/modify_config.py b/ci/modify_config.py index 41efc35..759e88d 100644 --- a/ci/modify_config.py +++ b/ci/modify_config.py @@ -13,6 +13,5 @@ config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data' config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models' -# config.write(f'{workspace}/src/config.ini') with open(f'{workspace}/src/config.ini', 'w') as configfile: config.write(configfile) diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index 9db9985..09969ca 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -69,7 +69,7 @@ def __init__( embedding_model=self.embedding_model, embedding_type=self.embedding_type ).embedding_function - self.client = chromadb.HttpClient(host=self.host, port=self.port) + self.client = chromadb.HttpClient(host=self.host, port=self.port) # type: ignore self.collection = self.client.get_or_create_collection( name=self.collection_name ) From c7be5f1b73484a6b4a0bcb192d7017e64aeb9813 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:07:13 -0400 Subject: [PATCH 13/45] Jenkins declarative checkout --- ci/Jenkinsfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 8b6a489..6690b99 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -5,11 +5,12 @@ pipeline { stage('Checkout') { steps { - checkout scmGit( - branches: [[name: 'tests']], - extensions: [], - userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] - ) +// checkout scmGit( +// branches: [[name: 'tests']], +// extensions: [], +// userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] +// ) + checkout scm } } From 0a3f43c6fc53873c69c25e960644445eb3f98a74 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:07:25 -0400 Subject: [PATCH 14/45] Unstructured deps --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e27f822..9ff73bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "sentence-transformers==2.2.2", "instructorembedding>=1.0.1", "streamlit>=1.31.1", - "unstructured>=0.12.3", + "unstructured[all]>=0.12.3", "pdfplumber>=0.10.3", "llama-cpp-python>=0.2.43", "tqdm>=4.65.0", From 749e02fa3702af8a8c38bcbcf520ab6802b3eb87 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:10:16 -0400 Subject: [PATCH 15/45] Declarative checkout scm --- ci/Jenkinsfile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 6690b99..1a4f6cf 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -3,16 +3,16 @@ pipeline { stages { - stage('Checkout') { - steps { -// checkout scmGit( -// branches: [[name: 'tests']], -// extensions: [], -// userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] -// ) - checkout scm - } - } +// stage('Checkout') { +// steps { +// // checkout scmGit( +// // branches: [[name: 'tests']], +// // extensions: [], +// // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] +// // ) +// checkout scm +// } +// } stage('Create venv'){ steps { From ed5d89324441f9a700a320325da60d347fee8505 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:17:42 -0400 Subject: [PATCH 16/45] Workspace cleanup --- ci/Jenkinsfile | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 1a4f6cf..ca8455f 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -1,18 +1,23 @@ pipeline { agent any + options{ + skipDefaultCheckout(true) + } + stages { -// stage('Checkout') { -// steps { -// // checkout scmGit( -// // branches: [[name: 'tests']], -// // extensions: [], -// // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] -// // ) -// checkout scm -// } -// } + stage('Checkout') { + steps { +// checkout scmGit( +// branches: [[name: 'tests']], +// extensions: [], +// userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] +// ) + cleanWs() + checkout scm + } + } stage('Create venv'){ steps { @@ -90,8 +95,18 @@ pipeline { withChecks('Integration Tests'){ junit 'pytest-report.xml' } + + cleanWs( + cleanWhenNotBuilt: false, + deleteDirs: true, + disableDeferredWipeout: true, + notFailBuild: true, + patterns: [[pattern: '.gitignore', type: 'INCLUDE'], + [pattern: '.propsfile', type: 'EXCLUDE']] + ) } } } + } } From d3611c47020a53b5521347d26d8fca56af1d7b8c Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:33:22 -0400 Subject: [PATCH 17/45] Python env jenkins --- ci/Jenkinsfile | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ca8455f..8c609ed 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -4,6 +4,10 @@ pipeline { options{ skipDefaultCheckout(true) } + environment { + PYTHONPATH = "${env.WORKSPACE}/myenv" + } + stages { @@ -22,36 +26,44 @@ pipeline { stage('Create venv'){ steps { sh 'python3 -m venv .venv' - sh 'source .venv/bin/activate' - sh 'python3 -m pip install pip --upgrade' - sh 'pip install pyopenssl --upgrade' - sh 'pip install pdf2image' - sh 'pip install pillow-heif' + withPythonEnv(myenv){ +// sh 'source .venv/bin/activate' + sh 'python3 -m pip install pip --upgrade' +// sh 'pip install pyopenssl --upgrade' +// sh 'pip install pdf2image' +// sh 'pip install pillow-heif' + } } } stage('Install dependencies'){ steps { - sh 'source .venv/bin/activate' - sh "pip install -e ." +// sh 'source .venv/bin/activate' + withPythonEnv(myenv){ + sh "pip install -e ." + } } } stage('Config'){ steps{ - sh 'source .venv/bin/activate' - sh 'pwd' - sh 'python3 ci/modify_config_test.py' - sh 'python3 ci/modify_config.py' +// sh 'source .venv/bin/activate' +// sh 'pwd' + withPythonEnv(myenv){ + sh 'python3 ci/modify_config_test.py' + sh 'python3 ci/modify_config.py' + } } } stage('Linting'){ steps { - sh 'source .venv/bin/activate' - sh 'pip install ruff' +// sh 'source .venv/bin/activate' + withPythonEnv(myenv){ + sh 'pip install ruff' + } catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' sh '$JENKINS_HOME/.local/bin/ruff format src' @@ -107,6 +119,6 @@ pipeline { } } } - + } } From 974a9fb24d9271e453c09c63786e798e285e4673 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Mon, 1 Apr 2024 15:34:49 -0400 Subject: [PATCH 18/45] Python env jenkins --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 8c609ed..382ab93 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -25,7 +25,7 @@ pipeline { stage('Create venv'){ steps { - sh 'python3 -m venv .venv' +// sh 'python3 -m venv .venv' withPythonEnv(myenv){ // sh 'source .venv/bin/activate' sh 'python3 -m pip install pip --upgrade' From 076ebade4850192c121d16e021191c4cfc22b4d9 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:04:56 -0400 Subject: [PATCH 19/45] failing build because of environment --- ci/Jenkinsfile | 10 +++++----- cookbook/Basic-RAG/BasicRAG_ingest.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 382ab93..e0896c1 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { // extensions: [], // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] // ) - cleanWs() +// cleanWs() checkout scm } } @@ -26,7 +26,7 @@ pipeline { stage('Create venv'){ steps { // sh 'python3 -m venv .venv' - withPythonEnv(myenv){ + withPythonEnv('myenv'){ // sh 'source .venv/bin/activate' sh 'python3 -m pip install pip --upgrade' // sh 'pip install pyopenssl --upgrade' @@ -40,7 +40,7 @@ pipeline { stage('Install dependencies'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv(myenv){ + withPythonEnv('myenv'){ sh "pip install -e ." } } @@ -51,7 +51,7 @@ pipeline { steps{ // sh 'source .venv/bin/activate' // sh 'pwd' - withPythonEnv(myenv){ + withPythonEnv('myenv'){ sh 'python3 ci/modify_config_test.py' sh 'python3 ci/modify_config.py' } @@ -61,7 +61,7 @@ pipeline { stage('Linting'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv(myenv){ + withPythonEnv('myenv'){ sh 'pip install ruff' } catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ diff --git a/cookbook/Basic-RAG/BasicRAG_ingest.py b/cookbook/Basic-RAG/BasicRAG_ingest.py index e7b38a0..db63aac 100644 --- a/cookbook/Basic-RAG/BasicRAG_ingest.py +++ b/cookbook/Basic-RAG/BasicRAG_ingest.py @@ -5,9 +5,9 @@ from grag.components.multivec_retriever import Retriever from grag.components.vectordb.deeplake_client import DeepLakeClient -client = DeepLakeClient(collection_name="test") +client = DeepLakeClient(collection_name="ci_test") retriever = Retriever(vectordb=client) -dir_path = Path(__file__).parents[2] / "data/client_test/test/" +dir_path = Path(__file__).parents[2] / "data/ci_test/" retriever.ingest(dir_path) From 5d0caa44f45218b50e4ded4a036e83b0bbb916e4 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:07:16 -0400 Subject: [PATCH 20/45] . --- ci/Jenkinsfile | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index e0896c1..7026073 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -23,19 +23,19 @@ pipeline { } } - stage('Create venv'){ - steps { -// sh 'python3 -m venv .venv' - withPythonEnv('myenv'){ -// sh 'source .venv/bin/activate' - sh 'python3 -m pip install pip --upgrade' -// sh 'pip install pyopenssl --upgrade' -// sh 'pip install pdf2image' -// sh 'pip install pillow-heif' - } - } - - } +// stage('Create venv'){ +// steps { +// // sh 'python3 -m venv .venv' +// withPythonEnv('myenv'){ +// // sh 'source .venv/bin/activate' +// // sh 'python3 -m pip install pip --upgrade' +// // sh 'pip install pyopenssl --upgrade' +// // sh 'pip install pdf2image' +// // sh 'pip install pillow-heif' +// } +// } +// +// } stage('Install dependencies'){ steps { From 1b6ebe68bfe7f4cede81399d28450a930d1d398a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:16:11 -0400 Subject: [PATCH 21/45] . --- ci/Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 7026073..573f761 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -26,7 +26,7 @@ pipeline { // stage('Create venv'){ // steps { // // sh 'python3 -m venv .venv' -// withPythonEnv('myenv'){ +// withPythonEnv('Python3'){ // // sh 'source .venv/bin/activate' // // sh 'python3 -m pip install pip --upgrade' // // sh 'pip install pyopenssl --upgrade' @@ -40,7 +40,7 @@ pipeline { stage('Install dependencies'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv('myenv'){ + withPythonEnv('Python3'){ sh "pip install -e ." } } @@ -51,7 +51,7 @@ pipeline { steps{ // sh 'source .venv/bin/activate' // sh 'pwd' - withPythonEnv('myenv'){ + withPythonEnv('Python3'){ sh 'python3 ci/modify_config_test.py' sh 'python3 ci/modify_config.py' } @@ -61,7 +61,7 @@ pipeline { stage('Linting'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv('myenv'){ + withPythonEnv('Python3'){ sh 'pip install ruff' } catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ From 06bb9e564a07be17ea27b066048abdab3137a48b Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:18:46 -0400 Subject: [PATCH 22/45] . --- ci/Jenkinsfile | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 573f761..1c49273 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -5,7 +5,7 @@ pipeline { skipDefaultCheckout(true) } environment { - PYTHONPATH = "${env.WORKSPACE}/myenv" + PYTHONPATH = "${env.WORKSPACE}/.venv" } @@ -23,24 +23,24 @@ pipeline { } } -// stage('Create venv'){ -// steps { -// // sh 'python3 -m venv .venv' -// withPythonEnv('Python3'){ -// // sh 'source .venv/bin/activate' -// // sh 'python3 -m pip install pip --upgrade' -// // sh 'pip install pyopenssl --upgrade' -// // sh 'pip install pdf2image' -// // sh 'pip install pillow-heif' -// } -// } -// -// } + stage('Create venv'){ + steps { + sh 'python3 -m venv .venv' +// withPythonEnv(PYTHONPATH){ +// sh 'source .venv/bin/activate' +// sh 'python3 -m pip install pip --upgrade' +// sh 'pip install pyopenssl --upgrade' +// sh 'pip install pdf2image' +// sh 'pip install pillow-heif' + } + } + + } stage('Install dependencies'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv('Python3'){ + withPythonEnv(PYTHONPATH){ sh "pip install -e ." } } From 31ef1fbd313ac4027072a59cde8ef7dc4148b31c Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:22:07 -0400 Subject: [PATCH 23/45] . --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 1c49273..bf253f9 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -32,7 +32,7 @@ pipeline { // sh 'pip install pyopenssl --upgrade' // sh 'pip install pdf2image' // sh 'pip install pillow-heif' - } + } } From 75152d8feee66584ffad1bf36cf9691b4c21f5cf Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:24:05 -0400 Subject: [PATCH 24/45] . --- ci/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index bf253f9..204c475 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -5,7 +5,7 @@ pipeline { skipDefaultCheckout(true) } environment { - PYTHONPATH = "${env.WORKSPACE}/.venv" + PYTHONPATH = "${env.WORKSPACE}/.venv/bin" } @@ -32,7 +32,7 @@ pipeline { // sh 'pip install pyopenssl --upgrade' // sh 'pip install pdf2image' // sh 'pip install pillow-heif' - + } } @@ -51,7 +51,7 @@ pipeline { steps{ // sh 'source .venv/bin/activate' // sh 'pwd' - withPythonEnv('Python3'){ + withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config_test.py' sh 'python3 ci/modify_config.py' } From bc64eaaa6688c40cc9614c978a22b5fe5e5f61b0 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:32:31 -0400 Subject: [PATCH 25/45] . --- ci/Jenkinsfile | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 204c475..99bc564 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -52,7 +52,7 @@ pipeline { // sh 'source .venv/bin/activate' // sh 'pwd' withPythonEnv(PYTHONPATH){ - sh 'python3 ci/modify_config_test.py' +// sh 'python3 ci/modify_config_test.py' sh 'python3 ci/modify_config.py' } } @@ -61,13 +61,19 @@ pipeline { stage('Linting'){ steps { // sh 'source .venv/bin/activate' - withPythonEnv('Python3'){ + withPythonEnv(PYTHONPATH){ sh 'pip install ruff' + + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh 'ruff check --fix src --output-format junit -o ruff-report.xml' + sh 'ruff format src' } - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' - sh '$JENKINS_HOME/.local/bin/ruff format src' - } + } + +// catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ +// sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' +// sh '$JENKINS_HOME/.local/bin/ruff format src' +// } } post { always{ From aa63b1678232abeda308176e7f4c5f8849738d40 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 16:39:54 -0400 Subject: [PATCH 26/45] changed environment, clean added --- ci/Jenkinsfile | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 99bc564..14f8075 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { // extensions: [], // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] // ) -// cleanWs() + cleanWs() checkout scm } } @@ -32,9 +32,7 @@ pipeline { // sh 'pip install pyopenssl --upgrade' // sh 'pip install pdf2image' // sh 'pip install pillow-heif' - } - } stage('Install dependencies'){ @@ -63,13 +61,11 @@ pipeline { // sh 'source .venv/bin/activate' withPythonEnv(PYTHONPATH){ sh 'pip install ruff' - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh 'ruff check --fix src --output-format junit -o ruff-report.xml' + sh 'ruff check src --output-format junit -o ruff-report.xml' sh 'ruff format src' + } } - } - // catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ // sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' // sh '$JENKINS_HOME/.local/bin/ruff format src' @@ -86,10 +82,12 @@ pipeline { stage('Static type check'){ steps { - sh 'source .venv/bin/activate' - sh 'pip install mypy' - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml' +// sh 'source .venv/bin/activate' + withPythonEnv(PYTHONPATH){ + sh 'pip install mypy' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml' + } } } post { @@ -104,9 +102,11 @@ pipeline { stage('Tests'){ steps{ - sh 'source .venv/bin/activate' - sh 'pip install pytest' - sh '$JENKINS_HOME/.local/bin/pytest src --junitxml=pytest-report.xml' + withPythonEnv(PYTHONPATH){ +// sh 'source .venv/bin/activate' + sh 'pip install pytest' + sh '$JENKINS_HOME/.local/bin/pytest src --junitxml=pytest-report.xml' + } } post { always{ From ce0fd6cf965ea6359d8271536d8c9f8629201a59 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 17:49:43 -0400 Subject: [PATCH 27/45] resolved multivector retriever, added chroma as docker to Jenkins --- ci/Jenkinsfile | 17 +++++++---------- ci/unlock_deeplake.py | 10 ++++++++++ cookbook/Basic-RAG/BasicRAG_ingest.py | 6 ++++-- src/grag/components/multivec_retriever.py | 2 +- src/tests/components/multivec_retriever_test.py | 4 +++- src/tests/rag/basic_rag_test.py | 2 +- 6 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 ci/unlock_deeplake.py diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 14f8075..a3e11d5 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -37,7 +37,6 @@ pipeline { stage('Install dependencies'){ steps { -// sh 'source .venv/bin/activate' withPythonEnv(PYTHONPATH){ sh "pip install -e ." } @@ -47,10 +46,7 @@ pipeline { stage('Config'){ steps{ -// sh 'source .venv/bin/activate' -// sh 'pwd' withPythonEnv(PYTHONPATH){ -// sh 'python3 ci/modify_config_test.py' sh 'python3 ci/modify_config.py' } } @@ -58,12 +54,11 @@ pipeline { stage('Linting'){ steps { -// sh 'source .venv/bin/activate' withPythonEnv(PYTHONPATH){ sh 'pip install ruff' catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh 'ruff check src --output-format junit -o ruff-report.xml' - sh 'ruff format src' + sh 'ruff check . --output-format junit -o ruff-report.xml' + sh 'ruff format .' } } // catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ @@ -82,7 +77,6 @@ pipeline { stage('Static type check'){ steps { -// sh 'source .venv/bin/activate' withPythonEnv(PYTHONPATH){ sh 'pip install mypy' catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ @@ -102,10 +96,12 @@ pipeline { stage('Tests'){ steps{ + sh 'docker pull chromadb/chroma' + sh 'docker run -p 8000:8000 chromadb/chroma' withPythonEnv(PYTHONPATH){ -// sh 'source .venv/bin/activate' sh 'pip install pytest' - sh '$JENKINS_HOME/.local/bin/pytest src --junitxml=pytest-report.xml' + sh 'python3 ci/unlock_deeplake.py' + sh 'pytest src --junitxml=pytest-report.xml' } } post { @@ -113,6 +109,7 @@ pipeline { withChecks('Integration Tests'){ junit 'pytest-report.xml' } + sh 'docker stop chromadb/chroma' cleanWs( cleanWhenNotBuilt: false, diff --git a/ci/unlock_deeplake.py b/ci/unlock_deeplake.py new file mode 100644 index 0000000..f1843c2 --- /dev/null +++ b/ci/unlock_deeplake.py @@ -0,0 +1,10 @@ +import os +from pathlib import Path + +jenkins_home = os.getenv('JENKINS_HOME') + +lock_path = Path(jenkins_home) / 'ci_test_data/data/vectordb/ci_test/dataset_lock.lock' + +if os.path.exists(lock_path): + os.remove(lock_path) + print('Deleting lock file: {}'.format(lock_path)) diff --git a/cookbook/Basic-RAG/BasicRAG_ingest.py b/cookbook/Basic-RAG/BasicRAG_ingest.py index db63aac..00e8f0b 100644 --- a/cookbook/Basic-RAG/BasicRAG_ingest.py +++ b/cookbook/Basic-RAG/BasicRAG_ingest.py @@ -5,9 +5,11 @@ from grag.components.multivec_retriever import Retriever from grag.components.vectordb.deeplake_client import DeepLakeClient +# from grag.components.vectordb.chroma_client import ChromaClient + client = DeepLakeClient(collection_name="ci_test") +# client = ChromaClient(collection_name="ci_test") retriever = Retriever(vectordb=client) -dir_path = Path(__file__).parents[2] / "data/ci_test/" - +dir_path = Path(__file__).parents[2] / "data/test/pdfs/new_papers" retriever.ingest(dir_path) diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index 66af4c6..41bff2d 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -75,7 +75,7 @@ def __init__( self.store = LocalFileStore(self.store_path) self.retriever = MultiVectorRetriever( vectorstore=self.vectordb.langchain_client, - docstore=self.store, # type: ignore + byte_store=self.store, # type: ignore id_key=self.id_key, ) self.splitter = TextSplitter() diff --git a/src/tests/components/multivec_retriever_test.py b/src/tests/components/multivec_retriever_test.py index f2544bf..3f847bd 100644 --- a/src/tests/components/multivec_retriever_test.py +++ b/src/tests/components/multivec_retriever_test.py @@ -1,9 +1,11 @@ import json from grag.components.multivec_retriever import Retriever +from grag.components.vectordb.deeplake_client import DeepLakeClient from langchain_core.documents import Document -retriever = Retriever() # pass test collection +client = DeepLakeClient(collection_name="ci_test") +retriever = Retriever(vectordb=client) # pass test collection doc = Document(page_content="Hello worlds", metadata={"source": "bars"}) diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py index b8c2ceb..0b93643 100644 --- a/src/tests/rag/basic_rag_test.py +++ b/src/tests/rag/basic_rag_test.py @@ -4,7 +4,7 @@ from grag.components.vectordb.deeplake_client import DeepLakeClient from grag.rag.basic_rag import BasicRAG -client = DeepLakeClient(collection_name="test") +client = DeepLakeClient(collection_name="ci_test") retriever = Retriever(vectordb=client) From 8011abaf397fc005e765d1fa286c4e837e576cbe Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:04:00 -0400 Subject: [PATCH 28/45] excluded ci, and docs from ruff check --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9ff73bd..88e2a7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 indent-width = 4 -extend-exclude = ["tests", "others"] +extend-exclude = ["tests", "others", "docs", "ci"] [tool.ruff.lint] select = ["E4", "E7", "E9", "F", "I", "D"] From b597720b613f6a35d5f62cb7aee7acb373b44095 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:12:43 -0400 Subject: [PATCH 29/45] added .venv to exclude form ruff --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 88e2a7d..14a0afc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 indent-width = 4 -extend-exclude = ["tests", "others", "docs", "ci"] +extend-exclude = ["tests", "others", "docs", "ci", ".venv"] [tool.ruff.lint] select = ["E4", "E7", "E9", "F", "I", "D"] From 6d7a5bccbcf9c6d8d8ac82fdb4c7bab2c0052967 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:22:08 -0400 Subject: [PATCH 30/45] . --- ci/Jenkinsfile | 24 ++++++++++++------------ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index a3e11d5..bdac8e6 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { // extensions: [], // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] // ) - cleanWs() +// cleanWs() checkout scm } } @@ -57,7 +57,7 @@ pipeline { withPythonEnv(PYTHONPATH){ sh 'pip install ruff' catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ - sh 'ruff check . --output-format junit -o ruff-report.xml' + sh 'ruff check . --exclude .pyenv-var-lib-jenkins-workspace-capstone_5-.venv-bin --output-format junit -o ruff-report.xml' sh 'ruff format .' } } @@ -96,8 +96,8 @@ pipeline { stage('Tests'){ steps{ - sh 'docker pull chromadb/chroma' - sh 'docker run -p 8000:8000 chromadb/chroma' + sh 'sudo docker pull chromadb/chroma' + sh 'sudo docker run -p 8000:8000 chromadb/chroma' withPythonEnv(PYTHONPATH){ sh 'pip install pytest' sh 'python3 ci/unlock_deeplake.py' @@ -111,14 +111,14 @@ pipeline { } sh 'docker stop chromadb/chroma' - cleanWs( - cleanWhenNotBuilt: false, - deleteDirs: true, - disableDeferredWipeout: true, - notFailBuild: true, - patterns: [[pattern: '.gitignore', type: 'INCLUDE'], - [pattern: '.propsfile', type: 'EXCLUDE']] - ) +// cleanWs( +// cleanWhenNotBuilt: false, +// deleteDirs: true, +// disableDeferredWipeout: true, +// notFailBuild: true, +// patterns: [[pattern: '.gitignore', type: 'INCLUDE'], +// [pattern: '.propsfile', type: 'EXCLUDE']] +// ) } } } diff --git a/pyproject.toml b/pyproject.toml index 14a0afc..88e2a7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 indent-width = 4 -extend-exclude = ["tests", "others", "docs", "ci", ".venv"] +extend-exclude = ["tests", "others", "docs", "ci"] [tool.ruff.lint] select = ["E4", "E7", "E9", "F", "I", "D"] From c1e43dbb95ec6613b219dd64fc177d1d3248e743 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:25:03 -0400 Subject: [PATCH 31/45] . --- ci/Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index bdac8e6..8a76076 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -96,8 +96,9 @@ pipeline { stage('Tests'){ steps{ - sh 'sudo docker pull chromadb/chroma' - sh 'sudo docker run -p 8000:8000 chromadb/chroma' + sh 'echo $USER' + sh 'docker pull chromadb/chroma' + sh 'docker run -p 8000:8000 chromadb/chroma' withPythonEnv(PYTHONPATH){ sh 'pip install pytest' sh 'python3 ci/unlock_deeplake.py' From 4f45713cf05240a3e48a96f7429f41e212a0e1aa Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:30:56 -0400 Subject: [PATCH 32/45] docker --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 8a76076..311a4e3 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -98,7 +98,7 @@ pipeline { steps{ sh 'echo $USER' sh 'docker pull chromadb/chroma' - sh 'docker run -p 8000:8000 chromadb/chroma' + sh 'docker run -p 8000:8000 chromadb/chroma -d' withPythonEnv(PYTHONPATH){ sh 'pip install pytest' sh 'python3 ci/unlock_deeplake.py' From ddcd752a670ecd783865f668d6e40d494eeff6c4 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:33:05 -0400 Subject: [PATCH 33/45] -detach before -port --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 311a4e3..5e0e791 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -98,7 +98,7 @@ pipeline { steps{ sh 'echo $USER' sh 'docker pull chromadb/chroma' - sh 'docker run -p 8000:8000 chromadb/chroma -d' + sh 'docker run -d -p 8000:8000 chromadb/chroma' withPythonEnv(PYTHONPATH){ sh 'pip install pytest' sh 'python3 ci/unlock_deeplake.py' From fcc4b682a83dcdf7f7a9a6e1bba642d83e00a3e4 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:38:19 -0400 Subject: [PATCH 34/45] toml issue --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 88e2a7d..9859013 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "sentence-transformers==2.2.2", "instructorembedding>=1.0.1", "streamlit>=1.31.1", - "unstructured[all]>=0.12.3", + "unstructured[pdf]>=0.12.3", "pdfplumber>=0.10.3", "llama-cpp-python>=0.2.43", "tqdm>=4.65.0", From 54d011eaf9ffe03cb5886431bcd47443825104bc Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:41:07 -0400 Subject: [PATCH 35/45] . --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 5e0e791..25fdf04 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { // extensions: [], // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] // ) -// cleanWs() + cleanWs() checkout scm } } From c86202013c7605da26c8546c2359ccd8644a08cf Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:56:29 -0400 Subject: [PATCH 36/45] . --- ci/Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 25fdf04..481dd42 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -98,7 +98,7 @@ pipeline { steps{ sh 'echo $USER' sh 'docker pull chromadb/chroma' - sh 'docker run -d -p 8000:8000 chromadb/chroma' + sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma' withPythonEnv(PYTHONPATH){ sh 'pip install pytest' sh 'python3 ci/unlock_deeplake.py' @@ -107,10 +107,12 @@ pipeline { } post { always{ + sh withChecks('Integration Tests'){ junit 'pytest-report.xml' } - sh 'docker stop chromadb/chroma' + sh 'docker stop jenkins-chroma' + sh 'docker rm jenkins-chroma' // cleanWs( // cleanWhenNotBuilt: false, From 7b63ef9914c3a2a0d4d12f4727dd102d281a02d5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 18:57:27 -0400 Subject: [PATCH 37/45] . --- ci/Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 481dd42..5d3c64d 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -107,7 +107,6 @@ pipeline { } post { always{ - sh withChecks('Integration Tests'){ junit 'pytest-report.xml' } From 7fc88500ceb436933e243b2ebfbb5a9640be3944 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:04:28 -0400 Subject: [PATCH 38/45] . --- ci/Jenkinsfile | 2 +- ci/unlock_deeplake.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 5d3c64d..4fbcab7 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -18,7 +18,7 @@ pipeline { // extensions: [], // userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] // ) - cleanWs() +// cleanWs() checkout scm } } diff --git a/ci/unlock_deeplake.py b/ci/unlock_deeplake.py index f1843c2..d766941 100644 --- a/ci/unlock_deeplake.py +++ b/ci/unlock_deeplake.py @@ -1,4 +1,5 @@ import os +import shutil from pathlib import Path jenkins_home = os.getenv('JENKINS_HOME') @@ -6,5 +7,5 @@ lock_path = Path(jenkins_home) / 'ci_test_data/data/vectordb/ci_test/dataset_lock.lock' if os.path.exists(lock_path): - os.remove(lock_path) + shutil.rmtree(lock_path) print('Deleting lock file: {}'.format(lock_path)) From 0f113cb72d802985d76a90c5089e46638e5a72de Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:12:31 -0400 Subject: [PATCH 39/45] . --- ci/Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 4fbcab7..582620a 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -48,6 +48,8 @@ pipeline { steps{ withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config.py' + sh 'rm -rf $(JENKINS_HOME)/ci_test_data/data/vectordb/ci_test' + sh 'cp -r $(JENKINS_HOME)/ci_test_data/data/backup_vectordb/ci_test $(JENKINS_HOME)/ci_test_data/data/vectordb' } } } From be1991c05cdb862bcef144733abdca6bf5248856 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:13:59 -0400 Subject: [PATCH 40/45] . --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 582620a..fc25cdb 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -46,6 +46,7 @@ pipeline { stage('Config'){ steps{ + sh 'echo $JENKINS_HOME' withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config.py' sh 'rm -rf $(JENKINS_HOME)/ci_test_data/data/vectordb/ci_test' From 7e60085917667c7abc6a245fd1b11be7f8011ba7 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:14:36 -0400 Subject: [PATCH 41/45] . --- ci/Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index fc25cdb..4ae7255 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -46,6 +46,7 @@ pipeline { stage('Config'){ steps{ + sh 'echo $env.JENKINS_HOME' sh 'echo $JENKINS_HOME' withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config.py' From d4c3249b31db167c81522cbe46d9e0a6eaa2239a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:16:01 -0400 Subject: [PATCH 42/45] . --- ci/Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 4ae7255..ce9699b 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -47,11 +47,10 @@ pipeline { stage('Config'){ steps{ sh 'echo $env.JENKINS_HOME' - sh 'echo $JENKINS_HOME' withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config.py' - sh 'rm -rf $(JENKINS_HOME)/ci_test_data/data/vectordb/ci_test' - sh 'cp -r $(JENKINS_HOME)/ci_test_data/data/backup_vectordb/ci_test $(JENKINS_HOME)/ci_test_data/data/vectordb' + sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test' + sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $(JENKINS_HOME)/ci_test_data/data/vectordb' } } } From ca7be65dc57d1d1a1a37bf9f63c9cfa5b948fa38 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:26:20 -0400 Subject: [PATCH 43/45] . --- ci/Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index ce9699b..22897f8 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -50,7 +50,7 @@ pipeline { withPythonEnv(PYTHONPATH){ sh 'python3 ci/modify_config.py' sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test' - sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $(JENKINS_HOME)/ci_test_data/data/vectordb' + sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $JENKINS_HOME/ci_test_data/data/vectordb' } } } From b0d0455dee15668ef01c9643bd690ba1bb3878d9 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:32:17 -0400 Subject: [PATCH 44/45] Jenkisfile, CI issue with ci jenkinsfile error virtual environment issue grag installation rewriting config.ini model path modified . installations . modified jenkisfile Jenkins declarative checkout Unstructured deps Declarative checkout scm Workspace cleanup Python env jenkins Python env jenkins failing build because of environment . . . . . . changed environment, clean added resolved multivector retriever, added chroma as docker to Jenkins excluded ci, and docs from ruff check added .venv to exclude form ruff . . docker -detach before -port toml issue . . . . . . . . . --- ci/Jenkinsfile | 132 ++++++++++++++++++ ci/modify_config.py | 17 +++ ci/modify_config_test.py | 6 + ci/unlock_deeplake.py | 11 ++ cookbook/Basic-RAG/BasicRAG_ingest.py | 8 +- pyproject.toml | 4 +- src/grag/components/multivec_retriever.py | 2 +- src/grag/components/vectordb/chroma_client.py | 2 +- .../components/multivec_retriever_test.py | 4 +- src/tests/rag/basic_rag_test.py | 2 +- 10 files changed, 179 insertions(+), 9 deletions(-) create mode 100644 ci/Jenkinsfile create mode 100644 ci/modify_config.py create mode 100644 ci/modify_config_test.py create mode 100644 ci/unlock_deeplake.py diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile new file mode 100644 index 0000000..22897f8 --- /dev/null +++ b/ci/Jenkinsfile @@ -0,0 +1,132 @@ +pipeline { + agent any + + options{ + skipDefaultCheckout(true) + } + environment { + PYTHONPATH = "${env.WORKSPACE}/.venv/bin" + } + + + stages { + + stage('Checkout') { + steps { +// checkout scmGit( +// branches: [[name: 'tests']], +// extensions: [], +// userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] +// ) +// cleanWs() + checkout scm + } + } + + stage('Create venv'){ + steps { + sh 'python3 -m venv .venv' +// withPythonEnv(PYTHONPATH){ +// sh 'source .venv/bin/activate' +// sh 'python3 -m pip install pip --upgrade' +// sh 'pip install pyopenssl --upgrade' +// sh 'pip install pdf2image' +// sh 'pip install pillow-heif' + } + } + + stage('Install dependencies'){ + steps { + withPythonEnv(PYTHONPATH){ + sh "pip install -e ." + } + } + + } + + stage('Config'){ + steps{ + sh 'echo $env.JENKINS_HOME' + withPythonEnv(PYTHONPATH){ + sh 'python3 ci/modify_config.py' + sh 'rm -rf $JENKINS_HOME/ci_test_data/data/vectordb/ci_test' + sh 'cp -r $JENKINS_HOME/ci_test_data/data/backup_vectordb/ci_test $JENKINS_HOME/ci_test_data/data/vectordb' + } + } + } + + stage('Linting'){ + steps { + withPythonEnv(PYTHONPATH){ + sh 'pip install ruff' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh 'ruff check . --exclude .pyenv-var-lib-jenkins-workspace-capstone_5-.venv-bin --output-format junit -o ruff-report.xml' + sh 'ruff format .' + } + } +// catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ +// sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' +// sh '$JENKINS_HOME/.local/bin/ruff format src' +// } + } + post { + always{ + withChecks('Lint Checks'){ + junit 'ruff-report.xml' + } + } + } + } + + stage('Static type check'){ + steps { + withPythonEnv(PYTHONPATH){ + sh 'pip install mypy' + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ + sh 'python3 -m mypy -p src.grag --junit-xml mypy-report.xml' + } + } + } + post { + always{ + withChecks('Static Type Checks'){ + junit 'mypy-report.xml' + } + + } + } + } + + stage('Tests'){ + steps{ + sh 'echo $USER' + sh 'docker pull chromadb/chroma' + sh 'docker run -d --name jenkins-chroma -p 8000:8000 chromadb/chroma' + withPythonEnv(PYTHONPATH){ + sh 'pip install pytest' + sh 'python3 ci/unlock_deeplake.py' + sh 'pytest src --junitxml=pytest-report.xml' + } + } + post { + always{ + withChecks('Integration Tests'){ + junit 'pytest-report.xml' + } + sh 'docker stop jenkins-chroma' + sh 'docker rm jenkins-chroma' + +// cleanWs( +// cleanWhenNotBuilt: false, +// deleteDirs: true, +// disableDeferredWipeout: true, +// notFailBuild: true, +// patterns: [[pattern: '.gitignore', type: 'INCLUDE'], +// [pattern: '.propsfile', type: 'EXCLUDE']] +// ) + } + } + } + + } +} diff --git a/ci/modify_config.py b/ci/modify_config.py new file mode 100644 index 0000000..759e88d --- /dev/null +++ b/ci/modify_config.py @@ -0,0 +1,17 @@ +import configparser +import os + +from grag.components.utils import get_config + +config = configparser.ConfigParser() + +workspace = os.getenv('WORKSPACE') +jenkins_home = os.getenv('JENKINS_HOME') + +config = get_config() +config['root']['root_path'] = f'{workspace}' +config['data']['data_path'] = f'{jenkins_home}/ci_test_data/data' +config['llm']['base_dir'] = f'{jenkins_home}/ci_test_models/models' + +with open(f'{workspace}/src/config.ini', 'w') as configfile: + config.write(configfile) diff --git a/ci/modify_config_test.py b/ci/modify_config_test.py new file mode 100644 index 0000000..b93a40a --- /dev/null +++ b/ci/modify_config_test.py @@ -0,0 +1,6 @@ +from grag.components.utils import get_config + +config = get_config() +print(f"{config['root']['root_path']=}") +print(f"{config['data']['data_path'] = }") +print(f"{config['llm']['base_dir'] = }") diff --git a/ci/unlock_deeplake.py b/ci/unlock_deeplake.py new file mode 100644 index 0000000..d766941 --- /dev/null +++ b/ci/unlock_deeplake.py @@ -0,0 +1,11 @@ +import os +import shutil +from pathlib import Path + +jenkins_home = os.getenv('JENKINS_HOME') + +lock_path = Path(jenkins_home) / 'ci_test_data/data/vectordb/ci_test/dataset_lock.lock' + +if os.path.exists(lock_path): + shutil.rmtree(lock_path) + print('Deleting lock file: {}'.format(lock_path)) diff --git a/cookbook/Basic-RAG/BasicRAG_ingest.py b/cookbook/Basic-RAG/BasicRAG_ingest.py index e7b38a0..00e8f0b 100644 --- a/cookbook/Basic-RAG/BasicRAG_ingest.py +++ b/cookbook/Basic-RAG/BasicRAG_ingest.py @@ -5,9 +5,11 @@ from grag.components.multivec_retriever import Retriever from grag.components.vectordb.deeplake_client import DeepLakeClient -client = DeepLakeClient(collection_name="test") -retriever = Retriever(vectordb=client) +# from grag.components.vectordb.chroma_client import ChromaClient -dir_path = Path(__file__).parents[2] / "data/client_test/test/" +client = DeepLakeClient(collection_name="ci_test") +# client = ChromaClient(collection_name="ci_test") +retriever = Retriever(vectordb=client) +dir_path = Path(__file__).parents[2] / "data/test/pdfs/new_papers" retriever.ingest(dir_path) diff --git a/pyproject.toml b/pyproject.toml index e27f822..9859013 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "sentence-transformers==2.2.2", "instructorembedding>=1.0.1", "streamlit>=1.31.1", - "unstructured>=0.12.3", + "unstructured[pdf]>=0.12.3", "pdfplumber>=0.10.3", "llama-cpp-python>=0.2.43", "tqdm>=4.65.0", @@ -102,7 +102,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 indent-width = 4 -extend-exclude = ["tests", "others"] +extend-exclude = ["tests", "others", "docs", "ci"] [tool.ruff.lint] select = ["E4", "E7", "E9", "F", "I", "D"] diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index 66af4c6..41bff2d 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -75,7 +75,7 @@ def __init__( self.store = LocalFileStore(self.store_path) self.retriever = MultiVectorRetriever( vectorstore=self.vectordb.langchain_client, - docstore=self.store, # type: ignore + byte_store=self.store, # type: ignore id_key=self.id_key, ) self.splitter = TextSplitter() diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index 9db9985..09969ca 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -69,7 +69,7 @@ def __init__( embedding_model=self.embedding_model, embedding_type=self.embedding_type ).embedding_function - self.client = chromadb.HttpClient(host=self.host, port=self.port) + self.client = chromadb.HttpClient(host=self.host, port=self.port) # type: ignore self.collection = self.client.get_or_create_collection( name=self.collection_name ) diff --git a/src/tests/components/multivec_retriever_test.py b/src/tests/components/multivec_retriever_test.py index f2544bf..3f847bd 100644 --- a/src/tests/components/multivec_retriever_test.py +++ b/src/tests/components/multivec_retriever_test.py @@ -1,9 +1,11 @@ import json from grag.components.multivec_retriever import Retriever +from grag.components.vectordb.deeplake_client import DeepLakeClient from langchain_core.documents import Document -retriever = Retriever() # pass test collection +client = DeepLakeClient(collection_name="ci_test") +retriever = Retriever(vectordb=client) # pass test collection doc = Document(page_content="Hello worlds", metadata={"source": "bars"}) diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py index b8c2ceb..0b93643 100644 --- a/src/tests/rag/basic_rag_test.py +++ b/src/tests/rag/basic_rag_test.py @@ -4,7 +4,7 @@ from grag.components.vectordb.deeplake_client import DeepLakeClient from grag.rag.basic_rag import BasicRAG -client = DeepLakeClient(collection_name="test") +client = DeepLakeClient(collection_name="ci_test") retriever = Retriever(vectordb=client) From b3504787b2af2a826c3f0c2b434b3f6adc949538 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Mon, 1 Apr 2024 19:33:50 -0400 Subject: [PATCH 45/45] Jenkins pipeline working --- ci/Jenkinsfile | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 22897f8..56d3551 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -13,12 +13,7 @@ pipeline { stage('Checkout') { steps { -// checkout scmGit( -// branches: [[name: 'tests']], -// extensions: [], -// userRemoteConfigs: [[url: 'https://github.com/arjbingly/Capstone_5.git']] -// ) -// cleanWs() + cleanWs() checkout scm } } @@ -26,12 +21,6 @@ pipeline { stage('Create venv'){ steps { sh 'python3 -m venv .venv' -// withPythonEnv(PYTHONPATH){ -// sh 'source .venv/bin/activate' -// sh 'python3 -m pip install pip --upgrade' -// sh 'pip install pyopenssl --upgrade' -// sh 'pip install pdf2image' -// sh 'pip install pillow-heif' } } @@ -64,10 +53,6 @@ pipeline { sh 'ruff format .' } } -// catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE'){ -// sh '$JENKINS_HOME/.local/bin/ruff check --fix src --output-format junit -o ruff-report.xml' -// sh '$JENKINS_HOME/.local/bin/ruff format src' -// } } post { always{ @@ -116,14 +101,14 @@ pipeline { sh 'docker stop jenkins-chroma' sh 'docker rm jenkins-chroma' -// cleanWs( -// cleanWhenNotBuilt: false, -// deleteDirs: true, -// disableDeferredWipeout: true, -// notFailBuild: true, -// patterns: [[pattern: '.gitignore', type: 'INCLUDE'], -// [pattern: '.propsfile', type: 'EXCLUDE']] -// ) + cleanWs( + cleanWhenNotBuilt: false, + deleteDirs: true, + disableDeferredWipeout: true, + notFailBuild: true, + patterns: [[pattern: '.gitignore', type: 'INCLUDE'], + [pattern: '.propsfile', type: 'EXCLUDE']] + ) } } }