From de76cc02657818b9945c0ea1dc0180ee93c10dd1 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:06:01 -0500 Subject: [PATCH 01/10] INTPYTHON-416 Make AI/ML testing framework runnable locally --- .evergreen/utils.sh | 13 ++++++------- README.md | 2 +- chatgpt-retrieval-plugin/run.sh | 8 ++++---- docarray/run.sh | 9 +++++---- langchain-python/run.sh | 8 ++++---- langgraph-python/run.sh | 8 ++++---- llama-index-python-kvstore/run.sh | 8 ++++---- llama-index-python-vectorstore/run.sh | 8 ++++---- semantic-kernel-csharp/run.sh | 10 +++++----- semantic-kernel-python/run.sh | 8 ++++---- 10 files changed, 41 insertions(+), 41 deletions(-) diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index 885d4f6..01dfa55 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -68,7 +68,7 @@ setup_local_atlas() { IMAGE=artifactory.corp.mongodb.com/dockerhub/mongodb/mongodb-atlas-local:latest retry podman pull $IMAGE - CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" mongodb/mongodb-atlas-local:latest) + CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" $IMAGE) echo "waiting for container to become healthy..." function wait() { @@ -104,13 +104,13 @@ setup_local_atlas() { wait "$CONTAINER_ID" EXPOSED_PORT=$(podman inspect --format='{{ (index (index .NetworkSettings.Ports "27017/tcp") 0).HostPort }}' "$CONTAINER_ID") export CONN_STRING="mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" - # shellcheck disable=SC2154 - echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $workdir/src/.evergreen/.local_atlas_uri + SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) + echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $SCRIPT_DIR/.local_atlas_uri } fetch_local_atlas_uri() { - # shellcheck disable=SC2154 - . 
$workdir/src/.evergreen/.local_atlas_uri + SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) + . $SCRIPT_DIR/.local_atlas_uri export CONN_STRING=$CONN_STRING echo "$CONN_STRING" @@ -120,8 +120,7 @@ fetch_local_atlas_uri() { scaffold_atlas() { PYTHON_BINARY=$(find_python3) - # Should be called from src - EVERGREEN_PATH=$(pwd)/.evergreen + EVERGREEN_PATH=$(realpath $(dirname ${BASH_SOURCE[0]})) TARGET_DIR=$(pwd)/$DIR SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py diff --git a/README.md b/README.md index cd287f2..dce5c1d 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ The general layout of this repo looks like this: Each test subdirectory will automatically have its own local Atlas deployment. As a result, database and collection names will not conflict between different AI/ML integrations. To connect to your local Atlas using a connection string, `utils.sh` has a `fetch_local_atlas_uri` that you can call from the `run.sh` script within your subdirectory. For example: ```bash -. $workdir/src/.evergreen/utils.sh +. .evergreen/utils.sh CONN_STRING=$(fetch_local_atlas_uri) ``` diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index 9927cc5..b36d34b 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -5,11 +5,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/docarray/run.sh b/docarray/run.sh index 3477b5a..6563f71 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -6,11 +6,12 @@ set -eu # Get the MONGODB_URI. -# shellcheck disable=SC2154 -. 
$workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $ROOT_DIR/env.sh + +. $SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 2500f8a..a95aeb7 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -4,11 +4,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/langgraph-python/run.sh b/langgraph-python/run.sh index 94408de..e466784 100644 --- a/langgraph-python/run.sh +++ b/langgraph-python/run.sh @@ -4,11 +4,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index ca5e2db..10c87b6 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -3,11 +3,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. 
$SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index 1a517eb..cc63836 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -3,11 +3,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index c88b9a9..4bc9ff2 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -3,12 +3,12 @@ set -eu # Get the MONGODB_URI. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh -# WORKING_DIR = src/semantic-kernel-csharp/semantic-kernel +. $SCRIPT_DIR/utils.sh +# WORKING_DIR = $ROOT_DIR/semantic-kernel-csharp/semantic-kernel # Install .NET DOTNET_SDK_PATH=./.dotnet diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index fa5a16e..b155f5b 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -3,11 +3,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. 
$SCRIPT_DIR/utils.sh PYTHON_BINARY=$(find_python3) From f5c14d202143d3bf19208984173f570f6802b809 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:11:39 -0500 Subject: [PATCH 02/10] fix dir path --- chatgpt-retrieval-plugin/run.sh | 2 +- docarray/run.sh | 3 ++- langchain-python/run.sh | 2 +- langgraph-python/run.sh | 2 +- llama-index-python-kvstore/run.sh | 2 +- llama-index-python-vectorstore/run.sh | 2 +- semantic-kernel-csharp/run.sh | 3 ++- semantic-kernel-python/run.sh | 2 +- 8 files changed, 10 insertions(+), 8 deletions(-) diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index b36d34b..ddb58ca 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -9,7 +9,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/docarray/run.sh b/docarray/run.sh index 6563f71..30ed396 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -11,7 +11,8 @@ ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh + PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/langchain-python/run.sh b/langchain-python/run.sh index a95aeb7..177be62 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -8,7 +8,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. 
$ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/langgraph-python/run.sh b/langgraph-python/run.sh index e466784..8d43e5c 100644 --- a/langgraph-python/run.sh +++ b/langgraph-python/run.sh @@ -8,7 +8,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index 10c87b6..dd09c43 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -7,7 +7,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index cc63836..6eeb68e 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -7,7 +7,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index 4bc9ff2..c48d656 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -7,7 +7,8 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. 
$ROOT_DIR/.evergreen/utils.sh + # WORKING_DIR = $ROOT_DIR/semantic-kernel-csharp/semantic-kernel # Install .NET diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index b155f5b..25cd0f2 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -7,7 +7,7 @@ SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh -. $SCRIPT_DIR/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) From c274376ef5a119d532faedf3d77a74bea0036925 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:48:08 -0500 Subject: [PATCH 03/10] finish local testing --- .evergreen/.local_atlas_uri | 1 + .evergreen/config.yml | 47 +++-------------------- .evergreen/execute-tests.sh | 16 ++++++++ .evergreen/fetch-repo.sh | 25 ++++++++++++ .evergreen/fetch-secrets.sh | 2 +- .evergreen/provision-atlas.sh | 7 ++++ .gitignore | 1 + README.md | 15 ++++++++ chatgpt-retrieval-plugin/config.env | 3 ++ docarray/config.env | 3 ++ langchain-python/config.env | 3 ++ langgraph-python/config.env | 3 ++ llama-index-python-kvstore/config.env | 4 ++ llama-index-python-vectorstore/config.env | 3 ++ semantic-kernel-csharp/config.env | 3 ++ semantic-kernel-python/config.env | 3 ++ 16 files changed, 96 insertions(+), 43 deletions(-) create mode 100644 .evergreen/.local_atlas_uri create mode 100644 .evergreen/execute-tests.sh create mode 100644 .evergreen/fetch-repo.sh create mode 100644 chatgpt-retrieval-plugin/config.env create mode 100644 docarray/config.env create mode 100644 langchain-python/config.env create mode 100644 langgraph-python/config.env create mode 100644 llama-index-python-kvstore/config.env create mode 100644 llama-index-python-vectorstore/config.env create mode 100644 semantic-kernel-csharp/config.env create mode 100644 semantic-kernel-python/config.env diff --git a/.evergreen/.local_atlas_uri b/.evergreen/.local_atlas_uri new file mode 100644 index 0000000..ae2ffff --- 
/dev/null +++ b/.evergreen/.local_atlas_uri @@ -0,0 +1 @@ +CONN_STRING=mongodb://127.0.0.1:39239/?directConnection=true diff --git a/.evergreen/config.yml b/.evergreen/config.yml index de73c5b..089c918 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -37,33 +37,21 @@ functions: args: [.evergreen/fetch-secrets.sh] "fetch repo": - - command: shell.exec + - command: subprocess.exec type: setup params: working_dir: "src" - script: | - if [ ! -d "${DIR}" ]; then - echo '${REPO_NAME} could not be found' 1>&2 - exit 1 - fi - # Apply patches to upstream repo if desired. - cd ${DIR} - git clone ${CLONE_URL} - if [ -d "patches" ]; then - cd ${REPO_NAME} - echo "Applying patches." - git apply ../patches/* - fi + binary: bash + args: [.evergreen/fetch-repo.sh] "execute tests": - command: subprocess.exec type: test params: add_expansions_to_env: true - working_dir: "src/${DIR}/${REPO_NAME}" + working_dir: "src" binary: bash - args: - - ../run.sh + args: [.evergreen/execute-tests.sh] "setup local atlas": - command: subprocess.exec @@ -194,10 +182,6 @@ buildvariants: display_name: LlamaIndex RHEL KV Store expansions: DIR: llama-index-python-kvstore - REPO_NAME: llama_index - # TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326] - CLONE_URL: -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git - DATABASE: llama_index_test_db run_on: - rhel87-small tasks: @@ -209,9 +193,6 @@ buildvariants: display_name: Semantic-Kernel RHEL Python expansions: DIR: semantic-kernel-python - REPO_NAME: semantic-kernel - CLONE_URL: https://github.com/microsoft/semantic-kernel.git - DATABASE: pyMSKTest run_on: - rhel87-small tasks: @@ -224,9 +205,6 @@ buildvariants: display_name: Semantic-Kernel RHEL CSharp expansions: DIR: semantic-kernel-csharp - REPO_NAME: semantic-kernel - CLONE_URL: https://github.com/microsoft/semantic-kernel.git - DATABASE: dotnetMSKNearestTest run_on: - rhel87-small tasks: @@ -238,9 +216,6 @@ buildvariants: display_name: Langchain 
RHEL Python expansions: DIR: langchain-python - REPO_NAME: langchain-mongodb - CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git - DATABASE: langchain_test_db run_on: - rhel87-small tasks: @@ -252,9 +227,6 @@ buildvariants: display_name: Langgraph RHEL Python expansions: DIR: langgraph-python - REPO_NAME: langchain-mongodb - CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git - DATABASE: langgraph-test run_on: - rhel87-small tasks: @@ -266,9 +238,6 @@ buildvariants: display_name: ChatGPT Retrieval Plugin expansions: DIR: chatgpt-retrieval-plugin - REPO_NAME: chatgpt-retrieval-plugin - CLONE_URL: https://github.com/openai/chatgpt-retrieval-plugin.git - DATABASE: chatgpt_retrieval_plugin_test_db run_on: - rhel87-small tasks: @@ -280,9 +249,6 @@ buildvariants: display_name: LlamaIndex RHEL Vector Store expansions: DIR: llama-index-python-vectorstore - REPO_NAME: llama_index - CLONE_URL: https://github.com/run-llama/llama_index.git - DATABASE: llama_index_test_db run_on: - rhel87-small tasks: @@ -295,9 +261,6 @@ buildvariants: display_name: DocArray RHEL expansions: DIR: docarray - REPO_NAME: docarray - CLONE_URL: https://github.com/docarray/docarray.git - DATABASE: docarray_test_db run_on: - rhel87-small tasks: diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh new file mode 100644 index 0000000..785e699 --- /dev/null +++ b/.evergreen/execute-tests.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -eu + +SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +ROOT_DIR=$(dirname $SCRIPT_DIR) + + +# Source the configuration. +cd ${ROOT_DIR}/${DIR} +set -a +source config.env +set +a + +cd ${REPO_NAME} +bash ${ROOT_DIR}/${DIR}/run.sh diff --git a/.evergreen/fetch-repo.sh b/.evergreen/fetch-repo.sh new file mode 100644 index 0000000..dc79878 --- /dev/null +++ b/.evergreen/fetch-repo.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eu + +if [ ! 
-d "${DIR}" ]; then + echo "${DIR} could not be found" 1>&2 + exit 1 +fi + +cd ${DIR} + +# Source the configuration. +set -a +source config.env +set +a + +rm -rf ${REPO_NAME} +git clone ${CLONE_URL} + +# Apply patches to upstream repo if desired. +if [ -d "patches" ]; then + cd ${REPO_NAME} + echo "Applying patches." + git apply ../patches/* +fi diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index 2ca2980..808e506 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -3,7 +3,7 @@ set -eu # Clone drivers-evergeen-tools. -git clone https://github.com/mongodb-labs/drivers-evergreen-tools +git clone https://github.com/mongodb-labs/drivers-evergreen-tools || true # Get the secrets for drivers/ai-ml-pipeline-testing. . drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index dc1e010..e46f97d 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -3,6 +3,13 @@ set -eu . .evergreen/utils.sh +# Source the config +pushd $DIR +set -a +. config.env +set +a +popd + setup_local_atlas scaffold_atlas diff --git a/.gitignore b/.gitignore index af6cdc2..7a2946b 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ xunit-results/ # Miscellaneous .DS_Store drivers-evergreen-tools +atlas # Secrets secrets-export.sh diff --git a/README.md b/README.md index dce5c1d..9169539 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,21 @@ CONN_STRING=$(fetch_local_atlas_uri) Stores the local Atlas URI within the `CONN_STRING` var. The script can then pass `CONN_STRING` as an environment variable to the test suite. +#### Running tests locally + +We can run the tests with a local checkout of the repo. 
+ +For example, to run the `docarray` tests using local atlas: + +```bash +export DIR=docarray +bash .evergreen/fetch-repo.sh +bash .evergreen/provision-atlas.sh +bash .evergreen/execute-tests.sh +``` + +Use `.evergreen/setup-remote.sh` instead of `.evergreen/provision-atlas.sh` to test against the remote cluster. + #### Pre-populating the Local Atlas Deployment You can pre-populate a test's local Atlas deployment before running the `run.sh` script by providing JSON files in the optional `database` directory of the created subdirectory. The `.evergreen/scaffold_atlas.py` file will search for every JSON file within this database directory and upload the documents to the database provided by the `DATABASE` expansion provided in the build variant of the `.evergreen/config.yml` setup. The collection the script uploads to is based on the name of your JSON file: diff --git a/chatgpt-retrieval-plugin/config.env b/chatgpt-retrieval-plugin/config.env new file mode 100644 index 0000000..d45bb7d --- /dev/null +++ b/chatgpt-retrieval-plugin/config.env @@ -0,0 +1,3 @@ +REPO_NAME=chatgpt-retrieval-plugin +CLONE_URL="https://github.com/openai/chatgpt-retrieval-plugin.git" +DATABASE=chatgpt_retrieval_plugin_test_db diff --git a/docarray/config.env b/docarray/config.env new file mode 100644 index 0000000..c18451c --- /dev/null +++ b/docarray/config.env @@ -0,0 +1,3 @@ +REPO_NAME=docarray +CLONE_URL="https://github.com/docarray/docarray.git" +DATABASE=docarray_test_db diff --git a/langchain-python/config.env b/langchain-python/config.env new file mode 100644 index 0000000..5470511 --- /dev/null +++ b/langchain-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=langchain-mongodb +CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +DATABASE=langchain_test_db diff --git a/langgraph-python/config.env b/langgraph-python/config.env new file mode 100644 index 0000000..8ebc827 --- /dev/null +++ b/langgraph-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=langchain-mongodb 
+CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +DATABASE=langgraph-test diff --git a/llama-index-python-kvstore/config.env b/llama-index-python-kvstore/config.env new file mode 100644 index 0000000..bac5076 --- /dev/null +++ b/llama-index-python-kvstore/config.env @@ -0,0 +1,4 @@ +REPO_NAME=llama_index +# TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326] +CLONE_URL=" -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git" +DATABASE=llama_index_test_db diff --git a/llama-index-python-vectorstore/config.env b/llama-index-python-vectorstore/config.env new file mode 100644 index 0000000..e4e5c83 --- /dev/null +++ b/llama-index-python-vectorstore/config.env @@ -0,0 +1,3 @@ +REPO_NAME=llama_index +CLONE_URL="https://github.com/run-llama/llama_index.git" +DATABASE=llama_index_test_db diff --git a/semantic-kernel-csharp/config.env b/semantic-kernel-csharp/config.env new file mode 100644 index 0000000..a784ac4 --- /dev/null +++ b/semantic-kernel-csharp/config.env @@ -0,0 +1,3 @@ +REPO_NAME=semantic-kernel +CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +DATABASE=dotnetMSKNearestTest diff --git a/semantic-kernel-python/config.env b/semantic-kernel-python/config.env new file mode 100644 index 0000000..0487af5 --- /dev/null +++ b/semantic-kernel-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=semantic-kernel +CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +DATABASE=pyMSKTest From fb7cf94320a6ca0c7fbb17d9e9ef118a5691d174 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:49:43 -0500 Subject: [PATCH 04/10] fix env handling --- .evergreen/config.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 089c918..205c62c 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -40,6 +40,7 @@ functions: - command: subprocess.exec type: setup params: + include_expansions_in_env: [DIR] working_dir: "src" 
binary: bash args: [.evergreen/fetch-repo.sh] @@ -48,7 +49,7 @@ functions: - command: subprocess.exec type: test params: - add_expansions_to_env: true + include_expansions_in_env: [DIR] working_dir: "src" binary: bash args: [.evergreen/execute-tests.sh] @@ -58,7 +59,7 @@ functions: type: setup retry_on_failure: true params: - add_expansions_to_env: true + include_expansions_in_env: [DIR] working_dir: "src" binary: bash args: @@ -68,7 +69,7 @@ functions: - command: subprocess.exec type: setup params: - add_expansions_to_env: true + include_expansions_in_env: [DIR] working_dir: "src" binary: bash args: [.evergreen/setup-remote.sh] From 0efad16a0adaf1a93dd2ea92b33ffa37b4aae45b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:52:42 -0500 Subject: [PATCH 05/10] fix remote handling --- .evergreen/setup-remote.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index e1a802a..1f77a0c 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -8,6 +8,13 @@ if [ -z "${DIR:-}" ]; then exit 1 fi +# Source the config +pushd $DIR +set -a +. config.env +set +a +popd + # Get the correct remote URI. 
case $DIR in llama-index-python-kvstore) From 31a61458094d58705cc7f692d427a400018fca6e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 10:56:16 -0500 Subject: [PATCH 06/10] test INTPYTHON-447-8 branch --- langgraph-python/config.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langgraph-python/config.env b/langgraph-python/config.env index 8ebc827..ad15215 100644 --- a/langgraph-python/config.env +++ b/langgraph-python/config.env @@ -1,3 +1,3 @@ REPO_NAME=langchain-mongodb -CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +CLONE_URL="--branch INTPYTHON-447-8 https://github.com/blink1073/langchain-mongodb.git" DATABASE=langgraph-test From 33300ba3376043eccd4fc652c14d50c64ba7283e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 11:08:10 -0500 Subject: [PATCH 07/10] Revert "test INTPYTHON-447-8 branch" This reverts commit 31a61458094d58705cc7f692d427a400018fca6e. --- langgraph-python/config.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langgraph-python/config.env b/langgraph-python/config.env index ad15215..8ebc827 100644 --- a/langgraph-python/config.env +++ b/langgraph-python/config.env @@ -1,3 +1,3 @@ REPO_NAME=langchain-mongodb -CLONE_URL="--branch INTPYTHON-447-8 https://github.com/blink1073/langchain-mongodb.git" +CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" DATABASE=langgraph-test From 2f3c3dc033a229efb15841e0054d06c59895fb63 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 11:11:50 -0500 Subject: [PATCH 08/10] ignore .evergreen/.local_atlas_uri --- .evergreen/.local_atlas_uri | 1 - .gitignore | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 .evergreen/.local_atlas_uri diff --git a/.evergreen/.local_atlas_uri b/.evergreen/.local_atlas_uri deleted file mode 100644 index ae2ffff..0000000 --- a/.evergreen/.local_atlas_uri +++ /dev/null @@ -1 +0,0 @@ 
-CONN_STRING=mongodb://127.0.0.1:39239/?directConnection=true diff --git a/.gitignore b/.gitignore index 7a2946b..6e63670 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ xunit-results/ .DS_Store drivers-evergreen-tools atlas +.evergreen/.local_atlas_uri # Secrets secrets-export.sh From d9a4c66bb33b9b274625abfa588fa45d5a2d5c0e Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 11:14:51 -0500 Subject: [PATCH 09/10] update readme --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9169539..b24f68a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ Each subdirectory is scoped to run only one AI/ML integration's suite of tests f Within each subdirectory you should expect to have: - `run.sh` -- A script that should handle any additional library installations and steps for executing the test suite. This script should not populate the Atlas database with any required test data. +- `config.env` - A file that defines the following environment variables: + - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned + - `CLONE_URL` -- The Github URL to clone into the specified `DIR` + - `DATABASE` -- The optional database where the Atlas CLI will load your index configs - `database/` -- An optional directory used by `.evergreen/scaffold_atlas.py` to populate a MongoDB database with test data. Only provide this if your tests require pre-populated data. - `database/{collection}.json` -- An optional JSON file containing one or more MongoDB documents that will be uploaded to `$DATABASE.{collection}` in the local Atlas instance. Only provide this if your tests require pre-populated data. - `indexConfig.json` -- An optional file containing configuration for a specified Atlas Search Index. 
@@ -40,12 +44,15 @@ The general layout of this repo looks like this: │ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch │ ├── indexes # Optional Index definitions directory │ │ └── indexConfig.json # Optional Search index definition +│ ├── config.env # Configuration file │ └── run.sh # Script that executes test +│ ├── semantic-kernel-python # Folder scoped for one Integration │ ├── database # Optional database definition │ │ └── nearestSearch.json # Populates $DATABASE.nearestSearch │ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch │ ├── indexConfig.json # Creates Search Index on $DATABASE +│ ├── config.env # Configuration file │ └── run.sh # Script that executes test ``` @@ -97,9 +104,6 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c - [`expansions`](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files/#expansions) -- Build variant specific variables. Expansions that need to be maintained as secrets should be stored in [the Evergreen project settings](https://spruce.mongodb.com/project/ai-ml-pipeline-testing/settings/variables) using [variables](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-and-Distro-Settings#variables). Some common expansions needed are: - `DIR` -- The subdirectory where the tasks will run - - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned - - `CLONE_URL` -- The Github URL to clone into the specified `DIR` - - `DATABASE` -- The optional database where the Atlas CLI will load your index configs - `run_on` -- Specified platform to run on. `rhel87-small` should be used by default. Any other distro may fail Atlas CLI setup. - `tasks` -- Tasks to run. 
See below for more details From e1a77b558fa51c9cf1f5ee5c8815d2ca5baade2b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 11 Dec 2024 11:48:01 -0500 Subject: [PATCH 10/10] lint --- .evergreen/execute-tests.sh | 2 +- .evergreen/utils.sh | 6 +++--- chatgpt-retrieval-plugin/run.sh | 4 ++-- docarray/run.sh | 4 ++-- langchain-python/run.sh | 2 +- langgraph-python/run.sh | 2 +- llama-index-python-kvstore/run.sh | 4 ++-- llama-index-python-vectorstore/run.sh | 4 ++-- semantic-kernel-csharp/run.sh | 2 +- semantic-kernel-python/run.sh | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 785e699..130d680 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -2,7 +2,7 @@ set -eu -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index 01dfa55..3a2398d 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -104,12 +104,12 @@ setup_local_atlas() { wait "$CONTAINER_ID" EXPOSED_PORT=$(podman inspect --format='{{ (index (index .NetworkSettings.Ports "27017/tcp") 0).HostPort }}' "$CONTAINER_ID") export CONN_STRING="mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" - SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) + SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $SCRIPT_DIR/.local_atlas_uri } fetch_local_atlas_uri() { - SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) + SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") . 
$SCRIPT_DIR/.local_atlas_uri export CONN_STRING=$CONN_STRING @@ -120,7 +120,7 @@ fetch_local_atlas_uri() { scaffold_atlas() { PYTHON_BINARY=$(find_python3) - EVERGREEN_PATH=$(realpath $(dirname ${BASH_SOURCE[0]})) + EVERGREEN_PATH=$(realpath "$(dirname ${BASH_SOURCE[0]})") TARGET_DIR=$(pwd)/$DIR SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index ddb58ca..5ddd7bf 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -1,11 +1,11 @@ -#!/bin/sh +#!/bin/bash # chat-gpt-retrieval-plugin is a poetry run project set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/docarray/run.sh b/docarray/run.sh index 30ed396..7db31b9 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Sets up a virtual environment (poetry) # Runs the mongodb tests of the upstream repo @@ -6,7 +6,7 @@ set -eu # Get the MONGODB_URI. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 177be62..b379f2d 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/langgraph-python/run.sh b/langgraph-python/run.sh index 8d43e5c..9864c97 100644 --- a/langgraph-python/run.sh +++ b/langgraph-python/run.sh @@ -4,7 +4,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. 
-SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index dd09c43..1f31df4 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -1,9 +1,9 @@ -#!/bin/sh +#!/bin/bash set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index 6eeb68e..524ba5c 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -1,9 +1,9 @@ -#!/bin/sh +#!/bin/bash set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index c48d656..2f15b68 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -3,7 +3,7 @@ set -eu # Get the MONGODB_URI. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index 25cd0f2..13716c1 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -3,7 +3,7 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE[0]})) +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") ROOT_DIR=$(dirname $SCRIPT_DIR) . $ROOT_DIR/env.sh