diff --git a/.evergreen/config.yml b/.evergreen/config.yml index de73c5b..205c62c 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -37,40 +37,29 @@ functions: args: [.evergreen/fetch-secrets.sh] "fetch repo": - - command: shell.exec + - command: subprocess.exec type: setup params: + include_expansions_in_env: [DIR] working_dir: "src" - script: | - if [ ! -d "${DIR}" ]; then - echo '${REPO_NAME} could not be found' 1>&2 - exit 1 - fi - # Apply patches to upstream repo if desired. - cd ${DIR} - git clone ${CLONE_URL} - if [ -d "patches" ]; then - cd ${REPO_NAME} - echo "Applying patches." - git apply ../patches/* - fi + binary: bash + args: [.evergreen/fetch-repo.sh] "execute tests": - command: subprocess.exec type: test params: - add_expansions_to_env: true - working_dir: "src/${DIR}/${REPO_NAME}" + include_expansions_in_env: [DIR] + working_dir: "src" binary: bash - args: - - ../run.sh + args: [.evergreen/execute-tests.sh] "setup local atlas": - command: subprocess.exec type: setup retry_on_failure: true params: - add_expansions_to_env: true + include_expansions_in_env: [DIR] working_dir: "src" binary: bash args: @@ -80,7 +69,7 @@ functions: - command: subprocess.exec type: setup params: - add_expansions_to_env: true + include_expansions_in_env: [DIR] working_dir: "src" binary: bash args: [.evergreen/setup-remote.sh] @@ -194,10 +183,6 @@ buildvariants: display_name: LlamaIndex RHEL KV Store expansions: DIR: llama-index-python-kvstore - REPO_NAME: llama_index - # TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326] - CLONE_URL: -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git - DATABASE: llama_index_test_db run_on: - rhel87-small tasks: @@ -209,9 +194,6 @@ buildvariants: display_name: Semantic-Kernel RHEL Python expansions: DIR: semantic-kernel-python - REPO_NAME: semantic-kernel - CLONE_URL: https://github.com/microsoft/semantic-kernel.git - DATABASE: pyMSKTest run_on: - rhel87-small tasks: @@ -224,9 +206,6 @@ 
buildvariants: display_name: Semantic-Kernel RHEL CSharp expansions: DIR: semantic-kernel-csharp - REPO_NAME: semantic-kernel - CLONE_URL: https://github.com/microsoft/semantic-kernel.git - DATABASE: dotnetMSKNearestTest run_on: - rhel87-small tasks: @@ -238,9 +217,6 @@ buildvariants: display_name: Langchain RHEL Python expansions: DIR: langchain-python - REPO_NAME: langchain-mongodb - CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git - DATABASE: langchain_test_db run_on: - rhel87-small tasks: @@ -252,9 +228,6 @@ buildvariants: display_name: Langgraph RHEL Python expansions: DIR: langgraph-python - REPO_NAME: langchain-mongodb - CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git - DATABASE: langgraph-test run_on: - rhel87-small tasks: @@ -266,9 +239,6 @@ buildvariants: display_name: ChatGPT Retrieval Plugin expansions: DIR: chatgpt-retrieval-plugin - REPO_NAME: chatgpt-retrieval-plugin - CLONE_URL: https://github.com/openai/chatgpt-retrieval-plugin.git - DATABASE: chatgpt_retrieval_plugin_test_db run_on: - rhel87-small tasks: @@ -280,9 +250,6 @@ buildvariants: display_name: LlamaIndex RHEL Vector Store expansions: DIR: llama-index-python-vectorstore - REPO_NAME: llama_index - CLONE_URL: https://github.com/run-llama/llama_index.git - DATABASE: llama_index_test_db run_on: - rhel87-small tasks: @@ -295,9 +262,6 @@ buildvariants: display_name: DocArray RHEL expansions: DIR: docarray - REPO_NAME: docarray - CLONE_URL: https://github.com/docarray/docarray.git - DATABASE: docarray_test_db run_on: - rhel87-small tasks: diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh new file mode 100644 index 0000000..130d680 --- /dev/null +++ b/.evergreen/execute-tests.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -eu + +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) + + +# Source the configuration. 
+cd ${ROOT_DIR}/${DIR} +set -a +source config.env +set +a + +cd ${REPO_NAME} +bash ${ROOT_DIR}/${DIR}/run.sh diff --git a/.evergreen/fetch-repo.sh b/.evergreen/fetch-repo.sh new file mode 100644 index 0000000..dc79878 --- /dev/null +++ b/.evergreen/fetch-repo.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eu + +if [ ! -d "${DIR}" ]; then + echo "${DIR} could not be found" 1>&2 + exit 1 +fi + +cd ${DIR} + +# Source the configuration. +set -a +source config.env +set +a + +rm -rf ${REPO_NAME} +git clone ${CLONE_URL} + +# Apply patches to upstream repo if desired. +if [ -d "patches" ]; then + cd ${REPO_NAME} + echo "Applying patches." + git apply ../patches/* +fi diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh index 2ca2980..808e506 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -3,7 +3,7 @@ set -eu # Clone drivers-evergeen-tools. -git clone https://github.com/mongodb-labs/drivers-evergreen-tools +[ -d drivers-evergreen-tools ] || git clone https://github.com/mongodb-labs/drivers-evergreen-tools # Get the secrets for drivers/ai-ml-pipeline-testing. . drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index dc1e010..e46f97d 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -3,6 +3,13 @@ set -eu . .evergreen/utils.sh +# Source the config, exporting every variable it defines. +pushd $DIR +set -a +. config.env +set +a +popd + setup_local_atlas scaffold_atlas diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh index e1a802a..1f77a0c 100644 --- a/.evergreen/setup-remote.sh +++ b/.evergreen/setup-remote.sh @@ -8,6 +8,13 @@ if [ -z "${DIR:-}" ]; then exit 1 fi +# Source the config, exporting every variable it defines. +pushd $DIR +set -a +. config.env +set +a +popd + # Get the correct remote URI.
case $DIR in llama-index-python-kvstore) diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index 885d4f6..3a2398d 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -68,7 +68,7 @@ setup_local_atlas() { IMAGE=artifactory.corp.mongodb.com/dockerhub/mongodb/mongodb-atlas-local:latest retry podman pull $IMAGE - CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" mongodb/mongodb-atlas-local:latest) + CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" $IMAGE) echo "waiting for container to become healthy..." function wait() { @@ -104,13 +104,13 @@ setup_local_atlas() { wait "$CONTAINER_ID" EXPOSED_PORT=$(podman inspect --format='{{ (index (index .NetworkSettings.Ports "27017/tcp") 0).HostPort }}' "$CONTAINER_ID") export CONN_STRING="mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" - # shellcheck disable=SC2154 - echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $workdir/src/.evergreen/.local_atlas_uri + SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") + echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $SCRIPT_DIR/.local_atlas_uri } fetch_local_atlas_uri() { - # shellcheck disable=SC2154 - . $workdir/src/.evergreen/.local_atlas_uri + SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") + . 
$SCRIPT_DIR/.local_atlas_uri export CONN_STRING=$CONN_STRING echo "$CONN_STRING" @@ -120,8 +120,7 @@ fetch_local_atlas_uri() { scaffold_atlas() { PYTHON_BINARY=$(find_python3) - # Should be called from src - EVERGREEN_PATH=$(pwd)/.evergreen + EVERGREEN_PATH=$(realpath "$(dirname ${BASH_SOURCE[0]})") TARGET_DIR=$(pwd)/$DIR SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py diff --git a/.gitignore b/.gitignore index af6cdc2..6e63670 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,8 @@ xunit-results/ # Miscellaneous .DS_Store drivers-evergreen-tools +atlas +.evergreen/.local_atlas_uri # Secrets secrets-export.sh diff --git a/README.md b/README.md index cd287f2..b24f68a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ Each subdirectory is scoped to run only one AI/ML integration's suite of tests f Within each subdirectory you should expect to have: - `run.sh` -- A script that should handle any additional library installations and steps for executing the test suite. This script should not populate the Atlas database with any required test data. +- `config.env` - A file that defines the following environment variables: + - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned + - `CLONE_URL` -- The Github URL to clone into the specified `DIR` + - `DATABASE` -- The optional database where the Atlas CLI will load your index configs - `database/` -- An optional directory used by `.evergreen/scaffold_atlas.py` to populate a MongoDB database with test data. Only provide this if your tests require pre-populated data. - `database/{collection}.json` -- An optional JSON file containing one or more MongoDB documents that will be uploaded to `$DATABASE.{collection}` in the local Atlas instance. Only provide this if your tests require pre-populated data. - `indexConfig.json` -- An optional file containing configuration for a specified Atlas Search Index. 
@@ -40,12 +44,15 @@ The general layout of this repo looks like this: │ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch │ ├── indexes # Optional Index definitions directory │ │ └── indexConfig.json # Optional Search index definition +│ ├── config.env # Configuration file │ └── run.sh # Script that executes test +│ ├── semantic-kernel-python # Folder scoped for one Integration │ ├── database # Optional database definition │ │ └── nearestSearch.json # Populates $DATABASE.nearestSearch │ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch │ ├── indexConfig.json # Creates Search Index on $DATABASE +│ ├── config.env # Configuration file │ └── run.sh # Script that executes test ``` @@ -54,13 +61,28 @@ The general layout of this repo looks like this: Each test subdirectory will automatically have its own local Atlas deployment. As a result, database and collection names will not conflict between different AI/ML integrations. To connect to your local Atlas using a connection string, `utils.sh` has a `fetch_local_atlas_uri` that you can call from the `run.sh` script within your subdirectory. For example: ```bash -. $workdir/src/.evergreen/utils.sh +. .evergreen/utils.sh CONN_STRING=$(fetch_local_atlas_uri) ``` Stores the local Atlas URI within the `CONN_STRING` var. The script can then pass `CONN_STRING` as an environment variable to the test suite. +#### Running tests locally + +We can run the tests with a local checkout of the repo. + +For example, to run the `docarray` tests using a local Atlas deployment: + +```bash +export DIR=docarray +bash .evergreen/fetch-repo.sh +bash .evergreen/provision-atlas.sh +bash .evergreen/execute-tests.sh +``` + +Use `.evergreen/setup-remote.sh` instead of `.evergreen/provision-atlas.sh` to test against the remote cluster.
+ #### Pre-populating the Local Atlas Deployment You can pre-populate a test's local Atlas deployment before running the `run.sh` script by providing JSON files in the optional `database` directory of the created subdirectory. The `.evergreen/scaffold_atlas.py` file will search for every JSON file within this database directory and upload the documents to the database provided by the `DATABASE` expansion provided in the build variant of the `.evergreen/config.yml` setup. The collection the script uploads to is based on the name of your JSON file: @@ -82,9 +104,6 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c - [`expansions`](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files/#expansions) -- Build variant specific variables. Expansions that need to be maintained as secrets should be stored in [the Evergreen project settings](https://spruce.mongodb.com/project/ai-ml-pipeline-testing/settings/variables) using [variables](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-and-Distro-Settings#variables). Some common expansions needed are: - `DIR` -- The subdirectory where the tasks will run - - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned - - `CLONE_URL` -- The Github URL to clone into the specified `DIR` - - `DATABASE` -- The optional database where the Atlas CLI will load your index configs - `run_on` -- Specified platform to run on. `rhel87-small` should be used by default. Any other distro may fail Atlas CLI setup. - `tasks` -- Tasks to run. 
See below for more details diff --git a/chatgpt-retrieval-plugin/config.env b/chatgpt-retrieval-plugin/config.env new file mode 100644 index 0000000..d45bb7d --- /dev/null +++ b/chatgpt-retrieval-plugin/config.env @@ -0,0 +1,3 @@ +REPO_NAME=chatgpt-retrieval-plugin +CLONE_URL="https://github.com/openai/chatgpt-retrieval-plugin.git" +DATABASE=chatgpt_retrieval_plugin_test_db diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh index 9927cc5..5ddd7bf 100644 --- a/chatgpt-retrieval-plugin/run.sh +++ b/chatgpt-retrieval-plugin/run.sh @@ -1,15 +1,15 @@ -#!/bin/sh +#!/bin/bash # chat-gpt-retrieval-plugin is a poetry run project set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/docarray/config.env b/docarray/config.env new file mode 100644 index 0000000..c18451c --- /dev/null +++ b/docarray/config.env @@ -0,0 +1,3 @@ +REPO_NAME=docarray +CLONE_URL="https://github.com/docarray/docarray.git" +DATABASE=docarray_test_db diff --git a/docarray/run.sh b/docarray/run.sh index 3477b5a..7db31b9 100644 --- a/docarray/run.sh +++ b/docarray/run.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Sets up a virtual environment (poetry) # Runs the mongodb tests of the upstream repo @@ -6,11 +6,13 @@ set -eu # Get the MONGODB_URI. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) + +. $ROOT_DIR/env.sh + +. $ROOT_DIR/.evergreen/utils.sh -# shellcheck disable=SC2154 -. 
$workdir/src/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/langchain-python/config.env b/langchain-python/config.env new file mode 100644 index 0000000..5470511 --- /dev/null +++ b/langchain-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=langchain-mongodb +CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +DATABASE=langchain_test_db diff --git a/langchain-python/run.sh b/langchain-python/run.sh index 2500f8a..b379f2d 100644 --- a/langchain-python/run.sh +++ b/langchain-python/run.sh @@ -4,11 +4,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/langgraph-python/config.env b/langgraph-python/config.env new file mode 100644 index 0000000..8ebc827 --- /dev/null +++ b/langgraph-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=langchain-mongodb +CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git" +DATABASE=langgraph-test diff --git a/langgraph-python/run.sh b/langgraph-python/run.sh index 94408de..9864c97 100644 --- a/langgraph-python/run.sh +++ b/langgraph-python/run.sh @@ -4,11 +4,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. 
$ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) diff --git a/llama-index-python-kvstore/config.env b/llama-index-python-kvstore/config.env new file mode 100644 index 0000000..bac5076 --- /dev/null +++ b/llama-index-python-kvstore/config.env @@ -0,0 +1,4 @@ +REPO_NAME=llama_index +# TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326] +CLONE_URL=" -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git" +DATABASE=llama_index_test_db diff --git a/llama-index-python-kvstore/run.sh b/llama-index-python-kvstore/run.sh index ca5e2db..1f31df4 100644 --- a/llama-index-python-kvstore/run.sh +++ b/llama-index-python-kvstore/run.sh @@ -1,13 +1,13 @@ -#!/bin/sh +#!/bin/bash set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/llama-index-python-vectorstore/config.env b/llama-index-python-vectorstore/config.env new file mode 100644 index 0000000..e4e5c83 --- /dev/null +++ b/llama-index-python-vectorstore/config.env @@ -0,0 +1,3 @@ +REPO_NAME=llama_index +CLONE_URL="https://github.com/run-llama/llama_index.git" +DATABASE=llama_index_test_db diff --git a/llama-index-python-vectorstore/run.sh b/llama-index-python-vectorstore/run.sh index 1a517eb..524ba5c 100644 --- a/llama-index-python-vectorstore/run.sh +++ b/llama-index-python-vectorstore/run.sh @@ -1,13 +1,13 @@ -#!/bin/sh +#!/bin/bash set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. 
$ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3) $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')" diff --git a/semantic-kernel-csharp/config.env b/semantic-kernel-csharp/config.env new file mode 100644 index 0000000..a784ac4 --- /dev/null +++ b/semantic-kernel-csharp/config.env @@ -0,0 +1,3 @@ +REPO_NAME=semantic-kernel +CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +DATABASE=dotnetMSKNearestTest diff --git a/semantic-kernel-csharp/run.sh b/semantic-kernel-csharp/run.sh index c88b9a9..2f15b68 100644 --- a/semantic-kernel-csharp/run.sh +++ b/semantic-kernel-csharp/run.sh @@ -3,12 +3,13 @@ set -eu # Get the MONGODB_URI. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh -# WORKING_DIR = src/semantic-kernel-csharp/semantic-kernel +. $ROOT_DIR/.evergreen/utils.sh + +# WORKING_DIR = $ROOT_DIR/semantic-kernel-csharp/semantic-kernel # Install .NET DOTNET_SDK_PATH=./.dotnet diff --git a/semantic-kernel-python/config.env b/semantic-kernel-python/config.env new file mode 100644 index 0000000..0487af5 --- /dev/null +++ b/semantic-kernel-python/config.env @@ -0,0 +1,3 @@ +REPO_NAME=semantic-kernel +CLONE_URL="https://github.com/microsoft/semantic-kernel.git" +DATABASE=pyMSKTest diff --git a/semantic-kernel-python/run.sh b/semantic-kernel-python/run.sh index fa5a16e..13716c1 100644 --- a/semantic-kernel-python/run.sh +++ b/semantic-kernel-python/run.sh @@ -3,11 +3,11 @@ set -eu # Get the MONGODB_URI and OPENAI_API_KEY. -# shellcheck disable=SC2154 -. $workdir/src/env.sh +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname $SCRIPT_DIR) +. $ROOT_DIR/env.sh -# shellcheck disable=SC2154 -. $workdir/src/.evergreen/utils.sh +. $ROOT_DIR/.evergreen/utils.sh PYTHON_BINARY=$(find_python3)