mongodb-labs · blink1073 · Dec 16, 2024 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/.evergreen/config.yml b/.evergreen/config.yml
@@ -37,40 +37,29 @@ functions:
         args: [.evergreen/fetch-secrets.sh]
 
   "fetch repo":
-    - command: shell.exec
+    - command: subprocess.exec
       type: setup
       params:
+        include_expansions_in_env: [DIR]  # fetch-repo.sh reads DIR here and sources the rest from config.env
         working_dir: "src"
-        script: |
-          if [ ! -d "${DIR}" ]; then
-            echo '${REPO_NAME} could not be found' 1>&2
-            exit 1
-          fi
-          # Apply patches to upstream repo if desired.
-          cd ${DIR}
-          git clone ${CLONE_URL}
-          if [ -d "patches" ]; then
-            cd ${REPO_NAME}
-            echo "Applying patches."
-            git apply  ../patches/*
-          fi
+        binary: bash
+        args: [.evergreen/fetch-repo.sh]
 
   "execute tests":
     - command: subprocess.exec
       type: test
       params:
-        add_expansions_to_env: true
-        working_dir: "src/${DIR}/${REPO_NAME}"
+        include_expansions_in_env: [DIR]
+        working_dir: "src"  # execute-tests.sh changes into the cloned repo itself
         binary: bash
-        args:
-          - ../run.sh
+        args: [.evergreen/execute-tests.sh]
 
   "setup local atlas":
     - command: subprocess.exec
       type: setup
       retry_on_failure: true
       params:
-        add_expansions_to_env: true
+        include_expansions_in_env: [DIR]
         working_dir: "src"
         binary: bash
         args:
@@ -80,7 +69,7 @@ functions:
     - command: subprocess.exec
       type: setup
       params:
-        add_expansions_to_env: true
+        include_expansions_in_env: [DIR]
         working_dir: "src"
         binary: bash
         args: [.evergreen/setup-remote.sh]
@@ -194,10 +183,6 @@ buildvariants:
     display_name: LlamaIndex RHEL KV Store
     expansions:
       DIR: llama-index-python-kvstore
-      REPO_NAME: llama_index
-      # TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326]
-      CLONE_URL: -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git
-      DATABASE: llama_index_test_db
     run_on:
       - rhel87-small
     tasks:
@@ -209,9 +194,6 @@ buildvariants:
     display_name: Semantic-Kernel RHEL Python
     expansions:
       DIR: semantic-kernel-python
-      REPO_NAME: semantic-kernel
-      CLONE_URL: https://github.com/microsoft/semantic-kernel.git
-      DATABASE: pyMSKTest
     run_on:
       - rhel87-small
     tasks:
@@ -224,9 +206,6 @@ buildvariants:
     display_name: Semantic-Kernel RHEL CSharp
     expansions:
       DIR: semantic-kernel-csharp
-      REPO_NAME: semantic-kernel
-      CLONE_URL: https://github.com/microsoft/semantic-kernel.git
-      DATABASE: dotnetMSKNearestTest
     run_on:
       - rhel87-small
     tasks:
@@ -238,9 +217,6 @@ buildvariants:
     display_name: Langchain RHEL Python
     expansions:
       DIR: langchain-python
-      REPO_NAME: langchain-mongodb
-      CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git
-      DATABASE: langchain_test_db
     run_on:
       - rhel87-small
     tasks:
@@ -252,9 +228,6 @@ buildvariants:
     display_name: Langgraph RHEL Python
     expansions:
       DIR: langgraph-python
-      REPO_NAME: langchain-mongodb
-      CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git
-      DATABASE: langgraph-test
     run_on:
       - rhel87-small
     tasks:
@@ -266,9 +239,6 @@ buildvariants:
     display_name: ChatGPT Retrieval Plugin
     expansions:
       DIR: chatgpt-retrieval-plugin
-      REPO_NAME: chatgpt-retrieval-plugin
-      CLONE_URL: https://github.com/openai/chatgpt-retrieval-plugin.git
-      DATABASE: chatgpt_retrieval_plugin_test_db
     run_on:
       - rhel87-small
     tasks:
@@ -280,9 +250,6 @@ buildvariants:
     display_name: LlamaIndex RHEL Vector Store
     expansions:
       DIR: llama-index-python-vectorstore
-      REPO_NAME: llama_index
-      CLONE_URL: https://github.com/run-llama/llama_index.git
-      DATABASE: llama_index_test_db
     run_on:
       - rhel87-small
     tasks:
@@ -295,9 +262,6 @@ buildvariants:
     display_name: DocArray RHEL
     expansions:
       DIR: docarray
-      REPO_NAME: docarray
-      CLONE_URL: https://github.com/docarray/docarray.git
-      DATABASE: docarray_test_db
     run_on:
       - rhel87-small
     tasks:

diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Run the test suite of the integration named by $DIR.
+set -eu
+
+SCRIPT_DIR=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
+ROOT_DIR=$(dirname "$SCRIPT_DIR")
+
+# Export everything config.env defines so that run.sh inherits it.
+cd "${ROOT_DIR}/${DIR}"
+set -a
+source config.env
+set +a
+
+# run.sh is executed from inside the cloned upstream repo.
+cd "${REPO_NAME}"
+bash "${ROOT_DIR}/${DIR}/run.sh"
diff --git a/.evergreen/fetch-repo.sh b/.evergreen/fetch-repo.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Clone the upstream repo for the integration in $DIR and apply local patches.
+set -eu
+
+if [ ! -d "${DIR}" ]; then
+  echo "${DIR} could not be found" 1>&2
+  exit 1
+fi
+
+cd "${DIR}"
+
+# Export everything config.env defines (REPO_NAME, CLONE_URL, ...).
+set -a
+source config.env
+set +a
+
+rm -rf "${REPO_NAME:?}"  # start from a clean clone; :? aborts on an empty name
+git clone ${CLONE_URL}  # unquoted on purpose: may carry extra clone options
+
+# Apply patches to upstream repo if desired.
+if [ -d "patches" ]; then
+  cd "${REPO_NAME}"
+  echo "Applying patches."
+  git apply  ../patches/*
+fi
diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh
@@ -3,7 +3,7 @@
 set -eu
 
 # Clone drivers-evergeen-tools.
-git clone https://github.com/mongodb-labs/drivers-evergreen-tools
+[ -d drivers-evergreen-tools ] || git clone https://github.com/mongodb-labs/drivers-evergreen-tools  # skip when already cloned
 
 # Get the secrets for drivers/ai-ml-pipeline-testing.
 . drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing
diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh
@@ -3,6 +3,13 @@ set -eu
 
 . .evergreen/utils.sh
 
+# Export every variable that config.env defines.
+pushd "$DIR"
+set -a
+. config.env
+set +a  # turn allexport back off so later assignments are not exported
+popd
+
 setup_local_atlas
 scaffold_atlas
 

diff --git a/.evergreen/setup-remote.sh b/.evergreen/setup-remote.sh
@@ -8,6 +8,13 @@ if [ -z "${DIR:-}" ]; then
     exit 1
 fi
 
+# Export every variable that config.env defines.
+pushd "$DIR"
+set -a
+. config.env
+set +a  # turn allexport back off so later assignments are not exported
+popd
+
 # Get the correct remote URI.
 case $DIR in
     llama-index-python-kvstore)

diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh
@@ -68,7 +68,7 @@ setup_local_atlas() {
     IMAGE=artifactory.corp.mongodb.com/dockerhub/mongodb/mongodb-atlas-local:latest
     retry podman pull $IMAGE
 
-    CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" mongodb/mongodb-atlas-local:latest)
+    CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" $IMAGE)
 
     echo "waiting for container to become healthy..."
     function wait() {
@@ -104,13 +104,13 @@ setup_local_atlas() {
     wait "$CONTAINER_ID"
     EXPOSED_PORT=$(podman inspect --format='{{ (index (index .NetworkSettings.Ports "27017/tcp") 0).HostPort }}' "$CONTAINER_ID")
     export CONN_STRING="mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true"
-    # shellcheck disable=SC2154
-    echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $workdir/src/.evergreen/.local_atlas_uri
+    SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
+    echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $SCRIPT_DIR/.local_atlas_uri
 }
 
 fetch_local_atlas_uri() {
-    # shellcheck disable=SC2154
-    . $workdir/src/.evergreen/.local_atlas_uri
+    SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
+    . $SCRIPT_DIR/.local_atlas_uri
 
     export CONN_STRING=$CONN_STRING
     echo "$CONN_STRING"
@@ -120,8 +120,7 @@ fetch_local_atlas_uri() {
 scaffold_atlas() {
     PYTHON_BINARY=$(find_python3)
 
-    # Should be called from src
-    EVERGREEN_PATH=$(pwd)/.evergreen
+    EVERGREEN_PATH=$(realpath "$(dirname ${BASH_SOURCE[0]})")
     TARGET_DIR=$(pwd)/$DIR
     SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py
 

diff --git a/.gitignore b/.gitignore
@@ -50,6 +50,8 @@ xunit-results/
 # Miscellaneous
 .DS_Store
 drivers-evergreen-tools
+atlas
+.evergreen/.local_atlas_uri
 
 # Secrets
 secrets-export.sh

diff --git a/README.md b/README.md
@@ -22,6 +22,10 @@ Each subdirectory is scoped to run only one AI/ML integration's suite of tests f
 Within each subdirectory you should expect to have:
 
 - `run.sh` -- A script that should handle any additional library installations and steps for executing the test suite. This script should not populate the Atlas database with any required test data.
+- `config.env` -- A file that defines the following environment variables:
+  - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned
+  - `CLONE_URL` -- The GitHub URL to clone into the specified `DIR`
+  - `DATABASE` -- The optional database where the Atlas CLI will load your index configs
 - `database/` -- An optional directory used by `.evergreen/scaffold_atlas.py` to populate a MongoDB database with test data. Only provide this if your tests require pre-populated data.
 - `database/{collection}.json` -- An optional JSON file containing one or more MongoDB documents that will be uploaded to `$DATABASE.{collection}` in the local Atlas instance. Only provide this if your tests require pre-populated data.
 - `indexConfig.json` -- An optional file containing configuration for a specified Atlas Search Index.
@@ -40,12 +44,15 @@ The general layout of this repo looks like this:
 │   │   └── furthestSearch.json                 # Populates $DATABASE.furthestSearch
 │   ├── indexes                                 # Optional Index definitions directory
 │   │   └── indexConfig.json                    # Optional Search index definition
+│   ├── config.env                              # Configuration file
 │   └── run.sh                                  # Script that executes test
+│
 ├── semantic-kernel-python                      # Folder scoped for one Integration
 │   ├── database                                # Optional database definition
 │   │   └── nearestSearch.json                  # Populates $DATABASE.nearestSearch
 │   │   └── furthestSearch.json                 # Populates $DATABASE.furthestSearch
 │   ├── indexConfig.json                        # Creates Search Index on $DATABASE
+│   ├── config.env                              # Configuration file
 │   └── run.sh                                  # Script that executes test
 ```
 
@@ -54,13 +61,28 @@ The general layout of this repo looks like this:
 Each test subdirectory will automatically have its own local Atlas deployment. As a result, database and collection names will not conflict between different AI/ML integrations. To connect to your local Atlas using a connection string, `utils.sh` has a `fetch_local_atlas_uri` that you can call from the `run.sh` script within your subdirectory. For example:
 
 ```bash
-. $workdir/src/.evergreen/utils.sh
+. "$(dirname "$(realpath "${BASH_SOURCE[0]}")")/../.evergreen/utils.sh"
 
 CONN_STRING=$(fetch_local_atlas_uri)
 ```
 
 Stores the local Atlas URI within the `CONN_STRING` var. The script can then pass `CONN_STRING` as an environment variable to the test suite.
 
+#### Running tests locally
+
+We can run the tests with a local checkout of the repo.
+
+For example, to run the `docarray` tests using a local Atlas deployment:
+
+```bash
+export DIR=docarray
+bash .evergreen/fetch-repo.sh
+bash .evergreen/provision-atlas.sh
+bash .evergreen/execute-tests.sh
+```
+
+Use `.evergreen/setup-remote.sh` instead of `.evergreen/provision-atlas.sh` to test against the remote cluster.
+
 #### Pre-populating the Local Atlas Deployment
 
 You can pre-populate a test's local Atlas deployment before running the `run.sh` script by providing JSON files in the optional `database` directory of the created subdirectory. The `.evergreen/scaffold_atlas.py` file will search for every JSON file within this database directory and upload the documents to the database provided by the `DATABASE` expansion provided in the build variant of the `.evergreen/config.yml` setup. The collection the script uploads to is based on the name of your JSON file:
@@ -82,9 +104,6 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c
 - [`expansions`](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files/#expansions) -- Build variant specific variables. Expansions that need to be maintained as secrets should be stored in [the Evergreen project settings](https://spruce.mongodb.com/project/ai-ml-pipeline-testing/settings/variables) using [variables](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-and-Distro-Settings#variables). Some common expansions needed are:
 
   - `DIR` -- The subdirectory where the tasks will run
-  - `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned
-  - `CLONE_URL` -- The Github URL to clone into the specified `DIR`
-  - `DATABASE` -- The optional database where the Atlas CLI will load your index configs
 
 - `run_on` -- Specified platform to run on. `rhel87-small` should be used by default. Any other distro may fail Atlas CLI setup.
 - `tasks` -- Tasks to run. See below for more details

diff --git a/chatgpt-retrieval-plugin/config.env b/chatgpt-retrieval-plugin/config.env
@@ -0,0 +1,3 @@
+REPO_NAME=chatgpt-retrieval-plugin
+CLONE_URL="https://github.com/openai/chatgpt-retrieval-plugin.git"
+DATABASE=chatgpt_retrieval_plugin_test_db
diff --git a/chatgpt-retrieval-plugin/run.sh b/chatgpt-retrieval-plugin/run.sh
@@ -1,15 +1,15 @@
-#!/bin/sh
+#!/bin/bash
 
 # chat-gpt-retrieval-plugin is a poetry run project
 
 set -eu
 
 # Get the MONGODB_URI and OPENAI_API_KEY.
-# shellcheck disable=SC2154
-. $workdir/src/env.sh
+SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+. $ROOT_DIR/env.sh
 
-# shellcheck disable=SC2154
-. $workdir/src/.evergreen/utils.sh
+. $ROOT_DIR/.evergreen/utils.sh
 
 PYTHON_BINARY=$(find_python3)
 $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')"

diff --git a/docarray/config.env b/docarray/config.env
@@ -0,0 +1,3 @@
+REPO_NAME=docarray
+CLONE_URL="https://github.com/docarray/docarray.git"
+DATABASE=docarray_test_db
diff --git a/docarray/run.sh b/docarray/run.sh
@@ -1,16 +1,18 @@
-#!/bin/sh
+#!/bin/bash
 
 #  Sets up a virtual environment (poetry)
 #  Runs the mongodb tests of the upstream repo
 
 set -eu
 
 # Get the MONGODB_URI.
-# shellcheck disable=SC2154
-. $workdir/src/env.sh
+SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+
+. $ROOT_DIR/env.sh
+
+. $ROOT_DIR/.evergreen/utils.sh
 
-# shellcheck disable=SC2154
-. $workdir/src/.evergreen/utils.sh
 PYTHON_BINARY=$(find_python3)
 $PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')"
 

diff --git a/langchain-python/config.env b/langchain-python/config.env
@@ -0,0 +1,3 @@
+REPO_NAME=langchain-mongodb
+CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git"
+DATABASE=langchain_test_db