mongodb-labs · blink1073 · Dec 4, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/.evergreen/config.yml b/.evergreen/config.yml
@@ -28,6 +28,14 @@ functions:
       params:
         directory: "src"
 
+  "fetch secrets":
+    - command: subprocess.exec
+      type: setup
+      params:
+        working_dir: "src"
+        binary: bash
+        args: [.evergreen/fetch-secrets.sh]
+
   "fetch repo":
     - command: shell.exec
       type: setup
@@ -54,58 +62,105 @@ functions:
         add_expansions_to_env: true
         working_dir: "src/${DIR}/${REPO_NAME}"
         binary: bash
-        env:
-          atlas: ${workdir}/src/atlas/bin/atlas
         args:
           - ../run.sh
 
-  "setup atlas cli":
+  "setup local atlas":
     - command: subprocess.exec
       type: setup
       retry_on_failure: true
       params:
         add_expansions_to_env: true
         working_dir: "src"
         binary: bash
-        env:
-          atlas: ${workdir}/src/atlas/bin/atlas
         args:
           - .evergreen/provision-atlas.sh
 
+  "setup remote atlas":
+    - command: subprocess.exec
+      type: setup
+      params:
+        add_expansions_to_env: true
+        working_dir: "src"
+        binary: bash
+        args: [.evergreen/setup-remote.sh]
+
 pre_error_fails_task: true
 pre:
   - func: "fetch source"
-  - func: "setup atlas cli"
+  - func: "fetch secrets"
 
 tasks:
-  - name: test-semantic-kernel-python
+  - name: test-semantic-kernel-python-local
+    commands:
+      - func: "fetch repo"
+      - func: "setup local atlas"
+      - func: "execute tests"
+
+  - name: test-semantic-kernel-python-remote
+    commands:
+      - func: "fetch repo"
+      - func: "setup remote atlas"
+      - func: "execute tests"
+
+  - name: test-semantic-kernel-csharp-local
+    commands:
+      - func: "fetch repo"
+      - func: "setup local atlas"
+      - func: "execute tests"
+
+  - name: test-semantic-kernel-csharp-remote
+    commands:
+      - func: "fetch repo"
+      - func: "setup remote atlas"
+      - func: "execute tests"
+
+  - name: test-langchain-python-local
     commands:
       - func: "fetch repo"
+      - func: "setup local atlas"
       - func: "execute tests"
 
-  - name: test-semantic-kernel-csharp
+  - name: test-langchain-python-remote
     commands:
       - func: "fetch repo"
+      - func: "setup remote atlas"
       - func: "execute tests"
 
-  - name: test-langchain-python
+  - name: test-chatgpt-retrieval-plugin-local
     commands:
       - func: "fetch repo"
+      - func: "setup local atlas"
       - func: "execute tests"
 
-  - name: test-chatgpt-retrieval-plugin
+  - name: test-chatgpt-retrieval-plugin-remote
     commands:
       - func: "fetch repo"
+      - func: "setup remote atlas"
       - func: "execute tests"
 
-  - name: test-llama-index
+  - name: test-llama-index-local
     commands:
       - func: "fetch repo"
+      - func: "setup local atlas"
       - func: "execute tests"
 
-  - name: test-docarray
+  - name: test-llama-index-remote
     commands:
       - func: "fetch repo"
+      - func: "setup remote atlas"
+      - func: "execute tests"
+
+  - name: test-docarray-local
+    commands:
+      - func: "fetch repo"
+      - func: "setup local atlas"
+      - func: "execute tests"
+
+  - name: test-docarray-remote
+    commands:
+      - func: "fetch repo"
+      - func: "setup remote atlas"
       - func: "execute tests"
 
 buildvariants:
@@ -121,7 +176,10 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-llama-index
+      - name: test-llama-index-local
+      - name: test-llama-index-remote
+        batchtime: 10080  # 1 week
+
   - name: test-semantic-kernel-python-rhel
     display_name: Semantic-Kernel RHEL Python
     expansions:
@@ -132,7 +190,10 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-semantic-kernel-python
+      - name: test-semantic-kernel-python-local
+      # TODO: INTPYTHON-430
+      # - name: test-semantic-kernel-python-remote
+      #   batchtime: 10080  # 1 week
 
   - name: test-semantic-kernel-csharp-rhel
     display_name: Semantic-Kernel RHEL CSharp
@@ -144,7 +205,9 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-semantic-kernel-csharp
+      - name: test-semantic-kernel-csharp-local
+      - name: test-semantic-kernel-csharp-remote
+        batchtime: 10080  # 1 week
 
   - name: test-langchain-python-rhel
     display_name: Langchain RHEL Python
@@ -156,7 +219,9 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-langchain-python
+      - name: test-langchain-python-local
+      - name: test-langchain-python-remote
+        batchtime: 10080  # 1 week
 
   - name: test-chatgpt-retrieval-plugin-rhel
     display_name: ChatGPT Retrieval Plugin
@@ -168,7 +233,9 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-chatgpt-retrieval-plugin
+      - name: test-chatgpt-retrieval-plugin-local
+      - name: test-chatgpt-retrieval-plugin-remote
+        batchtime: 10080  # 1 week
 
   - name: test-llama-index-vectorstore-rhel
     display_name: LlamaIndex RHEL Vector Store
@@ -180,7 +247,9 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-llama-index
+      - name: test-llama-index-local
+      - name: test-llama-index-remote
+        batchtime: 10080  # 1 week
 
   - name: test-docarray-rhel
     display_name: DocArray RHEL
@@ -192,4 +261,6 @@ buildvariants:
     run_on:
       - rhel87-small
     tasks:
-      - name: test-docarray
+      - name: test-docarray-local
+      - name: test-docarray-remote
+        batchtime: 10080  # 1 week
diff --git a/.evergreen/fetch-secrets.sh b/.evergreen/fetch-secrets.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eu
+
+# Clone drivers-evergreen-tools.
+git clone https://github.com/mongodb-labs/drivers-evergreen-tools
+
+# Get the secrets for drivers/ai-ml-pipeline-testing.
+. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing
diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -eu
 
 . .evergreen/utils.sh
 
@@ -9,15 +10,15 @@ EVERGREEN_PATH=$(pwd)/.evergreen
 TARGET_DIR=$(pwd)/$DIR
 SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py
 
-set -ex
 mkdir atlas
 
 setup_local_atlas
 
-cd atlas
+pushd atlas
 
 $PYTHON_BINARY -m venv .
 source ./bin/activate
+popd
 
 # Test server is up
 $PYTHON_BINARY -m pip install pymongo
@@ -29,5 +30,13 @@ DATABASE=$DATABASE \
     CONN_STRING=$CONN_STRING \
     REPO_NAME=$REPO_NAME \
     DIR=$DIR \
+    DEBUG="${DEBUG:-1}" \
     TARGET_DIR=$TARGET_DIR \
     $PYTHON_BINARY $SCAFFOLD_SCRIPT
+
+# Get the secrets.
+source secrets-export.sh
+
+# Create the env file
+echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh
+echo "export MONGODB_URI=$CONN_STRING" >> env.sh
diff --git a/.evergreen/scaffold_atlas.py b/.evergreen/scaffold_atlas.py
@@ -4,7 +4,8 @@
 import logging
 import os
 from pathlib import Path
-from typing import Any, Union
+from time import sleep, monotonic
+from typing import Any, Callable, Union
 
 from pymongo import MongoClient
 from pymongo.database import Database
@@ -13,7 +14,7 @@
 
 logging.basicConfig()
 logger = logging.getLogger(__file__)
-logger.setLevel(logging.DEBUG if os.environ.get("DEBUG") else logging.INFO)
+logger.setLevel(logging.DEBUG)
 
 DATABASE_NAME = os.environ.get("DATABASE")
 CONN_STRING = os.environ.get("CONN_STRING")
@@ -41,12 +42,17 @@ def upload_data(db: Database, filename: Path) -> None:
         db.name,
         collection_name,
     )
+    collections = [c["name"] for c in db.list_collections()]
+    if collection_name in collections:
+        logger.debug("Clearing existing collection %s", collection_name)
+        db[collection_name].delete_many({})  # drop every existing document before the upload below
+
     if not isinstance(loaded_collection, list):
         loaded_collection = [loaded_collection]
     if loaded_collection:
         result: InsertManyResult = db[collection_name].insert_many(loaded_collection)
         logger.debug("Uploaded results for %s: %s", filename.name, result.inserted_ids)
-    else:
+    elif collection_name not in collections:
         logger.debug("Empty collection named %s created", collection_name)
         db.create_collection(collection_name)
 
@@ -66,12 +72,87 @@ def create_index(client: MongoClient, filename: Path) -> None:
     index_name = loaded_index_configuration.pop("name")
     index_type = loaded_index_configuration.pop("type", None)
 
+    logger.debug(
+        "creating search index: %s on %s.%s...",
+        index_name,
+        database_name,
+        collection_name,
+    )
+
     collection = client[database_name][collection_name]
 
     search_index = SearchIndexModel(
         loaded_index_configuration, name=index_name, type=index_type
     )
-    collection.create_search_index(search_index)
+    indexes = [index["name"] for index in collection.list_search_indexes()]
+    if index_name not in indexes:
+        collection.create_search_index(search_index)
+
+    else:
+        logger.debug(
+            "search index already exists, updating: %s on %s.%s",
+            index_name,
+            database_name,
+            collection_name,
+        )
+        collection.update_search_index(index_name, loaded_index_configuration)
+
+    logger.debug("waiting for search index to be ready...")
+    wait_until_complete = 120
+    _wait_for_predicate(
+        predicate=lambda: _is_index_ready(collection, index_name),
+        err=f"Index {index_name} update did not complete in {wait_until_complete}!",
+        timeout=wait_until_complete,
+    )
+    logger.debug("waiting for search index to be ready... done.")
+
+    logger.debug(
+        "creating search index: %s on %s.%s... done",
+        index_name,
+        database_name,
+        collection_name,
+    )
+
+
+def _is_index_ready(collection: Any, index_name: str) -> bool:
+    """Check whether the named search index exists and reports status READY.
+
+    Only indexes returned by ``list_search_indexes(index_name)`` are inspected.
+
+    Args:
+        collection (Collection): MongoDB Collection to query for search indexes
+        index_name (str): Vector Search Index name
+
+    Returns:
+        bool: True if the index is present and READY, False otherwise
+    """
+    search_indexes = collection.list_search_indexes(index_name)
+
+    for index in search_indexes:
+        if index["status"] == "READY":
+            return True
+    return False
+
+
+def _wait_for_predicate(
+    predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5
+) -> None:
+    """Block until the predicate returns a truthy value or the timeout expires.
+
+    Args:
+        predicate (Callable[[], bool]): Zero-argument function polled for truth
+        err (str): Message for the TimeoutError raised when the wait times out
+        timeout (float, optional): Seconds to keep polling. Defaults to 120.
+        interval (float, optional): Seconds to sleep between polls. Defaults to 0.5.
+
+    Raises:
+        TimeoutError: If the predicate is still false after `timeout` seconds
+    """
+    start = monotonic()
+    while not predicate():
+        if monotonic() - start > timeout:
+            raise TimeoutError(err)
+        sleep(interval)
 
 
 def walk_directory(filepath) -> list[str]: