Skip to content

Commit

Permalink
Merge pull request #50 from blink1073/INTPYTHON-309
Browse files Browse the repository at this point in the history
INTPYTHON-309 & INTPYTHON-417 Use new cluster and schedule on interval
  • Loading branch information
blink1073 authored Dec 4, 2024
2 parents 57d9909 + c4ad06e commit e94c697
Show file tree
Hide file tree
Showing 23 changed files with 412 additions and 124 deletions.
120 changes: 101 additions & 19 deletions .evergreen/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ functions:
params:
directory: "src"

"fetch secrets":
- command: subprocess.exec
type: setup
params:
working_dir: "src"
binary: bash
args: [.evergreen/fetch-secrets.sh]

"fetch repo":
- command: shell.exec
type: setup
Expand All @@ -54,58 +62,116 @@ functions:
add_expansions_to_env: true
working_dir: "src/${DIR}/${REPO_NAME}"
binary: bash
env:
atlas: ${workdir}/src/atlas/bin/atlas
args:
- ../run.sh

"setup atlas cli":
"setup local atlas":
- command: subprocess.exec
type: setup
retry_on_failure: true
params:
add_expansions_to_env: true
working_dir: "src"
binary: bash
env:
atlas: ${workdir}/src/atlas/bin/atlas
args:
- .evergreen/provision-atlas.sh

"setup remote atlas":
- command: subprocess.exec
type: setup
params:
add_expansions_to_env: true
working_dir: "src"
binary: bash
args: [.evergreen/setup-remote.sh]

pre_error_fails_task: true
pre:
- func: "fetch source"
- func: "setup atlas cli"
- func: "fetch secrets"

tasks:
- name: test-semantic-kernel-python
- name: test-semantic-kernel-python-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-semantic-kernel-python-remote
tags: [remote]
commands:
- func: "fetch repo"
- func: "setup remote atlas"
- func: "execute tests"

- name: test-semantic-kernel-csharp-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-semantic-kernel-csharp-remote
tags: [remote]
commands:
- func: "fetch repo"
- func: "setup remote atlas"
- func: "execute tests"

- name: test-langchain-python-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-semantic-kernel-csharp
- name: test-langchain-python-remote
tags: [remote]
commands:
- func: "fetch repo"
- func: "setup remote atlas"
- func: "execute tests"

- name: test-langchain-python
- name: test-chatgpt-retrieval-plugin-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-chatgpt-retrieval-plugin
- name: test-chatgpt-retrieval-plugin-remote
tags: [remote]
commands:
- func: "fetch repo"
- func: "setup remote atlas"
- func: "execute tests"

- name: test-llama-index
- name: test-llama-index-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-docarray
# Remote variant of the llama-index task; tagged like every other *-remote task.
- name: test-llama-index-remote
  tags: [remote]
  commands:
    - func: "fetch repo"
    - func: "setup remote atlas"
    - func: "execute tests"

- name: test-docarray-local
tags: [local]
commands:
- func: "fetch repo"
- func: "setup local atlas"
- func: "execute tests"

- name: test-docarray-remote
tags: [remote]
commands:
- func: "fetch repo"
- func: "setup remote atlas"
- func: "execute tests"

buildvariants:
Expand All @@ -121,7 +187,10 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-llama-index
- name: test-llama-index-local
- name: test-llama-index-remote
batchtime: 10080 # 1 week

- name: test-semantic-kernel-python-rhel
display_name: Semantic-Kernel RHEL Python
expansions:
Expand All @@ -132,7 +201,10 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-semantic-kernel-python
- name: test-semantic-kernel-python-local
# TODO: INTPYTHON-430
# - name: test-semantic-kernel-python-remote
# batchtime: 10080 # 1 week

- name: test-semantic-kernel-csharp-rhel
display_name: Semantic-Kernel RHEL CSharp
Expand All @@ -144,7 +216,9 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-semantic-kernel-csharp
- name: test-semantic-kernel-csharp-local
- name: test-semantic-kernel-csharp-remote
batchtime: 10080 # 1 week

- name: test-langchain-python-rhel
display_name: Langchain RHEL Python
Expand All @@ -156,7 +230,9 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-langchain-python
- name: test-langchain-python-local
- name: test-langchain-python-remote
batchtime: 10080 # 1 week

- name: test-chatgpt-retrieval-plugin-rhel
display_name: ChatGPT Retrieval Plugin
Expand All @@ -168,7 +244,9 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-chatgpt-retrieval-plugin
- name: test-chatgpt-retrieval-plugin-local
- name: test-chatgpt-retrieval-plugin-remote
batchtime: 10080 # 1 week

- name: test-llama-index-vectorstore-rhel
display_name: LlamaIndex RHEL Vector Store
Expand All @@ -180,7 +258,10 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-llama-index
- name: test-llama-index-local
# TODO: INTPYTHON-440
# - name: test-llama-index-remote
# batchtime: 10080 # 1 week

- name: test-docarray-rhel
display_name: DocArray RHEL
Expand All @@ -192,4 +273,5 @@ buildvariants:
run_on:
- rhel87-small
tasks:
- name: test-docarray
- name: test-docarray-local
# Schedule the remote task weekly, matching the other enabled *-remote entries.
- name: test-docarray-remote
  batchtime: 10080 # 1 week
9 changes: 9 additions & 0 deletions .evergreen/fetch-secrets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Fetch the secrets for drivers/ai-ml-pipeline-testing via drivers-evergreen-tools.

set -eu

# Clone drivers-evergreen-tools. A shallow clone is enough because only the
# current scripts are used, and skipping an existing checkout keeps a re-run
# from aborting under `set -e`.
if [ ! -d drivers-evergreen-tools ]; then
  git clone --depth 1 https://github.com/mongodb-labs/drivers-evergreen-tools
fi

# Get the secrets for drivers/ai-ml-pipeline-testing.
. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing
33 changes: 7 additions & 26 deletions .evergreen/provision-atlas.sh
Original file line number Diff line number Diff line change
@@ -1,33 +1,14 @@
#!/bin/bash
set -eu

. .evergreen/utils.sh

PYTHON_BINARY=$(find_python3)

# Should be called from src
EVERGREEN_PATH=$(pwd)/.evergreen
TARGET_DIR=$(pwd)/$DIR
SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py

set -ex
mkdir atlas

setup_local_atlas
scaffold_atlas

cd atlas

$PYTHON_BINARY -m venv .
source ./bin/activate

# Test server is up
$PYTHON_BINARY -m pip install pymongo
CONN_STRING=$CONN_STRING \
$PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['CONN_STRING']).db.command('ping')"
# Get the secrets.
source secrets-export.sh

# Add database and index configurations
DATABASE=$DATABASE \
CONN_STRING=$CONN_STRING \
REPO_NAME=$REPO_NAME \
DIR=$DIR \
TARGET_DIR=$TARGET_DIR \
$PYTHON_BINARY $SCAFFOLD_SCRIPT
# Create the env file
echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh
echo "export MONGODB_URI=$CONN_STRING" >> env.sh
89 changes: 85 additions & 4 deletions .evergreen/scaffold_atlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import logging
import os
from pathlib import Path
from typing import Any, Union
from time import sleep, monotonic
from typing import Any, Callable, Union

from pymongo import MongoClient
from pymongo.database import Database
Expand All @@ -13,7 +14,7 @@

logging.basicConfig()
logger = logging.getLogger(__file__)
logger.setLevel(logging.DEBUG if os.environ.get("DEBUG") else logging.INFO)
logger.setLevel(logging.DEBUG)

DATABASE_NAME = os.environ.get("DATABASE")
CONN_STRING = os.environ.get("CONN_STRING")
Expand Down Expand Up @@ -41,12 +42,17 @@ def upload_data(db: Database, filename: Path) -> None:
db.name,
collection_name,
)
collections = [c["name"] for c in db.list_collections()]
if collection_name in collections:
# The collection name needs a %s placeholder; without one, logging cannot format the record.
logger.debug("Clearing existing collection %s", collection_name)
db[collection_name].delete_many({})

if not isinstance(loaded_collection, list):
loaded_collection = [loaded_collection]
if loaded_collection:
result: InsertManyResult = db[collection_name].insert_many(loaded_collection)
logger.debug("Uploaded results for %s: %s", filename.name, result.inserted_ids)
else:
elif collection_name not in collections:
logger.debug("Empty collection named %s created", collection_name)
db.create_collection(collection_name)

Expand All @@ -66,12 +72,87 @@ def create_index(client: MongoClient, filename: Path) -> None:
index_name = loaded_index_configuration.pop("name")
index_type = loaded_index_configuration.pop("type", None)

logger.debug(
"creating search index: %s on %s.%s...",
index_name,
database_name,
collection_name,
)

collection = client[database_name][collection_name]

search_index = SearchIndexModel(
loaded_index_configuration, name=index_name, type=index_type
)
collection.create_search_index(search_index)
indexes = [index["name"] for index in collection.list_search_indexes()]
if index_name not in indexes:
collection.create_search_index(search_index)

else:
logger.debug(
"search index already exists, updating: %s on %s.%s",
index_name,
database_name,
collection_name,
)
collection.update_search_index(index_name, loaded_index_configuration)

logger.debug("waiting for search index to be ready...")
wait_until_complete = 120
_wait_for_predicate(
predicate=lambda: _is_index_ready(collection, index_name),
err=f"Index {index_name} update did not complete in {wait_until_complete}!",
timeout=wait_until_complete,
)
logger.debug("waiting for search index to be ready... done.")

logger.debug(
"creating search index: %s on %s.%s... done",
index_name,
database_name,
collection_name,
)


def _is_index_ready(collection: Any, index_name: str) -> bool:
"""Check for the index name in the list of available search indexes.
This confirms that the specified index is of status READY.
Args:
collection (Collection): MongoDB Collection to for the search indexes
index_name (str): Vector Search Index name
Returns:
bool : True if the index is present and READY false otherwise
"""
search_indexes = collection.list_search_indexes(index_name)

for index in search_indexes:
if index["status"] == "READY":
return True
return False


def _wait_for_predicate(
predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5
) -> None:
"""Generic to block until the predicate returns true.
Args:
predicate (Callable[, bool]): A function that returns a boolean value
err (str): Error message to raise if nothing occurs
timeout (float, optional): Wait time for predicate. Defaults to TIMEOUT.
interval (float, optional): Interval to check predicate. Defaults to DELAY.
Raises:
TimeoutError: _description_
"""
start = monotonic()
while not predicate():
if monotonic() - start > timeout:
raise TimeoutError(err)
sleep(interval)


def walk_directory(filepath) -> list[str]:
Expand Down
Loading

0 comments on commit e94c697

Please sign in to comment.