-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
INTPYTHON-309 & INTPYTHON-417 Use new cluster and schedule on interval #50
Changes from all commits
68cd5c6
5b17bb3
111ced2
ed04fc4
ea81019
2443a55
f260636
749b416
3bda392
3434017
af656e3
9a2093c
f50115f
2b69ba3
7ae343e
06a91d1
19da278
0cd1e0c
c4dcdd3
efba00e
ab11405
ca14f16
c356168
ff80a59
11eef1c
688f5ca
8e2e257
04bef34
77969cf
e9ada75
019be06
ee2fc6c
94a7700
1034e68
4eb03ba
42dcbd2
09f265f
eaf685f
b75ffbe
29b2493
b88b862
fa9ccb0
4c7549e
2ac1f31
ea5ffe5
d08aafe
0e97128
1618a15
5809946
83af185
8757c48
04926fb
c650983
cf3c154
aa418b6
620845f
c4ad06e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash | ||
|
||
set -eu | ||
|
||
# Clone drivers-evergreen-tools. | ||
git clone https://github.com/mongodb-labs/drivers-evergreen-tools | ||
|
||
# Get the secrets for drivers/ai-ml-pipeline-testing. | ||
. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,14 @@ | ||
#!/bin/bash | ||
set -eu | ||
|
||
. .evergreen/utils.sh | ||
|
||
PYTHON_BINARY=$(find_python3) | ||
|
||
# Should be called from src | ||
EVERGREEN_PATH=$(pwd)/.evergreen | ||
TARGET_DIR=$(pwd)/$DIR | ||
SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py | ||
|
||
set -ex | ||
mkdir atlas | ||
|
||
setup_local_atlas | ||
scaffold_atlas | ||
|
||
cd atlas | ||
|
||
$PYTHON_BINARY -m venv . | ||
source ./bin/activate | ||
|
||
# Test server is up | ||
$PYTHON_BINARY -m pip install pymongo | ||
CONN_STRING=$CONN_STRING \ | ||
$PYTHON_BINARY -c "from pymongo import MongoClient; import os; MongoClient(os.environ['CONN_STRING']).db.command('ping')" | ||
# Get the secrets. | ||
source secrets-export.sh | ||
|
||
# Add database and index configurations | ||
DATABASE=$DATABASE \ | ||
CONN_STRING=$CONN_STRING \ | ||
REPO_NAME=$REPO_NAME \ | ||
DIR=$DIR \ | ||
TARGET_DIR=$TARGET_DIR \ | ||
$PYTHON_BINARY $SCAFFOLD_SCRIPT | ||
# Create the env file | ||
echo "export OPENAI_API_KEY=$OPENAI_API_KEY" >> env.sh | ||
echo "export MONGODB_URI=$CONN_STRING" >> env.sh |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,8 @@ | |
import logging | ||
import os | ||
from pathlib import Path | ||
from typing import Any, Union | ||
from time import sleep, monotonic | ||
from typing import Any, Callable, Union | ||
|
||
from pymongo import MongoClient | ||
from pymongo.database import Database | ||
|
@@ -13,7 +14,7 @@ | |
|
||
logging.basicConfig() | ||
logger = logging.getLogger(__file__) | ||
logger.setLevel(logging.DEBUG if os.environ.get("DEBUG") else logging.INFO) | ||
logger.setLevel(logging.DEBUG) | ||
|
||
DATABASE_NAME = os.environ.get("DATABASE") | ||
CONN_STRING = os.environ.get("CONN_STRING") | ||
|
@@ -41,12 +42,17 @@ def upload_data(db: Database, filename: Path) -> None: | |
db.name, | ||
collection_name, | ||
) | ||
collections = [c["name"] for c in db.list_collections()] | ||
if collection_name in collections: | ||
logger.debug("Clearing existing collection", collection_name) | ||
db[collection_name].delete_many({}) | ||
Comment on lines
+46
to
+48
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NIT: We could also drop the collection entirely. This would remove all existing index definitions on it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried that but it seemed to cause race conditions. |
||
|
||
if not isinstance(loaded_collection, list): | ||
loaded_collection = [loaded_collection] | ||
if loaded_collection: | ||
result: InsertManyResult = db[collection_name].insert_many(loaded_collection) | ||
logger.debug("Uploaded results for %s: %s", filename.name, result.inserted_ids) | ||
else: | ||
elif collection_name not in collections: | ||
logger.debug("Empty collection named %s created", collection_name) | ||
db.create_collection(collection_name) | ||
|
||
|
@@ -66,12 +72,87 @@ def create_index(client: MongoClient, filename: Path) -> None: | |
index_name = loaded_index_configuration.pop("name") | ||
index_type = loaded_index_configuration.pop("type", None) | ||
|
||
logger.debug( | ||
"creating search index: %s on %s.%s...", | ||
index_name, | ||
database_name, | ||
collection_name, | ||
) | ||
Comment on lines
+75
to
+80
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💯 |
||
|
||
collection = client[database_name][collection_name] | ||
|
||
search_index = SearchIndexModel( | ||
loaded_index_configuration, name=index_name, type=index_type | ||
) | ||
collection.create_search_index(search_index) | ||
indexes = [index["name"] for index in collection.list_search_indexes()] | ||
if index_name not in indexes: | ||
collection.create_search_index(search_index) | ||
|
||
else: | ||
logger.debug( | ||
"search index already exists, updating: %s on %s.%s", | ||
index_name, | ||
database_name, | ||
collection_name, | ||
) | ||
collection.update_search_index(index_name, loaded_index_configuration) | ||
|
||
logger.debug("waiting for search index to be ready...") | ||
wait_until_complete = 120 | ||
_wait_for_predicate( | ||
predicate=lambda: _is_index_ready(collection, index_name), | ||
err=f"Index {index_name} update did not complete in {wait_until_complete}!", | ||
timeout=wait_until_complete, | ||
) | ||
logger.debug("waiting for search index to be ready... done.") | ||
|
||
logger.debug( | ||
"creating search index: %s on %s.%s... done", | ||
index_name, | ||
database_name, | ||
collection_name, | ||
) | ||
|
||
|
||
def _is_index_ready(collection: Any, index_name: str) -> bool: | ||
"""Check for the index name in the list of available search indexes. | ||
|
||
This confirms that the specified index is of status READY. | ||
|
||
Args: | ||
collection (Collection): MongoDB Collection to query for the search indexes | ||
index_name (str): Vector Search Index name | ||
|
||
Returns: | ||
bool: True if the index is present and READY, False otherwise | ||
""" | ||
search_indexes = collection.list_search_indexes(index_name) | ||
|
||
for index in search_indexes: | ||
if index["status"] == "READY": | ||
return True | ||
return False | ||
|
||
|
||
def _wait_for_predicate( | ||
predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5 | ||
) -> None: | ||
"""Generic helper that blocks until the predicate returns true. | ||
|
||
Args: | ||
predicate (Callable[[], bool]): A function that returns a boolean value | ||
err (str): Error message for the TimeoutError raised if the predicate is still false after the timeout | ||
timeout (float, optional): Wait time for predicate, in seconds. Defaults to 120. | ||
interval (float, optional): Interval between predicate checks, in seconds. Defaults to 0.5. | ||
|
||
Raises: | ||
TimeoutError: If the predicate does not return true within the timeout | ||
""" | ||
start = monotonic() | ||
while not predicate(): | ||
if monotonic() - start > timeout: | ||
raise TimeoutError(err) | ||
sleep(interval) | ||
|
||
|
||
def walk_directory(filepath) -> list[str]: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to add tags, or can we do regex matching in Evergreen to make sure these aren't run on pull requests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done