Skip to content

Commit

Permalink
add retry constraints to prediction job
Browse files Browse the repository at this point in the history
  • Loading branch information
Tooyosi committed Sep 11, 2024
1 parent 01c5b05 commit a2ec5fa
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions bajor/batch/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def create_batch_job(job_id, manifest_url, pool_id):
copy_code_to_shared_dir = 'cp -Rf $AZ_BATCH_NODE_MOUNTS_DIR/$PREDICTIONS_CONTAINER_MOUNT_DIR/$CODE_DIR_PATH/* $AZ_BATCH_NODE_SHARED_DIR/'
job.job_preparation_task = batchmodels.JobPreparationTask(
command_line=f'/bin/bash -c \"set -ex; {create_results_dir}; {copy_code_to_shared_dir}\"',
constraints=batchmodels.TaskConstraints(max_task_retry_count=3),
#
# A busted preparation task means the main task will never launch,
# and it leaves the node in a scaled state, costing $$ ££
Expand Down

0 comments on commit a2ec5fa

Please sign in to comment.