diff --git a/bajor/batch/predictions.py b/bajor/batch/predictions.py index 5bc878c..95f4462 100644 --- a/bajor/batch/predictions.py +++ b/bajor/batch/predictions.py @@ -84,6 +84,7 @@ def create_batch_job(job_id, manifest_url, pool_id): copy_code_to_shared_dir = 'cp -Rf $AZ_BATCH_NODE_MOUNTS_DIR/$PREDICTIONS_CONTAINER_MOUNT_DIR/$CODE_DIR_PATH/* $AZ_BATCH_NODE_SHARED_DIR/' job.job_preparation_task = batchmodels.JobPreparationTask( command_line=f'/bin/bash -c \"set -ex; {create_results_dir}; {copy_code_to_shared_dir}\"', + constraints=batchmodels.TaskConstraints(max_task_retry_count=3), # # A busted preparation task means the main task won't launch...ever! # and leave the node in a scaled state costing $$ ££