From 53f3f0a899578603f59d3ef3ffd195fe22221c99 Mon Sep 17 00:00:00 2001 From: Curtis Kapsak Date: Tue, 5 Dec 2023 17:10:21 -0500 Subject: [PATCH] disable call caching for various workflows (#251) * disabled call caching for basespace_fetch task, transfer_column_content task, and ncbi_datasets task. enabled multi-threading for transfer_column_content task also * disable call caching for theiavalidate task * disable call caching for export_taxon_tables task * update CI * update CI again * disabled call caching for compare_two_tsvs task for theiavalidate * add more call caching to off * revert a volatile true for one that doesn't need it * final md5sum update * fixing a bad copy-paste --------- Co-authored-by: Sage Wright --- .../utilities/submission/task_mercury_file_wrangling.wdl | 4 ++++ tasks/utilities/submission/task_submission.wdl | 4 ++++ tasks/utilities/task_basespace_cli.wdl | 4 ++++ tasks/utilities/task_broad_terra_tools.wdl | 4 ++++ tasks/utilities/task_czgenepi_wrangling.wdl | 4 ++++ tasks/utilities/task_download_terra_table.wdl | 5 ++++- tasks/utilities/task_file_handling.wdl | 6 +++++- tasks/utilities/task_ncbi_datasets.wdl | 4 ++++ tasks/utilities/task_summarize_data.wdl | 4 ++++ tasks/utilities/task_validate.wdl | 8 ++++++++ .../workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- .../workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 12 files changed, 47 insertions(+), 4 deletions(-) diff --git a/tasks/utilities/submission/task_mercury_file_wrangling.wdl b/tasks/utilities/submission/task_mercury_file_wrangling.wdl index 44f4d2e93..2ca566415 100644 --- a/tasks/utilities/submission/task_mercury_file_wrangling.wdl +++ b/tasks/utilities/submission/task_mercury_file_wrangling.wdl @@ -19,6 +19,10 @@ task sm_metadata_wrangling { # the sm stands for supermassive Boolean usa_territory = false # only for SC2; uses territory name (in state column) for country in GISAID submissions Int disk_size = 100 } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # when running on terra, comment out all input_table mentions python3 /scripts/export_large_tsv/export_large_tsv.py --project "~{project_name}" --workspace "~{workspace_name}" --entity_type ~{table_name} --tsv_filename ~{table_name}-data.tsv diff --git a/tasks/utilities/submission/task_submission.wdl b/tasks/utilities/submission/task_submission.wdl index cd01685a5..86131a7f6 100644 --- a/tasks/utilities/submission/task_submission.wdl +++ b/tasks/utilities/submission/task_submission.wdl @@ -14,6 +14,10 @@ task prune_table { String read1_column_name = "read1" String read2_column_name = "read2" } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # when running on terra, comment out all input_table mentions python3 /scripts/export_large_tsv/export_large_tsv.py --project "~{project_name}" --workspace "~{workspace_name}" --entity_type ~{table_name} --tsv_filename ~{table_name}-data.tsv diff --git a/tasks/utilities/task_basespace_cli.wdl b/tasks/utilities/task_basespace_cli.wdl index 452754bc1..c13371e3c 100644 --- a/tasks/utilities/task_basespace_cli.wdl +++ b/tasks/utilities/task_basespace_cli.wdl @@ -16,6 +16,10 @@ task fetch_bs { String docker = "us-docker.pkg.dev/general-theiagen/theiagen/basespace_cli:1.2.1" } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # set basespace name and id variables if [[ ! -z "~{basespace_sample_id}" ]]; then diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 15bc7ff94..f65a91d24 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -341,6 +341,10 @@ task export_taxon_tables { String? srst2_vibrio_serogroup String? srst2_vibrio_biotype } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # capture taxon and corresponding table names from input taxon_tables diff --git a/tasks/utilities/task_czgenepi_wrangling.wdl b/tasks/utilities/task_czgenepi_wrangling.wdl index 97081458a..55e5eed6c 100644 --- a/tasks/utilities/task_czgenepi_wrangling.wdl +++ b/tasks/utilities/task_czgenepi_wrangling.wdl @@ -25,6 +25,10 @@ task czgenepi_wrangling { # runtime Int disk_size = 100 + } + meta { + # added so that call caching is always turned off + volatile: true } command <<< # parse terra table for data diff --git a/tasks/utilities/task_download_terra_table.wdl b/tasks/utilities/task_download_terra_table.wdl index 79eb7e438..9a6c54e2b 100644 --- a/tasks/utilities/task_download_terra_table.wdl +++ b/tasks/utilities/task_download_terra_table.wdl @@ -2,7 +2,10 @@ version 1.0 task download_terra_table { meta { - description: "This task downloads a Terra table and reduces it to only include the samples of interest." + description: "This task downloads a Terra table and reduces it to only include the samples of interest." + + # added so that call caching is always turned off + volatile: true } input { String terra_table_name diff --git a/tasks/utilities/task_file_handling.wdl b/tasks/utilities/task_file_handling.wdl index b332af356..d874dbe44 100644 --- a/tasks/utilities/task_file_handling.wdl +++ b/tasks/utilities/task_file_handling.wdl @@ -83,13 +83,17 @@ task transfer_files { Int mem_size_gb = 8 String docker_image = "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.1" } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< file_path_array="~{sep=' ' files_to_transfer}" gsutil -m cp -n ${file_path_array[@]} ~{target_bucket} echo "transferred_files" > transferred_files.tsv - gsutil ls ~{target_bucket} >> transferred_files.tsv + gsutil -m ls ~{target_bucket} >> transferred_files.tsv >>> output { File transferred_files = "transferred_files.tsv" diff --git a/tasks/utilities/task_ncbi_datasets.wdl b/tasks/utilities/task_ncbi_datasets.wdl index 20d4328b6..f9ae8f1b2 100644 --- a/tasks/utilities/task_ncbi_datasets.wdl +++ b/tasks/utilities/task_ncbi_datasets.wdl @@ -10,6 +10,10 @@ task ncbi_datasets_download_genome_accession { Boolean include_gbff = false Boolean include_gff3 = false } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< date | tee DATE datasets --version | sed 's|datasets version: ||' | tee DATASETS_VERSION diff --git a/tasks/utilities/task_summarize_data.wdl b/tasks/utilities/task_summarize_data.wdl index 917b5a00e..24e7f85fa 100644 --- a/tasks/utilities/task_summarize_data.wdl +++ b/tasks/utilities/task_summarize_data.wdl @@ -15,6 +15,10 @@ task summarize_data { #File? input_table Boolean phandango_coloring = true } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # when running on terra, comment out all input_table mentions python3 /scripts/export_large_tsv/export_large_tsv.py --project "~{terra_project}" --workspace "~{terra_workspace}" --entity_type ~{terra_table} --tsv_filename ~{terra_table}-data.tsv diff --git a/tasks/utilities/task_validate.wdl b/tasks/utilities/task_validate.wdl index bfa8fa8d7..1867ce29c 100644 --- a/tasks/utilities/task_validate.wdl +++ b/tasks/utilities/task_validate.wdl @@ -10,6 +10,10 @@ task export_two_tsvs { String datatable2 Int disk_size = 10 } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< python3 /scripts/export_large_tsv/export_large_tsv.py --project ~{terra_project1} --workspace ~{terra_workspace1} --entity_type ~{datatable1} --tsv_filename "~{datatable1}.tsv" @@ -62,6 +66,10 @@ task compare_two_tsvs { Int disk_size = 10 } + meta { + # added so that call caching is always turned off + volatile: true + } command <<< # too lazy to create a new docker image, this is not good practice pip install pretty_html_table diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 8e74ad94a..78a7f6a6b 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,7 +632,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: faacd87946ee3fbdf70f3a15b79ce547 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 5dc54b8446b6a430fc7375ae364908f0 + md5sum: 4106837e51f6445e02776e0a74606ed5 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: 3acf4dcddbb44d547b69f597761cc048 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 60742d42c..fd86922e0 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -598,7 +598,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: faacd87946ee3fbdf70f3a15b79ce547 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 5dc54b8446b6a430fc7375ae364908f0 + md5sum: 4106837e51f6445e02776e0a74606ed5 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 3e19938fc8a624c7948b57867865561a - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl