From 2a8562cb3c1031acc8a38fe4116fd521ac30473a Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 26 Sep 2024 14:38:51 -0400 Subject: [PATCH] [TheiaCoV] iVar Consensus Pipefail fix (#629) * added pipefail to avoid silent failure * exposed vm compute parameters for tasks * Update task_ivar_variant_call.wdl Added set -euo pipefail to variants task. * Update task_ivar_variant_call.wdl Switch o and u for consistency * Update wf_ivar_consensus.wdl Added optional inputs for compute variables for stats and coverage for primer trimming * Update wf_ivar_consensus.wdl Corrected disk size naming variable for stats n coverage prim trim * Update test_wf_theiacov_illumina_pe.yml Update md5 sum for variant call and consensus for ivar consensus tests. * Update test_wf_theiacov_illumina_se.yml Update md5 check sum for variant call and consensus * Update wf_ivar_consensus.wdl Remove buffer space * Update theiacov.md Updated docs for ivar consensus optional inputs exposing compute params. * Order variables alphabetically for ivar_consensus --- .../genomic_characterization/theiacov.md | 24 ++++++++ tasks/assembly/task_ivar_consensus.wdl | 4 +- .../task_ivar_variant_call.wdl | 3 +- .../theiacov/test_wf_theiacov_illumina_pe.yml | 6 +- .../theiacov/test_wf_theiacov_illumina_se.yml | 6 +- workflows/utilities/wf_ivar_consensus.wdl | 61 ++++++++++++++++--- 6 files changed, 89 insertions(+), 15 deletions(-) diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index b1859ebed..24881de45 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -191,8 +191,32 @@ All TheiaCoV Workflows (not TheiaCoV_FASTA_Batch) | gene_coverage | **min_depth** | Int | The minimum depth to determine if a position was covered. 
| 10 | Optional | ONT, PE, SE | MPXV, sars-cov-2 | | gene_coverage | **sc2_s_gene_start** | Int | start nucleotide position of the SARS-CoV-2 Spike gene | 21563 | Optional | CL, ONT, PE, SE | MPXV, sars-cov-2 | | gene_coverage | **sc2_s_gene_stop** | Int | End/Last nucleotide position of the SARS-CoV-2 Spike gene | 25384 | Optional | CL, ONT, PE, SE | MPXV, sars-cov-2 | +| ivar_consensus | **ivar_bwa_cpu** | Int | Number of CPUs to allocate to the task | 6 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_bwa_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_bwa_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_bwa_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 16 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_consensus_cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_consensus_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_consensus_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_consensus_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_trim_primers_cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | 
**ivar_trim_primers_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_trim_primers_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_trim_primers_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_variant_cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_variant_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_variant_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **ivar_variant_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | | ivar_consensus | **read2** | File | Internal component, do not modify | | Do not modify, Optional | SE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | | ivar_consensus | **skip_N** | Boolean | True/False variable that determines if regions with depth less than minimum depth should not be added to the consensus sequence | FALSE | Optional | PE, SE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, 
sars-cov-2 | +| ivar_consensus | **stats_n_coverage_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/samtools:1.15 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_primtrim_cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_primtrim_disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_primtrim_docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/samtools:1.15 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | +| ivar_consensus | **stats_n_coverage_primtrim_memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 8 | Optional | SE,PE | HIV, MPXV, WNV, rsv_a, rsv_b, sars-cov-2 | | kraken2_dehosted | **cpu** | Int | Number of CPUs to allocate to the task | 4 | Optional | CL | sars-cov-2 | | kraken2_dehosted | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | CL | sars-cov-2 | | kraken2_dehosted | **docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/kraken2:2.0.8-beta_hv | Optional | CL | sars-cov-2 | diff --git a/tasks/assembly/task_ivar_consensus.wdl b/tasks/assembly/task_ivar_consensus.wdl index d3069ae3a..2755fefcb 100644 --- a/tasks/assembly/task_ivar_consensus.wdl +++ b/tasks/assembly/task_ivar_consensus.wdl @@ -20,6 +20,8 @@ task consensus { String docker = "us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan" } command <<< + #set -euo pipefail to avoid silent failure + set 
-euo pipefail # date and version control date | tee DATE ivar version | head -n1 | tee IVAR_VERSION @@ -70,4 +72,4 @@ task consensus { preemptible: 0 maxRetries: 3 } -} \ No newline at end of file +} diff --git a/tasks/gene_typing/variant_detection/task_ivar_variant_call.wdl b/tasks/gene_typing/variant_detection/task_ivar_variant_call.wdl index e810a13ae..7ed7275f3 100644 --- a/tasks/gene_typing/variant_detection/task_ivar_variant_call.wdl +++ b/tasks/gene_typing/variant_detection/task_ivar_variant_call.wdl @@ -19,6 +19,7 @@ task variant_call { String docker = "us-docker.pkg.dev/general-theiagen/staphb/ivar:1.3.1-titan" } command <<< + set -euo pipefail # version control ivar version | head -n1 | tee IVAR_VERSION samtools --version | head -n1 | tee SAMTOOLS_VERSION @@ -107,4 +108,4 @@ task variant_call { preemptible: 0 maxRetries: 3 } -} \ No newline at end of file +} diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index d334e6b19..b1bb6da13 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -236,7 +236,7 @@ - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/work/_miniwdl_inputs/0/SRR13687078.primertrim.sorted.bam # variant call - path: miniwdl_run/call-ivar_consensus/call-variant_call/command - md5sum: bc40f95c8c990dbf25a8d8408a7e0195 + md5sum: 871e716bb580f4157f34d4356f681482 - path: miniwdl_run/call-ivar_consensus/call-variant_call/inputs.json contains: ["bamfile", "variant_min_freq", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-variant_call/outputs.json @@ -267,7 +267,7 @@ - path: miniwdl_run/call-ivar_consensus/call-variant_call/work/_miniwdl_inputs/0/SRR13687078.primertrim.sorted.bam # consensus - path: miniwdl_run/call-ivar_consensus/call-consensus/command - md5sum: ccdea44d43b85395ac64135a9fbe3e08 + md5sum: 89e10c8cf1368912456d477a3cb30dbf - path: 
miniwdl_run/call-ivar_consensus/call-consensus/inputs.json contains: ["bamfile", "samplename", "min_depth"] - path: miniwdl_run/call-ivar_consensus/call-consensus/outputs.json @@ -529,4 +529,4 @@ - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["read_QC_trim", "description", "call", "output"] - path: miniwdl_run/workflow.log - contains: ["wdl", "theiacov_illumina_pe", "NOTICE", "done"] \ No newline at end of file + contains: ["wdl", "theiacov_illumina_pe", "NOTICE", "done"] diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index 37e1b629c..9af5b61c9 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -188,7 +188,7 @@ - path: miniwdl_run/call-ivar_consensus/call-stats_n_coverage_primtrim/work/_miniwdl_inputs/0/ERR6319327.primertrim.sorted.bam # variant call - path: miniwdl_run/call-ivar_consensus/call-variant_call/command - md5sum: b19b644c97a1e267664bc334622471a4 + md5sum: 0e927e531c7c66c130fbb69e4fece330 - path: miniwdl_run/call-ivar_consensus/call-variant_call/inputs.json contains: ["bamfile", "variant_min_freq", "samplename"] - path: miniwdl_run/call-ivar_consensus/call-variant_call/outputs.json @@ -219,7 +219,7 @@ - path: miniwdl_run/call-ivar_consensus/call-variant_call/work/_miniwdl_inputs/0/ERR6319327.primertrim.sorted.bam # consensus - path: miniwdl_run/call-ivar_consensus/call-consensus/command - md5sum: 043ae7ecbd31d90be3e92cdc9fd55c10 + md5sum: d74095745fef2e76e82e1f195da8b32e - path: miniwdl_run/call-ivar_consensus/call-consensus/inputs.json contains: ["bamfile", "samplename", "min_depth"] - path: miniwdl_run/call-ivar_consensus/call-consensus/outputs.json @@ -474,4 +474,4 @@ - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl contains: ["read_QC_trim", "description", "call", "output"] - path: miniwdl_run/workflow.log - contains: ["wdl", 
"theiacov_illumina_se", "NOTICE", "done"] \ No newline at end of file + contains: ["wdl", "theiacov_illumina_se", "NOTICE", "done"] diff --git a/workflows/utilities/wf_ivar_consensus.wdl b/workflows/utilities/wf_ivar_consensus.wdl index 1da0fb5f6..8dadbd2b9 100644 --- a/workflows/utilities/wf_ivar_consensus.wdl +++ b/workflows/utilities/wf_ivar_consensus.wdl @@ -22,25 +22,61 @@ workflow ivar_consensus { Float consensus_min_freq File? primer_bed Boolean? skip_N + Int? ivar_bwa_cpu + Int? ivar_bwa_memory + Int? ivar_bwa_disk_size + String? ivar_bwa_docker + Int? ivar_trim_primers_cpu + Int? ivar_trim_primers_memory + Int? ivar_trim_primers_disk_size + String? ivar_trim_primers_docker + Int? stats_n_coverage_primtrim_cpu + Int? stats_n_coverage_primtrim_memory + Int? stats_n_coverage_primtrim_disk_size + String? stats_n_coverage_primtrim_docker + Int? ivar_variant_cpu + Int? ivar_variant_memory + Int? ivar_variant_disk_size + String? ivar_variant_docker + Int? ivar_consensus_cpu + Int? ivar_consensus_memory + Int? ivar_consensus_disk_size + String? ivar_consensus_docker + Int? stats_n_coverage_cpu + Int? stats_n_coverage_memory + Int? stats_n_coverage_disk_size + String? 
stats_n_coverage_docker } call bwa_task.bwa { input: samplename = samplename, read1 = read1, read2 = read2, - reference_genome = reference_genome + reference_genome = reference_genome, + cpu = ivar_bwa_cpu, + memory = ivar_bwa_memory, + disk_size = ivar_bwa_disk_size, + docker = ivar_bwa_docker } if (trim_primers) { call primer_trim_task.primer_trim { input: samplename = samplename, primer_bed = select_first([primer_bed]), - bamfile = bwa.sorted_bam + bamfile = bwa.sorted_bam, + cpu = ivar_trim_primers_cpu, + memory = ivar_trim_primers_memory, + disk_size = ivar_trim_primers_disk_size, + docker = ivar_trim_primers_docker } call assembly_metrics.stats_n_coverage as stats_n_coverage_primtrim { input: samplename = samplename, bamfile = primer_trim.trim_sorted_bam, + cpu = stats_n_coverage_primtrim_cpu, + memory = stats_n_coverage_primtrim_memory, + disk_size = stats_n_coverage_primtrim_disk_size, + docker = stats_n_coverage_primtrim_docker } } call variant_call_task.variant_call { @@ -50,8 +86,11 @@ workflow ivar_consensus { reference_gff = reference_gff, reference_genome = reference_genome, variant_min_depth = min_depth, - variant_min_freq = variant_min_freq - + variant_min_freq = variant_min_freq, + cpu = ivar_variant_cpu, + memory = ivar_variant_memory, + disk_size = ivar_variant_disk_size, + docker = ivar_variant_docker } call consensus_task.consensus { input: @@ -60,12 +99,20 @@ workflow ivar_consensus { reference_genome = reference_genome, consensus_min_depth = min_depth, consensus_min_freq = consensus_min_freq, - skip_N = skip_N + skip_N = skip_N, + cpu = ivar_consensus_cpu, + memory = ivar_consensus_memory, + disk_size = ivar_consensus_disk_size, + docker = ivar_consensus_docker } call assembly_metrics.stats_n_coverage { input: samplename = samplename, - bamfile = bwa.sorted_bam + bamfile = bwa.sorted_bam, + cpu = stats_n_coverage_cpu, + memory = stats_n_coverage_memory, + disk_size = stats_n_coverage_disk_size, + docker = stats_n_coverage_docker } output { # 
bwa outputs @@ -108,4 +155,4 @@ workflow ivar_consensus { String samtools_version_stats = stats_n_coverage.samtools_version } -} \ No newline at end of file +}