diff --git a/tasks/tasks_nextstrain.wdl b/tasks/tasks_nextstrain.wdl index a833a2ee..a7a6c431 100644 --- a/tasks/tasks_nextstrain.wdl +++ b/tasks/tasks_nextstrain.wdl @@ -15,7 +15,7 @@ task nextclade_one_sample { String docker = "nextstrain/nextclade:1.7.0" } String basename = basename(genome_fasta, ".fasta") - command { + command <<< set -e apt-get update apt-get -y install python3 @@ -57,7 +57,7 @@ task nextclade_one_sample { grep ^clade transposed.tsv | cut -f 2 | grep -v clade > NEXTCLADE_CLADE grep ^aaSubstitutions transposed.tsv | cut -f 2 | grep -v aaSubstitutions > NEXTCLADE_AASUBS grep ^aaDeletions transposed.tsv | cut -f 2 | grep -v aaDeletions > NEXTCLADE_AADELS - } + >>> runtime { docker: docker memory: "3 GB" @@ -147,7 +147,7 @@ task nextclade_many_samples { # gather runtime metrics cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES >>> runtime { docker: docker @@ -351,8 +351,7 @@ task derived_cols { awk '!a[$1]++' "~{basename}.derived_cols.tsv" > temp.tsv && mv temp.tsv "~{basename}.derived_cols.tsv" # convert to csv - tr '\t' ',' < "~{basename}.derived_cols.tsv" > "~{basename}.derived_cols.csv" - + csvformat --tabs "~{basename}.derived_cols.tsv" > "~{basename}.derived_cols.csv" >>> runtime { docker: docker @@ -526,7 +525,7 @@ task nextstrain_build_subsample { cd .. 
cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES >>> runtime { docker: docker @@ -555,11 +554,11 @@ task nextstrain_ncov_defaults { String nextstrain_ncov_repo_commit = "cf79e41d4178608bda4b084080f0ffff5b3da61c" String docker = "nextstrain/base:build-20220111T004537Z" } - command { + command <<< set -e wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz" tar -xf "~{nextstrain_ncov_repo_commit}.tar.gz" --strip-components=1 - } + >>> runtime { docker: docker memory: "1 GB" @@ -635,7 +634,6 @@ task nextstrain_ncov_sanitize_gisaid_data { meta { description: "Sanitize data downloaded from GISAID for use in Nextstrain/augur. See: https://nextstrain.github.io/ncov/data-prep#curate-data-from-the-full-gisaid-database" } - input { File sequences_gisaid_fasta File metadata_gisaid_tsv @@ -645,7 +643,6 @@ task nextstrain_ncov_sanitize_gisaid_data { String nextstrain_ncov_repo_commit = "cf79e41d4178608bda4b084080f0ffff5b3da61c" String docker = "nextstrain/base:build-20220111T004537Z" } - parameter_meta { sequences_gisaid_fasta: { description: "Multiple sequences downloaded from GISAID", @@ -661,7 +658,7 @@ task nextstrain_ncov_sanitize_gisaid_data { } String out_basename = basename(basename(basename(basename(sequences_gisaid_fasta, '.xz'), '.gz'), '.tar'), '.fasta') - command { + command <<< set -e ncov_path_prefix="/nextstrain/ncov" wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz" @@ -679,7 +676,7 @@ task nextstrain_ncov_sanitize_gisaid_data { --rename-fields 'Virus name=strain' 'Accession ID=gisaid_epi_isl' 'Collection date=date' 'Clade=GISAID_clade' 'Pango lineage=pango_lineage' 'Host=host' 'Type=virus' 'Patient age=age' \ ~{"--strip-prefixes=" + prefix_to_strip} \ --output 
"~{out_basename}_metadata_sanitized_for_nextstrain.tsv.gz" - } + >>> runtime { docker: docker memory: "7 GB" @@ -730,7 +727,7 @@ task filter_subsample_sequences { } } String out_fname = sub(sub(basename(sequences_fasta), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") - command { + command <<< set -e augur version > VERSION @@ -769,8 +766,8 @@ task filter_subsample_sequences { grep "strains passed all filters" STDOUT | cut -f 1 -d ' ' > OUT_COUNT cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "15 GB" @@ -863,7 +860,7 @@ task filter_sequences_to_list { cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES >>> runtime { docker: docker @@ -902,7 +899,7 @@ task mafft_one_chr { Int mem_size = 500 Int cpus = 64 } - command { + command <<< set -e # decompress sequences if necessary @@ -952,8 +949,8 @@ task mafft_one_chr { # profiling and stats cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: mem_size + " GB" @@ -988,7 +985,7 @@ task mafft_one_chr_chunked { Int mem_size = 32 Int cpus = 96 } - command { + command <<< set -e # write out ref @@ -1058,8 +1055,8 @@ task mafft_one_chr_chunked { # profiling and stats cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker 
memory: mem_size + " GB" @@ -1092,7 +1089,7 @@ task augur_mafft_align { String docker = "nextstrain/base:build-20220111T004537Z" } - command { + command <<< set -e augur version > VERSION augur align --sequences "~{sequences}" \ @@ -1105,8 +1102,8 @@ task augur_mafft_align { --nthreads auto cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "180 GB" @@ -1132,10 +1129,10 @@ task snp_sites { String docker = "quay.io/biocontainers/snp-sites:2.5.1--hed695b0_0" } String out_basename = basename(msa_fasta, ".fasta") - command { + command <<< snp-sites -V > VERSION snp-sites -v ~{true="" false="-c" allow_wildcard_bases} -o "~{out_basename}.vcf" "~{msa_fasta}" - } + >>> runtime { docker: docker memory: "31 GB" @@ -1168,7 +1165,7 @@ task augur_mask_sites { } } String out_fname = sub(sub(basename(sequences), ".vcf", ".masked.vcf"), ".fasta$", ".masked.fasta") - command { + command <<< set -e augur version > VERSION BEDFILE=~{select_first([mask_bed, "/dev/null"])} @@ -1181,8 +1178,8 @@ task augur_mask_sites { fi cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "3 GB" @@ -1224,7 +1221,7 @@ task draft_augur_tree { } } String out_basename = basename(basename(basename(msa_or_vcf, '.gz'), '.vcf'), '.fasta') - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur tree --alignment "~{msa_or_vcf}" \ @@ -1237,8 +1234,8 @@ task draft_augur_tree { --nthreads auto cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat 
/sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "32 GB" @@ -1291,7 +1288,7 @@ task refine_augur_tree { } } String out_basename = basename(basename(basename(msa_or_vcf, '.gz'), '.vcf'), '.fasta') - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur refine \ @@ -1318,8 +1315,8 @@ task refine_augur_tree { ~{"--vcf-reference " + vcf_reference} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "50 GB" @@ -1355,7 +1352,7 @@ task ancestral_traits { String docker = "nextstrain/base:build-20220111T004537Z" } String out_basename = basename(tree, '.nwk') - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur traits \ @@ -1368,8 +1365,8 @@ task ancestral_traits { ~{true="--confidence" false="" confidence} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "32 GB" @@ -1412,7 +1409,7 @@ task ancestral_tree { } } String out_basename = basename(basename(basename(msa_or_vcf, '.gz'), '.vcf'), '.fasta') - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur ancestral \ @@ -1428,8 +1425,8 @@ task ancestral_tree { ~{true="--infer-ambiguous" false="" infer_ambiguous} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "50 GB" @@ -1465,7 +1462,7 @@ task translate_augur_tree { 
String docker = "nextstrain/base:build-20220111T004537Z" } String out_basename = basename(tree, '.nwk') - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur translate --tree "~{tree}" \ @@ -1475,8 +1472,8 @@ task translate_augur_tree { ~{"--vcf-reference " + vcf_reference} \ ~{"--genes " + genes} \ --output-node-data ~{out_basename}_aa_muts.json - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "2 GB" @@ -1520,7 +1517,7 @@ task tip_frequencies { String docker = "nextstrain/base:build-20220111T004537Z" String out_basename = basename(tree, '.nwk') } - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur frequencies \ @@ -1544,8 +1541,8 @@ task tip_frequencies { cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: select_first([machine_mem_gb, 30]) + " GB" @@ -1578,7 +1575,7 @@ task assign_clades_to_nodes { String docker = "nextstrain/base:build-20220111T004537Z" } String out_basename = basename(basename(tree_nwk, ".nwk"), "_timetree") - command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur clades \ @@ -1587,8 +1584,8 @@ task assign_clades_to_nodes { --reference "~{ref_fasta}" \ --clades "~{clades_tsv}" \ --output-node-data "~{out_basename}_clades.json" - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "2 GB" @@ -1621,7 +1618,7 @@ task augur_import_beast { String docker = "nextstrain/base:build-20220111T004537Z" } String tree_basename = basename(beast_mcc_tree, ".tree") - 
command { + command <<< set -e augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur import beast \ @@ -1634,8 +1631,8 @@ task augur_import_beast { ~{"--tip-date-delimeter " + tip_date_delimiter} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: select_first([machine_mem_gb, 3]) + " GB" @@ -1679,7 +1676,7 @@ task export_auspice_json { String docker = "nextstrain/base:build-20220111T004537Z" } - command { + command <<< set -e -o pipefail augur version > VERSION touch exportargs @@ -1733,8 +1730,8 @@ task export_auspice_json { touch "~{out_basename}_auspice_root-sequence.json" cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> runtime { docker: docker memory: "32 GB" @@ -1755,7 +1752,6 @@ task export_auspice_json { } task prep_augur_metadata { - input { File assembly String collection_date @@ -1766,15 +1762,13 @@ task prep_augur_metadata { String? 
iso_county="" - String docker_image = "quay.io/theiagen/utility:1.1" Int mem_size_gb = 3 Int CPUs = 1 Int disk_size = 10 Int preemptible_tries = 0 } - - command { + command <<< # de-identified consensus/assembly sequence - year=$(echo ${collection_date} | cut -f 1 -d '-') + year=$(echo ~{collection_date} | cut -f 1 -d '-') @@ -1786,13 +1780,10 @@ task prep_augur_metadata { - echo -e "\"$assembly_header\"\t\"ncov\"\t\"${collection_date}\"\t\"${iso_continent}\" \t\"${iso_country}\"\t\"${iso_state}\"\t\"${iso_county}\"\t"${pango_lineage}"" >> augur_metadata.tsv + echo -e "\"$assembly_header\"\t\"ncov\"\t\"~{collection_date}\"\t\"~{iso_continent}\" \t\"~{iso_country}\"\t\"~{iso_state}\"\t\"~{iso_county}\"\t"~{pango_lineage}"" >> augur_metadata.tsv echo $(ls ) - - } - + >>> output { File augur_metadata = "augur_metadata.tsv" } - runtime { docker: docker_image memory: "~{mem_size_gb} GB" diff --git a/tasks/tasks_reports.wdl b/tasks/tasks_reports.wdl index 40bc36fa..a3ef6a0f 100644 --- a/tasks/tasks_reports.wdl +++ b/tasks/tasks_reports.wdl @@ -338,7 +338,7 @@ task MultiQC { # get the basename in all wdl use the filename specified (sans ".html" extension, if specified) String report_filename = if (defined(file_name)) then basename(select_first([file_name]), ".html") else "multiqc" - command { + command <<< set -ex -o pipefail - echo "${sep='\n' input_files}" > input-filenames.txt + echo "~{sep='\n' input_files}" > input-filenames.txt @@ -378,7 +378,7 @@ task MultiQC { fi - tar -c "${out_dir}/${report_filename}_data" | gzip -c > "${report_filename}_data.tar.gz" + tar -c "~{out_dir}/~{report_filename}_data" | gzip -c > "~{report_filename}_data.tar.gz" - } + >>> output { File multiqc_report = "${out_dir}/${report_filename}.html" @@ -399,13 +399,11 @@ task tsv_join { meta { description: "Perform a full left outer join on multiple TSV tables. Each input tsv must have a header row, and each must must contain the value of id_col in its header. Inputs may or may not be gzipped. Unix/Mac/Win line endings are tolerated on input, Unix line endings are emitted as output. Unicode text safe." 
} - input { Array[File]+ input_tsvs String id_col String out_basename = "merged" } - command <<< python3<>> - output { File out_tsv = "${out_basename}.tsv" } - runtime { memory: "7 GB" cpu: 1 @@ -470,18 +466,15 @@ task tsv_stack { String out_basename String docker="quay.io/broadinstitute/viral-core:2.1.19" } - - command { + command <<< csvstack -t --filenames \ - ${sep=' ' input_tsvs} \ + ~{sep=' ' input_tsvs} \ | tr , '\t' \ - > ${out_basename}.txt + > ~{out_basename}.txt - } - + >>> output { File out_tsv = "${out_basename}.txt" } - runtime { memory: "1 GB" cpu: 1 @@ -490,7 +483,6 @@ task tsv_stack { dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 3 } - } task compare_two_genomes { @@ -501,16 +493,14 @@ task compare_two_genomes { String docker="quay.io/broadinstitute/viral-assemble:2.1.16.1" } - - command { + command <<< set -ex -o pipefail assembly.py --version | tee VERSION - assembly.py alignment_summary "${genome_one}" "${genome_two}" --outfileName "${out_basename}.txt" --printCounts --loglevel=DEBUG + assembly.py alignment_summary "~{genome_one}" "~{genome_two}" --outfileName "~{out_basename}.txt" --printCounts --loglevel=DEBUG cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES - } - + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES + >>> output { File comparison_table = "${out_basename}.txt" Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) @@ -518,7 +508,6 @@ task compare_two_genomes { String cpu_load = read_string("CPU_LOAD") String viralngs_version = read_string("VERSION") } - runtime { memory: "3 GB" cpu: 2 diff --git a/tasks/tasks_utils.wdl b/tasks/tasks_utils.wdl index f5c80898..bca75031 100644 --- a/tasks/tasks_utils.wdl +++ b/tasks/tasks_utils.wdl @@ -118,7 +118,7 @@ task zcat { # gather runtime metrics cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg > CPU_LOAD - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES + { cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes || echo 0; } > MEM_BYTES >>> runtime { docker: "quay.io/broadinstitute/viral-core:2.1.33"