From 3667868ace92480f85b715a09195a8f768160f0b Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 12:06:02 -0400 Subject: [PATCH 01/25] added ani_threshold Float input to animummer task. also exposed cpus and memory. added logic for comparing ANI_HIGHEST_PERCENTAGE to ani_threshold and only outputting the name of the match if the threshold is surpassed. tested successfully in miniwdl. --- tasks/quality_control/task_mummer_ani.wdl | 55 +++++++++++++++-------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 088a985b5..086c15f64 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -6,12 +6,15 @@ task animummer { String samplename File? ref_genome Float mash_filter = 0.9 - String docker="us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" + Float ani_threshold = 92.0 + String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" + Int cpus = 4 + Int memory = 8 Int disk_size = 100 } command <<< # capture and version - mummer --version | tee MUMMER_VERSION + mummer --version | tee MUMMER_VERSION.txt # set the reference genome # if not defined by user, then use all 43 genomes in RGDv2 @@ -42,18 +45,21 @@ task animummer { echo "~{samplename} did not surpass the minimum mash genetic distance filter, thus ANI was not performed" echo "The output TSV only contains the header line" # set output variables as 0s or descriptive strings - echo "0.0" > ANI_HIGHEST_PERCENT_BASES_ALIGNED - echo "0.0" > ANI_HIGHEST_PERCENT - echo "ANI skipped due to high genetic divergence from reference genomes" > ANI_TOP_SPECIES_MATCH + echo "0.0" > ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt + echo "0.0" > ANI_HIGHEST_PERCENT.txt + echo "ANI skipped due to high genetic divergence from reference genomes" > ANI_TOP_SPECIES_MATCH.txt # if output TSV has greater than 1 lines, then parse for appropriate outputs else 
## parse out highest percentBases aligned - cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED - echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED)" + cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt + echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ## parse out ANI value using highest percentBases aligned value - grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT - echo "ANI value is: $(cat ANI_HIGHEST_PERCENT)" + grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT.txt + echo "Highest ANI value is: $(cat ANI_HIGHEST_PERCENT.txt)" + # set ANI_HIGHEST_PERCENT as a bash variable (float) + ANI_HIGHEST_PERCENT=$(cat ANI_HIGHEST_PERCENT.txt) + # have to separate out results for ani_top_species match because user-defined reference genome FASTAs will not be named as they are in RGDv2 if [[ -z "~{ref_genome}" ]]; then @@ -63,27 +69,40 @@ task animummer { # cut on periods to pull out genus_species (in future this will inlcude lineages for Listeria and other sub-species designations) # have to create assembly_file_basename bash variable since output TSV does not include full path to assembly file, only filename assembly_file_basename=$(basename ~{assembly}) - grep "$(cat ANI_HIGHEST_PERCENT)" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' -f 3 | tee ANI_TOP_SPECIES_MATCH - echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH)" + grep "${ANI_HIGHEST_PERCENT}" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' 
-f 3 | tee ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH.txt)" + + # if ANI threshold is defined by user, compare to highest ANI value and only output if threshold is surpassed + if [[ -n "~{ani_threshold}" ]]; then + echo "Comparing user-defined ANI threshold to highest ANI value..." + # compare ANI_HIGHEST_PERCENT to ani_threshold using awk + if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )"; then + echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" + echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + else + echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" + fi + fi else # User specified a reference genome, use fasta filename as output string - basename "${REF_GENOME}" > ANI_TOP_SPECIES_MATCH + basename "${REF_GENOME}" > ANI_TOP_SPECIES_MATCH.txt echo "Reference genome used for ANI is: ${REF_GENOME}" fi fi >>> output { - Float ani_highest_percent = read_float("ANI_HIGHEST_PERCENT") - Float ani_highest_percent_bases_aligned = read_float("ANI_HIGHEST_PERCENT_BASES_ALIGNED") + Float ani_highest_percent = read_float("ANI_HIGHEST_PERCENT.txt") + Float ani_highest_percent_bases_aligned = read_float("ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt") File ani_output_tsv = "~{samplename}.ani-mummer.out.tsv" - String ani_top_species_match = read_string("ANI_TOP_SPECIES_MATCH") - String ani_mummer_version = read_string("MUMMER_VERSION") + String ani_top_species_match = read_string("ANI_TOP_SPECIES_MATCH.txt") + String ani_mummer_version = read_string("MUMMER_VERSION.txt") + String ani_docker = "~{docker}" } runtime { docker: "~{docker}" - memory: "8 GB" - cpu: 4 + memory: "~{memory} GB" + cpu: cpus disks: "local-disk " + disk_size + " SSD" disk: disk_size + " GB" maxRetries: 3 From 697c28b619e53cf8a47dd8122f61738f5cda7970 Mon Sep 17 
00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 12:22:30 -0400 Subject: [PATCH 02/25] fixed awk syntax --- tasks/quality_control/task_mummer_ani.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 086c15f64..3f2dedaf0 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -76,7 +76,7 @@ task animummer { if [[ -n "~{ani_threshold}" ]]; then echo "Comparing user-defined ANI threshold to highest ANI value..." # compare ANI_HIGHEST_PERCENT to ani_threshold using awk - if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )"; then + if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt else From 07a60a0e734674a20c452908246c993b8c9d830d Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 15:17:56 -0400 Subject: [PATCH 03/25] added ani_docker to export_taxon_tables task and illumina pe workflow --- tasks/utilities/task_broad_terra_tools.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 708d7bd92..491f5f18c 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -90,6 +90,7 @@ task export_taxon_tables { File? ani_output_tsv String? ani_top_species_match String? ani_mummer_version + String? ani_docker File? amrfinderplus_all_report File? amrfinderplus_amr_report File? 
amrfinderplus_stress_report @@ -583,6 +584,7 @@ task export_taxon_tables { "ani_output_tsv": "~{ani_output_tsv}", "ani_top_species_match": "~{ani_top_species_match}", "ani_mummer_version": "~{ani_mummer_version}", + "ani_docker": "~{ani_docker}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index e960a7eb1..a74232c9f 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -211,7 +211,7 @@ workflow theiaprok_illumina_pe { quast_gc_percent = quast.gc_percent, busco_results = busco.busco_results, ani_highest_percent = ani.ani_highest_percent, - ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned + ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned, } } call merlin_magic_workflow.merlin_magic { @@ -291,6 +291,7 @@ workflow theiaprok_illumina_pe { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -605,6 +606,7 @@ workflow theiaprok_illumina_pe { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report From 20cb4e389b8b8c6f2f8b25fcedc8607ee3e6e055 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 15:32:40 -0400 Subject: [PATCH 04/25] remove "The" --- tasks/quality_control/task_mummer_ani.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 3f2dedaf0..3b9a81295 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -78,7 +78,7 @@ task animummer { # compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" - echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt else echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" fi From d7de47e7a3e52912df6420835ff947413cc00223 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:40:45 -0400 Subject: [PATCH 05/25] added ani_docker output to theiaprok_fasta wf --- workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index cf4f7df7b..7e776a825 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -166,6 +166,7 @@ workflow theiaprok_fasta { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -411,6 +412,7 @@ workflow theiaprok_fasta { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report From ff175641c3ea7e113a7bd2e53dcf1e4e08bd87b3 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:41:24 -0400 Subject: [PATCH 06/25] removed comma that I accidentally added to call block of qc_check_table in theiaprok illumina pe wf --- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index a74232c9f..c6af16285 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -211,7 +211,7 @@ workflow theiaprok_illumina_pe { quast_gc_percent = quast.gc_percent, busco_results = busco.busco_results, ani_highest_percent = ani.ani_highest_percent, - ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned, + ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned } } call merlin_magic_workflow.merlin_magic { From 7e49db0ca523b66d1e0f1e7aab3b266515378ead Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:57:12 -0400 Subject: [PATCH 07/25] added ani_docker output to theiaprok illumina se and ONT workflows --- workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_ont.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl 
b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 1d2c80fe5..7f23087ff 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -261,6 +261,7 @@ workflow theiaprok_illumina_se { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -554,6 +555,7 @@ workflow theiaprok_illumina_se { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index ded5d1807..5af688b51 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -258,6 +258,7 @@ workflow theiaprok_ont { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -523,6 +524,7 @@ workflow theiaprok_ont { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? 
amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From 5dc45e4fbc514f6ca983fafaeedf2d851be65851 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 11:10:57 -0400 Subject: [PATCH 08/25] update CI --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 6 +++--- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 3ba26335f..1a763b493 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -574,7 +574,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl md5sum: 65bd7b4ef51b04df67aaf9276ff0918a - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 21e8b1cbe4c8be26d1ac8b8013970166 + md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - path: miniwdl_run/wdl/tasks/quality_control/task_ncbi_scrub.wdl md5sum: f41beadaf9d24ae77b05ac3b962127a8 - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl @@ -632,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 499eb797a809306edf7e97005f67f8f2 + md5sum: 227ee827aca5f05b23eaa608119ddc9a - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: a76d59109075ce8b861e63ffe70d7c77 + md5sum: 084648227cdc63e06e10ec27e728bdc6 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 
6d2a6ccbb..a2cf78fc3 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -544,7 +544,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl md5sum: 65bd7b4ef51b04df67aaf9276ff0918a - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 21e8b1cbe4c8be26d1ac8b8013970166 + md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl contains: ["version", "qc_check_table", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_quast.wdl @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 499eb797a809306edf7e97005f67f8f2 + md5sum: 227ee827aca5f05b23eaa608119ddc9a - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: c6ab2e13cf4802664b1c93e40a1cd5ba + md5sum: 6c6ccc821ae81ef22529f10398c60a06 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From 3cc9766bcaf246c64a798d55f7dd16ca372d980d Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 1 Sep 2023 15:43:10 -0400 Subject: [PATCH 09/25] update CI --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 582cfc2c5..4b15211a2 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,7 +632,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: 
miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 227ee827aca5f05b23eaa608119ddc9a + md5sum: 1d7365ed0f29a7632568a37bfce0f1b6 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: 084648227cdc63e06e10ec27e728bdc6 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index f1b5c6104..6ca48b0ed 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 227ee827aca5f05b23eaa608119ddc9a + md5sum: 1d7365ed0f29a7632568a37bfce0f1b6 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 6c6ccc821ae81ef22529f10398c60a06 + md5sum: d144cb871c175029a34107a369e70c5d - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From a5566753be57cb7915a69adde607f098d8b96f38 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 6 Oct 2023 16:56:03 -0400 Subject: [PATCH 10/25] mummer_ani task: added percent_base_aligned_threshold with 70 as default. default ani_threshold is now 85. Also added logic to only output ani_top_species_match if both thresholds are surpassed. --- tasks/quality_control/task_mummer_ani.wdl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 3b9a81295..2c8d87d11 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -6,7 +6,9 @@ task animummer { String samplename File? 
ref_genome Float mash_filter = 0.9 - Float ani_threshold = 92.0 + # these 2 thresholds were set as they are used by CDC enterics lab/PulseNet for ANI thresholds + Float ani_threshold = 85.0 + Float percent_bases_aligned_threshold = 70.0 String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" Int cpus = 4 Int memory = 8 @@ -53,6 +55,7 @@ task animummer { ## parse out highest percentBases aligned cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" + ANI_HIGHEST_PERCENT_BASES_ALIGNED=$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt) ## parse out ANI value using highest percentBases aligned value grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT.txt @@ -72,15 +75,21 @@ task animummer { grep "${ANI_HIGHEST_PERCENT}" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' -f 3 | tee ANI_TOP_SPECIES_MATCH.txt echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH.txt)" - # if ANI threshold is defined by user, compare to highest ANI value and only output if threshold is surpassed - if [[ -n "~{ani_threshold}" ]]; then + # if ANI threshold or percent_bases_aligned_threshold is defined by user (they both are by default), compare to highest ANI value and corresponding percent_bases_aligned value and only output ANI_top_species_match if both thresholds are surpassed + if [[ -n "~{ani_threshold}" || -n "~{percent_bases_aligned_threshold}" ]]; then echo "Comparing user-defined ANI threshold to highest ANI value..." # compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! 
awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + # else if: compare percent_bases_aligned_threshold to ANI_HIGHEST_PERCENT_BASES_ALIGNED using awk + elif ! awk "BEGIN{ exit (${ANI_HIGHEST_PERCENT_BASES_ALIGNED} < ~{percent_bases_aligned_threshold} )}"; then + echo "The highest ANI percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is less than the user-defined threshold of ~{percent_bases_aligned_threshold}" + # overwrite ANI_TOP_SPECIES_MATCH.txt when percent_bases_aligned threshold is not surpassed + echo "ANI percent bases aligned did not surpass the user-defined threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt else - echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" + echo "The highest ANI value ${ANI_HIGHEST_PERCENT} is greater than the user-defined threshold ~{ani_threshold}" + echo "The highest percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is greater than the user-defined threshold ~{percent_bases_aligned_threshold}" fi fi else From aea02358c350e65f50ffaaaa406608df9954e836 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 6 Oct 2023 17:52:48 -0400 Subject: [PATCH 11/25] clarified messages when 2 thresholds are not passed --- tasks/quality_control/task_mummer_ani.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 2c8d87d11..630ff9d10 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -80,13 +80,13 @@ task animummer { echo "Comparing user-defined ANI threshold to highest ANI value..." 
# compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then - echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" - echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined ANI threshold of ~{ani_threshold}" + echo "ANI top species match did not surpass the user-defined ANI threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt # else if: compare percent_bases_aligned_threshold to ANI_HIGHEST_PERCENT_BASES_ALIGNED using awk elif ! awk "BEGIN{ exit (${ANI_HIGHEST_PERCENT_BASES_ALIGNED} < ~{percent_bases_aligned_threshold} )}"; then echo "The highest ANI percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is less than the user-defined threshold of ~{percent_bases_aligned_threshold}" # overwrite ANI_TOP_SPECIES_MATCH.txt when percent_bases_aligned threshold is not surpassed - echo "ANI percent bases aligned did not surpass the user-defined threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match did not surpass the user-defined percent bases aligned threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt else echo "The highest ANI value ${ANI_HIGHEST_PERCENT} is greater than the user-defined threshold ~{ani_threshold}" echo "The highest percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is greater than the user-defined threshold ~{percent_bases_aligned_threshold}" From 818065a610319892f3a320dc91b0e9409c360abf Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 12:06:02 -0400 Subject: [PATCH 12/25] added ani_threshold Float input to animummer task. also exposed cpus and memory. 
added logic for comparing ANI_HIGHEST_PERCENTAGE to ani_threshold and only outputting the name of the match if the threshold is surpassed. tested successfully in miniwdl. --- tasks/quality_control/task_mummer_ani.wdl | 55 +++++++++++++++-------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 088a985b5..086c15f64 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -6,12 +6,15 @@ task animummer { String samplename File? ref_genome Float mash_filter = 0.9 - String docker="us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" + Float ani_threshold = 92.0 + String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" + Int cpus = 4 + Int memory = 8 Int disk_size = 100 } command <<< # capture and version - mummer --version | tee MUMMER_VERSION + mummer --version | tee MUMMER_VERSION.txt # set the reference genome # if not defined by user, then use all 43 genomes in RGDv2 @@ -42,18 +45,21 @@ task animummer { echo "~{samplename} did not surpass the minimum mash genetic distance filter, thus ANI was not performed" echo "The output TSV only contains the header line" # set output variables as 0s or descriptive strings - echo "0.0" > ANI_HIGHEST_PERCENT_BASES_ALIGNED - echo "0.0" > ANI_HIGHEST_PERCENT - echo "ANI skipped due to high genetic divergence from reference genomes" > ANI_TOP_SPECIES_MATCH + echo "0.0" > ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt + echo "0.0" > ANI_HIGHEST_PERCENT.txt + echo "ANI skipped due to high genetic divergence from reference genomes" > ANI_TOP_SPECIES_MATCH.txt # if output TSV has greater than 1 lines, then parse for appropriate outputs else ## parse out highest percentBases aligned - cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED - echo "highest percent bases aligned is: $(cat 
ANI_HIGHEST_PERCENT_BASES_ALIGNED)" + cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt + echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ## parse out ANI value using highest percentBases aligned value - grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT - echo "ANI value is: $(cat ANI_HIGHEST_PERCENT)" + grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT.txt + echo "Highest ANI value is: $(cat ANI_HIGHEST_PERCENT.txt)" + # set ANI_HIGHEST_PERCENT as a bash variable (float) + ANI_HIGHEST_PERCENT=$(cat ANI_HIGHEST_PERCENT.txt) + # have to separate out results for ani_top_species match because user-defined reference genome FASTAs will not be named as they are in RGDv2 if [[ -z "~{ref_genome}" ]]; then @@ -63,27 +69,40 @@ task animummer { # cut on periods to pull out genus_species (in future this will inlcude lineages for Listeria and other sub-species designations) # have to create assembly_file_basename bash variable since output TSV does not include full path to assembly file, only filename assembly_file_basename=$(basename ~{assembly}) - grep "$(cat ANI_HIGHEST_PERCENT)" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' -f 3 | tee ANI_TOP_SPECIES_MATCH - echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH)" + grep "${ANI_HIGHEST_PERCENT}" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' -f 3 | tee ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH.txt)" + + # if ANI threshold is defined by user, compare to highest ANI value and only output if threshold is surpassed + if [[ -n "~{ani_threshold}" ]]; then + echo "Comparing user-defined ANI threshold to highest ANI value..." 
+ # compare ANI_HIGHEST_PERCENT to ani_threshold using awk + if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )"; then + echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" + echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + else + echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" + fi + fi else # User specified a reference genome, use fasta filename as output string - basename "${REF_GENOME}" > ANI_TOP_SPECIES_MATCH + basename "${REF_GENOME}" > ANI_TOP_SPECIES_MATCH.txt echo "Reference genome used for ANI is: ${REF_GENOME}" fi fi >>> output { - Float ani_highest_percent = read_float("ANI_HIGHEST_PERCENT") - Float ani_highest_percent_bases_aligned = read_float("ANI_HIGHEST_PERCENT_BASES_ALIGNED") + Float ani_highest_percent = read_float("ANI_HIGHEST_PERCENT.txt") + Float ani_highest_percent_bases_aligned = read_float("ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt") File ani_output_tsv = "~{samplename}.ani-mummer.out.tsv" - String ani_top_species_match = read_string("ANI_TOP_SPECIES_MATCH") - String ani_mummer_version = read_string("MUMMER_VERSION") + String ani_top_species_match = read_string("ANI_TOP_SPECIES_MATCH.txt") + String ani_mummer_version = read_string("MUMMER_VERSION.txt") + String ani_docker = "~{docker}" } runtime { docker: "~{docker}" - memory: "8 GB" - cpu: 4 + memory: "~{memory} GB" + cpu: cpus disks: "local-disk " + disk_size + " SSD" disk: disk_size + " GB" maxRetries: 3 From 3c49ebc0b80592ef86c8ec6146147359ef07a9df Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 12:22:30 -0400 Subject: [PATCH 13/25] fixed awk syntax --- tasks/quality_control/task_mummer_ani.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 
086c15f64..3f2dedaf0 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -76,7 +76,7 @@ task animummer { if [[ -n "~{ani_threshold}" ]]; then echo "Comparing user-defined ANI threshold to highest ANI value..." # compare ANI_HIGHEST_PERCENT to ani_threshold using awk - if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )"; then + if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt else From dbbb92c744dfc5d7ca2c84989286c12bcb7b7870 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 15:17:56 -0400 Subject: [PATCH 14/25] added ani_docker to export_taxon_tables task and illumina pe workflow --- tasks/utilities/task_broad_terra_tools.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 2ae737a5c..eb819b302 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -92,6 +92,7 @@ task export_taxon_tables { File? ani_output_tsv String? ani_top_species_match String? ani_mummer_version + String? ani_docker String? kmerfinder_docker File? kmerfinder_results_tsv String? 
kmerfinder_top_hit @@ -595,6 +596,7 @@ task export_taxon_tables { "ani_output_tsv": "~{ani_output_tsv}", "ani_top_species_match": "~{ani_top_species_match}", "ani_mummer_version": "~{ani_mummer_version}", + "ani_docker": "~{ani_docker}", "kmerfinder_docker": "~{kmerfinder_docker}", "kmerfinder_results_tsv": "~{kmerfinder_results_tsv}", "kmerfinder_top_hit": "~{kmerfinder_top_hit}", diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index feae677c0..af6f9389e 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -220,7 +220,7 @@ workflow theiaprok_illumina_pe { quast_gc_percent = quast.gc_percent, busco_results = busco.busco_results, ani_highest_percent = ani.ani_highest_percent, - ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned + ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned, } } call merlin_magic_workflow.merlin_magic { @@ -300,6 +300,7 @@ workflow theiaprok_illumina_pe { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, @@ -621,6 +622,7 @@ workflow theiaprok_illumina_pe { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # kmerfinder outputs String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? 
kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv From 55dfc2fd09b8cfd3cc2593b3b996299855aab96c Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Thu, 24 Aug 2023 15:32:40 -0400 Subject: [PATCH 15/25] remove "The" --- tasks/quality_control/task_mummer_ani.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 3f2dedaf0..3b9a81295 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -78,7 +78,7 @@ task animummer { # compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" - echo "The ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt else echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" fi From c62449e9cc95c69b05674bc072519f69fa5c799c Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:40:45 -0400 Subject: [PATCH 16/25] added ani_docker output to theiaprok_fasta wf --- workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index 2c4ce9f15..c5fe43576 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -175,6 +175,7 @@ workflow theiaprok_fasta { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = 
kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, @@ -427,6 +428,7 @@ workflow theiaprok_fasta { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # kmerfinder outputs String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv From c9340fef93207ca78b2fd1cf0219c3f70df4e81b Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:41:24 -0400 Subject: [PATCH 17/25] removed comma that I accidentally added to call block of qc_check_table in theiaprok illumina pe wf --- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index af6f9389e..4ffbf5ee9 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -220,7 +220,7 @@ workflow theiaprok_illumina_pe { quast_gc_percent = quast.gc_percent, busco_results = busco.busco_results, ani_highest_percent = ani.ani_highest_percent, - ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned, + ani_highest_percent_bases_aligned = ani.ani_highest_percent_bases_aligned } } call merlin_magic_workflow.merlin_magic { From cb94465b1b3bf9eee92f46929eaf4cb24b34ad37 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 09:57:12 -0400 Subject: [PATCH 18/25] added ani_docker output to theiaprok illumina se and ONT workflows --- workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_ont.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 8a269be90..365b1ed2f 100644 --- 
a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -273,6 +273,7 @@ workflow theiaprok_illumina_se { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, @@ -573,6 +574,7 @@ workflow theiaprok_illumina_se { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # kmerfinder outputs String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 5254f8640..593f19723 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -271,6 +271,7 @@ workflow theiaprok_ont { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + ani_docker = ani.ani_docker, kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, @@ -547,6 +548,7 @@ workflow theiaprok_ont { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + String? ani_mummer_docker = ani.ani_docker # kmerfinder outputs String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? 
kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv From f08ee35133cac13c423561c4923b239b7cf7e887 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 25 Aug 2023 11:10:57 -0400 Subject: [PATCH 19/25] update CI --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index ee07b678f..9e4362064 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -574,7 +574,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 21e8b1cbe4c8be26d1ac8b8013970166 + md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - path: miniwdl_run/wdl/tasks/quality_control/task_ncbi_scrub.wdl contains: ["version", "scrub", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 5d4873a19..b46305071 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -544,7 +544,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 21e8b1cbe4c8be26d1ac8b8013970166 + md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl contains: ["version", "qc_check_table", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_quast.wdl From be3a734533c240e56390708914722240dc9c5935 Mon Sep 17 00:00:00 2001 
From: kapsakcj Date: Fri, 6 Oct 2023 16:56:03 -0400 Subject: [PATCH 20/25] mummer_ani task: added percent_bases_aligned_threshold with 70 as default. default ani_threshold is now 85. Also added logic to only output ani_top_species_match if both thresholds are surpassed. --- tasks/quality_control/task_mummer_ani.wdl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 3b9a81295..2c8d87d11 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -6,7 +6,9 @@ task animummer { String samplename File? ref_genome Float mash_filter = 0.9 - Float ani_threshold = 92.0 + # these 2 thresholds were set as they are used by CDC enterics lab/PulseNet for ANI thresholds + Float ani_threshold = 85.0 + Float percent_bases_aligned_threshold = 70.0 String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" Int cpus = 4 Int memory = 8 @@ -53,6 +55,7 @@ task animummer { ## parse out highest percentBases aligned cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" + ANI_HIGHEST_PERCENT_BASES_ALIGNED=$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt) ## parse out ANI value using highest percentBases aligned value grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT.txt @@ -72,15 +75,21 @@ task animummer { grep "${ANI_HIGHEST_PERCENT}" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.'
-f 3 | tee ANI_TOP_SPECIES_MATCH.txt echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH.txt)" - # if ANI threshold is defined by user, compare to highest ANI value and only output if threshold is surpassed - if [[ -n "~{ani_threshold}" ]]; then + # if ANI threshold or percent_bases_aligned_threshold is defined by user (they both are by default), compare to highest ANI value and corresponding percent_bases_aligned value and only output ANI_top_species_match if both thresholds are surpassed + if [[ -n "~{ani_threshold}" || -n "~{percent_bases_aligned_threshold}" ]]; then echo "Comparing user-defined ANI threshold to highest ANI value..." # compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + # else if: compare percent_bases_aligned_threshold to ANI_HIGHEST_PERCENT_BASES_ALIGNED using awk + elif ! 
awk "BEGIN{ exit (${ANI_HIGHEST_PERCENT_BASES_ALIGNED} < ~{percent_bases_aligned_threshold} )}"; then + echo "The highest ANI percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is less than the user-defined threshold of ~{percent_bases_aligned_threshold}" + # overwrite ANI_TOP_SPECIES_MATCH.txt when percent_bases_aligned threshold is not surpassed + echo "ANI percent bases aligned did not surpass the user-defined threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt else - echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}" + echo "The highest ANI value ${ANI_HIGHEST_PERCENT} is greater than the user-defined threshold ~{ani_threshold}" + echo "The highest percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is greater than the user-defined threshold ~{percent_bases_aligned_threshold}" fi fi else From 3e23ba7b6f0802498adb2062e7de9657b57b9591 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 6 Oct 2023 17:52:48 -0400 Subject: [PATCH 21/25] clarified messages when 2 thresholds are not passed --- tasks/quality_control/task_mummer_ani.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 2c8d87d11..630ff9d10 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -80,13 +80,13 @@ task animummer { echo "Comparing user-defined ANI threshold to highest ANI value..." # compare ANI_HIGHEST_PERCENT to ani_threshold using awk if ! 
awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then - echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}" - echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined ANI threshold of ~{ani_threshold}" + echo "ANI top species match did not surpass the user-defined ANI threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt # else if: compare percent_bases_aligned_threshold to ANI_HIGHEST_PERCENT_BASES_ALIGNED using awk elif ! awk "BEGIN{ exit (${ANI_HIGHEST_PERCENT_BASES_ALIGNED} < ~{percent_bases_aligned_threshold} )}"; then echo "The highest ANI percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is less than the user-defined threshold of ~{percent_bases_aligned_threshold}" # overwrite ANI_TOP_SPECIES_MATCH.txt when percent_bases_aligned threshold is not surpassed - echo "ANI percent bases aligned did not surpass the user-defined threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt + echo "ANI top species match did not surpass the user-defined percent bases aligned threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt else echo "The highest ANI value ${ANI_HIGHEST_PERCENT} is greater than the user-defined threshold ~{ani_threshold}" echo "The highest percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is greater than the user-defined threshold ~{percent_bases_aligned_threshold}" From 1c86b20ce0e63dfbb2d936d2567b9c018ed0f5b1 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Tue, 10 Oct 2023 12:16:41 -0400 Subject: [PATCH 22/25] removed accidental duplication in code introduced during merge conflict resolution (my bad!) 
--- tasks/utilities/task_broad_terra_tools.wdl | 2 -- workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 -- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 -- workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 -- workflows/theiaprok/wf_theiaprok_ont.wdl | 2 -- 5 files changed, 10 deletions(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 8ee0f3613..eb819b302 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -99,7 +99,6 @@ task export_taxon_tables { String? kmerfinder_query_coverage String? kmerfinder_template_coverage String? kmerfinder_database - String? ani_docker File? amrfinderplus_all_report File? amrfinderplus_amr_report File? amrfinderplus_stress_report @@ -604,7 +603,6 @@ task export_taxon_tables { "kmerfinder_query_coverage": "~{kmerfinder_query_coverage}", "kmerfinder_template_coverage": "~{kmerfinder_template_coverage}", "kmerfinder_database": "~{kmerfinder_database}", - "ani_docker": "~{ani_docker}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index 65f67fdc2..c5fe43576 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -182,7 +182,6 @@ workflow theiaprok_fasta { kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, kmerfinder_database = kmerfinder.kmerfinder_database, - ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -437,7 +436,6 @@ workflow theiaprok_fasta { String? 
kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage String? kmerfinder_database = kmerfinder.kmerfinder_database - String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 15e807e8e..4ffbf5ee9 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -307,7 +307,6 @@ workflow theiaprok_illumina_pe { kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, kmerfinder_database = kmerfinder.kmerfinder_database, - ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -631,7 +630,6 @@ workflow theiaprok_illumina_pe { String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage String? kmerfinder_database = kmerfinder.kmerfinder_database - String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 9f27ce061..365b1ed2f 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -280,7 +280,6 @@ workflow theiaprok_illumina_se { kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, kmerfinder_database = kmerfinder.kmerfinder_database, - ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -583,7 +582,6 @@ workflow theiaprok_illumina_se { String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage String? kmerfinder_database = kmerfinder.kmerfinder_database - String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 3acc68b30..593f19723 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -278,7 +278,6 @@ workflow theiaprok_ont { kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, kmerfinder_database = kmerfinder.kmerfinder_database, - ani_docker = ani.ani_docker, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -557,7 +556,6 @@ workflow theiaprok_ont { String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage String? kmerfinder_database = kmerfinder.kmerfinder_database - String? ani_mummer_docker = ani.ani_docker # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From d25e9565d8c5ff613490b38fcdbac931721b78b4 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Tue, 10 Oct 2023 12:36:33 -0400 Subject: [PATCH 23/25] update CI for theiaprok illumina pe and se --- .../workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 7 +++---- .../workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 52c7979fc..19a51c2ae 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -574,8 +574,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 + md5sum: b578e00dc2dd8a978718e0b67ee9330a - path: miniwdl_run/wdl/tasks/quality_control/task_ncbi_scrub.wdl contains: ["version", "scrub", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl @@ -633,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 43ef050bde1fb8755f38e697a1794918 + md5sum: 26ac9a20c8043a28d373bfe0ca361cc6 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 6dc6c393281a19e8dcbcc15964b8e08a + md5sum: ee5c79cbc195aad75c0a39c3c2acd28e - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 2614fac622fa2035b80a7b220b1aed86 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 
013ad6f4b..da8b5054f 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -544,8 +544,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 - md5sum: 33a111c260b2fb0b6db6d9ed77ec8e87 + md5sum: b578e00dc2dd8a978718e0b67ee9330a - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl contains: ["version", "qc_check_table", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_quast.wdl @@ -601,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 1d7365ed0f29a7632568a37bfce0f1b6 + md5sum: 26ac9a20c8043a28d373bfe0ca361cc6 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: d144cb871c175029a34107a369e70c5d + md5sum: 68a10e1e76ba0d7ec4c119e4f5fc3ecd - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 2614fac622fa2035b80a7b220b1aed86 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From b500a8ac5df0443ef5f5022068779dd3358b0d91 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 20 Oct 2023 12:22:30 -0400 Subject: [PATCH 24/25] lower default ani_threshold to 80 (CDC standard) from 85 --- tasks/quality_control/task_mummer_ani.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/quality_control/task_mummer_ani.wdl b/tasks/quality_control/task_mummer_ani.wdl index 630ff9d10..fc048bfb1 100644 --- a/tasks/quality_control/task_mummer_ani.wdl +++ b/tasks/quality_control/task_mummer_ani.wdl @@ -7,7 +7,7 @@ task animummer { File? 
ref_genome Float mash_filter = 0.9 # these 2 thresholds were set as they are used by CDC enterics lab/PulseNet for ANI thresholds - Float ani_threshold = 85.0 + Float ani_threshold = 80.0 Float percent_bases_aligned_threshold = 70.0 String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2" Int cpus = 4 From 5a540523e046319c981062a79b9031a4c2ab09d3 Mon Sep 17 00:00:00 2001 From: kapsakcj Date: Fri, 20 Oct 2023 12:39:56 -0400 Subject: [PATCH 25/25] update CI --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 6 +++--- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 20d79901a..8a6a1e96a 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -574,7 +574,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: b578e00dc2dd8a978718e0b67ee9330a + md5sum: d50a40d533d51834d0000971dc2c2014 - path: miniwdl_run/wdl/tasks/quality_control/task_ncbi_scrub.wdl contains: ["version", "scrub", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl @@ -624,7 +624,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl - md5sum: 9c02b95d23a602d9842fdeac883037b1 + md5sum: 3469cf6787f279ffa81fa50f6257fcd7 - path: miniwdl_run/wdl/tasks/taxon_id/task_gambit.wdl md5sum: 08987d952c67c6ff6debf6898af15f9a - path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl @@ -634,7 +634,7 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 26ac9a20c8043a28d373bfe0ca361cc6 - path: 
miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 0c87a7279c4870a821c3dc1db9a6a94b + md5sum: ac4971ad992c3e8abee5d3817928a8f2 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 00bd2489b2a7aa5b88340a940961a857 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 35fafa245..714fb937e 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -544,7 +544,7 @@ - path: miniwdl_run/wdl/tasks/quality_control/task_fastq_scan.wdl contains: ["version", "fastq_scan", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_mummer_ani.wdl - md5sum: b578e00dc2dd8a978718e0b67ee9330a + md5sum: d50a40d533d51834d0000971dc2c2014 - path: miniwdl_run/wdl/tasks/quality_control/task_qc_check_phb.wdl contains: ["version", "qc_check_table", "output"] - path: miniwdl_run/wdl/tasks/quality_control/task_quast.wdl @@ -592,7 +592,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl md5sum: d49ae0b02e798af0636eb2721bb434b4 - path: miniwdl_run/wdl/tasks/task_versioning.wdl - md5sum: 9c02b95d23a602d9842fdeac883037b1 + md5sum: 3469cf6787f279ffa81fa50f6257fcd7 - path: miniwdl_run/wdl/tasks/taxon_id/task_gambit.wdl md5sum: 08987d952c67c6ff6debf6898af15f9a - path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl @@ -602,7 +602,7 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 26ac9a20c8043a28d373bfe0ca361cc6 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: e1d9e75dae5176ceeb95b88a5d3bbba7 + md5sum: 7303247badeb82119ccef528f7367f89 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 00bd2489b2a7aa5b88340a940961a857 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl