From 76cf2d50be841a4afab8d8cd7b5693265fe3c5f6 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 11 Sep 2023 16:55:44 +0000 Subject: [PATCH 01/24] skeleton on kmerfinder task --- tasks/taxon_id/task_kmerfinder.wdl | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tasks/taxon_id/task_kmerfinder.wdl diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl new file mode 100644 index 000000000..6cb07e0c2 --- /dev/null +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -0,0 +1,38 @@ +version 1.0 + +task kmerfinder { + input { + File assembly + File kmerfinder_db + String kmerfinder_db_name # allowed: archea, bacteria, fungi, protozoa, virus, typestrain + String samplename + String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" + Int memory = 32 + Int cpu = 4 + String kmerfinder_args = "" + } + command <<< + # Decompress the kmerfinder database + mkdir db + tar -C ./db/ -xzvf ~{kmerfinder_db} + + # Run kmerfinder + kmerfinder.py \ + -db ./db/~{kmerfinder_db_name} \ + -i ~{assembly} \ + -o ~{samplename} \ + ~{kmerfinder_args} \ + -x + + >>> + output { + String kmerfinder_docker = docker + } + runtime { + docker: docker + memory: "~{memory} GB" + cpu: cpu + disks: "local-disk 100 SSD" + preemptible: 0 + } +} \ No newline at end of file From 9762160bb4f482c43eaa8fce3f5c8c69e5c51696 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 12 Sep 2023 09:16:46 +0000 Subject: [PATCH 02/24] remove kmerfinder_db_name --- tasks/taxon_id/task_kmerfinder.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 6cb07e0c2..8bbcd97a2 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -4,7 +4,6 @@ task kmerfinder { input { File assembly File kmerfinder_db - String kmerfinder_db_name # allowed: archea, bacteria, fungi, protozoa, virus, typestrain String 
samplename String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" Int memory = 32 @@ -18,7 +17,7 @@ task kmerfinder { # Run kmerfinder kmerfinder.py \ - -db ./db/~{kmerfinder_db_name} \ + -db ./db/ \ -i ~{assembly} \ -o ~{samplename} \ ~{kmerfinder_args} \ From 9457c555dae71dc670f695292df15a349733023f Mon Sep 17 00:00:00 2001 From: cimendes Date: Wed, 13 Sep 2023 15:25:15 +0000 Subject: [PATCH 03/24] still trying to get kmerfinder to run --- tasks/taxon_id/task_kmerfinder.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 8bbcd97a2..518ab51a9 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -17,7 +17,8 @@ task kmerfinder { # Run kmerfinder kmerfinder.py \ - -db ./db/ \ + -db ./db/* \ + -tax ./db/*/*.tax \ -i ~{assembly} \ -o ~{samplename} \ ~{kmerfinder_args} \ From 52f173b6f753561f3cefe44844de1d2971e06d6d Mon Sep 17 00:00:00 2001 From: cimendes Date: Wed, 13 Sep 2023 16:16:27 +0000 Subject: [PATCH 04/24] is this working now?!? 
--- tasks/taxon_id/task_kmerfinder.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 518ab51a9..198f568eb 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -1,9 +1,9 @@ version 1.0 -task kmerfinder { +task kmerfinder_bacteria { input { File assembly - File kmerfinder_db + File kmerfinder_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria.tar.gz" String samplename String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" Int memory = 32 @@ -17,8 +17,8 @@ task kmerfinder { # Run kmerfinder kmerfinder.py \ - -db ./db/* \ - -tax ./db/*/*.tax \ + -db ./db/bacteria/bacteria.ATG \ + -tax ./db/bacteria/bacteria.tax \ -i ~{assembly} \ -o ~{samplename} \ ~{kmerfinder_args} \ From 9fca4710335cbbaa0494e02c7cc901ad2763af6f Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 14 Sep 2023 10:04:41 +0000 Subject: [PATCH 05/24] first working version of kmerfinder task --- tasks/taxon_id/task_kmerfinder.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 198f568eb..0ffc972f9 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -11,7 +11,7 @@ task kmerfinder_bacteria { String kmerfinder_args = "" } command <<< - # Decompress the kmerfinder database + # Decompress the kmerfinder bacterial qdatabase mkdir db tar -C ./db/ -xzvf ~{kmerfinder_db} @@ -21,12 +21,15 @@ task kmerfinder_bacteria { -tax ./db/bacteria/bacteria.tax \ -i ~{assembly} \ -o ~{samplename} \ - ~{kmerfinder_args} \ - -x + ~{kmerfinder_args} + mv ~{samplename}/results.txt ~{samplename}_kmerfinder.txt + + # extract the top result >>> output { String kmerfinder_docker = docker + File kmerfinder_txt = "~{samplename}_kmerfinder.txt" } runtime { docker: docker From 
36b43ddc1ee7914687c0749bfac6a5ec87e2b3ab Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 14 Sep 2023 10:36:02 +0000 Subject: [PATCH 06/24] parse top hit --- tasks/taxon_id/task_kmerfinder.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 0ffc972f9..7a66fb708 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -23,6 +23,17 @@ task kmerfinder_bacteria { -o ~{samplename} \ ~{kmerfinder_args} + # parse outputs + if [ ! -f ~{samplename}/results.txt ]; then + PF="No hit detected in database" + else + PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" + if [ "$PF" == "" ]; then + PF="No hit detected in database" + fi + fi + echo $PF | tee TOP_HIT + mv ~{samplename}/results.txt ~{samplename}_kmerfinder.txt # extract the top result @@ -30,6 +41,7 @@ task kmerfinder_bacteria { output { String kmerfinder_docker = docker File kmerfinder_txt = "~{samplename}_kmerfinder.txt" + String kmerfinder_top_hit = read_string("TOP_HIT") } runtime { docker: docker From 9f18b7d895f3268d6a61e792eeff39bce0ab85dd Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 14 Sep 2023 10:45:42 +0000 Subject: [PATCH 07/24] make results output file optional --- tasks/taxon_id/task_kmerfinder.wdl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 7a66fb708..bf74bbcb9 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -30,17 +30,14 @@ task kmerfinder_bacteria { PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" if [ "$PF" == "" ]; then PF="No hit detected in database" - fi + fi + mv ~{samplename}/results.txt ~{samplename}_kmerfinder.txt fi echo $PF | tee TOP_HIT - - mv ~{samplename}/results.txt ~{samplename}_kmerfinder.txt - - # extract the top result >>> output { String 
kmerfinder_docker = docker - File kmerfinder_txt = "~{samplename}_kmerfinder.txt" + File? kmerfinder_txt = "~{samplename}_kmerfinder.txt" String kmerfinder_top_hit = read_string("TOP_HIT") } runtime { From 98ba2c628cfc3ab2f729739eb68ba072cd593f60 Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 14 Sep 2023 13:53:35 +0000 Subject: [PATCH 08/24] final version of task (for now..) --- tasks/taxon_id/task_kmerfinder.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index bf74bbcb9..332ef8edd 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -3,11 +3,12 @@ version 1.0 task kmerfinder_bacteria { input { File assembly - File kmerfinder_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria.tar.gz" String samplename + File kmerfinder_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria.tar.gz" String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" Int memory = 32 Int cpu = 4 + Int disk_size = 100 String kmerfinder_args = "" } command <<< @@ -31,20 +32,20 @@ task kmerfinder_bacteria { if [ "$PF" == "" ]; then PF="No hit detected in database" fi - mv ~{samplename}/results.txt ~{samplename}_kmerfinder.txt + mv -v ~{samplename}/results.txt ~{samplename}_kmerfinder.tsv fi echo $PF | tee TOP_HIT >>> output { String kmerfinder_docker = docker - File? kmerfinder_txt = "~{samplename}_kmerfinder.txt" + File? 
kmerfinder_results_tsv = "~{samplename}_kmerfinder.tsv" String kmerfinder_top_hit = read_string("TOP_HIT") } runtime { docker: docker memory: "~{memory} GB" cpu: cpu - disks: "local-disk 100 SSD" + disks: "local-disk ~{disk_size} SSD" preemptible: 0 } } \ No newline at end of file From d14549a92b005c13a64c2a959d797d4020b63f3e Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 14 Sep 2023 14:10:48 +0000 Subject: [PATCH 09/24] add kmerfinder_bacteria to theiaprok! --- tasks/utilities/task_broad_terra_tools.wdl | 6 ++++++ workflows/theiaprok/wf_theiaprok_fasta.wdl | 16 ++++++++++++++++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 16 ++++++++++++++++ workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 16 ++++++++++++++++ workflows/theiaprok/wf_theiaprok_ont.wdl | 16 ++++++++++++++++ 5 files changed, 70 insertions(+) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index b585d73f2..f8715322b 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -92,6 +92,9 @@ task export_taxon_tables { File? ani_output_tsv String? ani_top_species_match String? ani_mummer_version + String? kmerfinder_docker + File? kmerfinder_results_tsv + String? kmerfinder_top_hit File? amrfinderplus_all_report File? amrfinderplus_amr_report File? 
amrfinderplus_stress_report @@ -587,6 +590,9 @@ task export_taxon_tables { "ani_output_tsv": "~{ani_output_tsv}", "ani_top_species_match": "~{ani_top_species_match}", "ani_mummer_version": "~{ani_mummer_version}", + "kmerfinder_docker": "~{kmerfinder_docker}", + "kmerfinder_results_tsv": "~{kmerfinder_results_tsv}", + "kmerfinder_top_hit": "~{kmerfinder_top_hit}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index cf4f7df7b..82f2d0f8a 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -5,6 +5,7 @@ import "../../tasks/quality_control/task_quast.wdl" as quast_task import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -35,6 +36,7 @@ workflow theiaprok_fasta { String terra_workspace="NA" # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. 
Useful when gambit does not predict the correct species @@ -66,6 +68,13 @@ workflow theiaprok_fasta { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = assembly_fasta, @@ -166,6 +175,9 @@ workflow theiaprok_fasta { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -411,6 +423,10 @@ workflow theiaprok_fasta { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? 
kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index e960a7eb1..51af2f0e8 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -53,6 +54,7 @@ workflow theiaprok_illumina_pe { Int trim_window_size = 10 # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. 
Useful when gambit does not predict the correct species # qc check parameters @@ -142,6 +144,13 @@ workflow theiaprok_illumina_pe { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = shovill_pe.assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = shovill_pe.assembly_fasta, @@ -291,6 +300,9 @@ workflow theiaprok_illumina_pe { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -605,6 +617,10 @@ workflow theiaprok_illumina_pe { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 4f2025bd6..cf93aec8d 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -52,6 +53,7 @@ workflow theiaprok_illumina_se { Int trim_window_size = 4 # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. 
Useful when gambit does not predict the correct species @@ -135,6 +137,13 @@ workflow theiaprok_illumina_se { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = shovill_se.assembly_fasta, + samplename = samplename + } + } call amrfinderplus.amrfinderplus_nuc as amrfinderplus_task { input: assembly = shovill_se.assembly_fasta, @@ -264,6 +273,9 @@ workflow theiaprok_illumina_se { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -557,6 +569,10 @@ workflow theiaprok_illumina_se { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 06628be0a..e8421283c 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -9,6 +9,7 @@ import "../../tasks/quality_control/task_screen.wdl" as screen_task import "../../tasks/quality_control/task_busco.wdl" as busco_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/quality_control/task_mummer_ani.wdl" as ani_task +import "../../tasks/taxon_id/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus_task import "../../tasks/gene_typing/task_resfinder.wdl" as resfinder_task import "../../tasks/species_typing/task_ts_mlst.wdl" as ts_mlst_task @@ -48,6 +49,7 @@ workflow theiaprok_ont { Int min_coverage = 5 # reduced from 10 because some institutions sequence at lower depth because of longer read length # module options Boolean call_ani = false # by default do not call ANI task, but user has ability to enable this task if working with enteric pathogens or supply their own high-quality reference genome + Boolean call_kmerfinder = false Boolean call_resfinder = false String genome_annotation = "prokka" # options: "prokka" or "bakta" String? expected_taxon # allow user to provide organism (e.g. "Clostridioides_difficile") string to amrfinder. 
Useful when gambit does not predict the correct species @@ -131,6 +133,13 @@ workflow theiaprok_ont { samplename = samplename } } + if (call_kmerfinder) { + call kmerfinder_task.kmerfinder_bacteria as kmerfinder { + input: + assembly = dragonflye.assembly_fasta, + samplename = samplename + } + } call amrfinderplus_task.amrfinderplus_nuc as amrfinderplus { input: assembly = dragonflye.assembly_fasta, @@ -262,6 +271,9 @@ workflow theiaprok_ont { ani_output_tsv = ani.ani_output_tsv, ani_top_species_match = ani.ani_top_species_match, ani_mummer_version = ani.ani_mummer_version, + kmerfinder_docker = kmerfinder.kmerfinder_docker, + kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, + kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -531,6 +543,10 @@ workflow theiaprok_ont { File? ani_output_tsv = ani.ani_output_tsv String? ani_top_species_match = ani.ani_top_species_match String? ani_mummer_version = ani.ani_mummer_version + # kmerfinder outputs + String? kmerfinder_docker = kmerfinder.kmerfinder_docker + File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv + String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From c55ca1012252cd68870033b3d46f018776905d7c Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 18 Sep 2023 15:56:39 +0000 Subject: [PATCH 10/24] fix typo --- tasks/taxon_id/task_kmerfinder.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 332ef8edd..5e9c14f0e 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -12,7 +12,7 @@ task kmerfinder_bacteria { String kmerfinder_args = "" } command <<< - # Decompress the kmerfinder bacterial qdatabase + # Decompress the kmerfinder bacterial database mkdir db tar -C ./db/ -xzvf ~{kmerfinder_db} From c834b8985dc61bf5e1c8977c04a18a58b5a50c84 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 08:15:26 +0000 Subject: [PATCH 11/24] pass empty file for tests - kmerfinder_db --- tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json | 4 +++- tests/inputs/theiaprok/wf_theiaprok_illumina_se.json | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json index c5e3444a7..143c2737a 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json @@ -3,6 +3,7 @@ "theiaprok_illumina_pe.read1_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R1.fastq.gz", "theiaprok_illumina_pe.read2_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R2.fastq.gz", "theiaprok_illumina_pe.skip_screen": true, + "theiaprok_illumina_pe.call_kmerfinder": false, "theiaprok_illumina_pe.read_QC_trim.read_processing": "trimmomatic", "theiaprok_illumina_pe.read_QC_trim.call_midas": false, "theiaprok_illumina_pe.read_QC_trim.midas.midas_db" : "./tests/inputs/empty-for-test.txt", @@ -36,5 +37,6 @@ 
"theiaprok_illumina_pe.merlin_magic.cladetyper.ref_clade5_annotated" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_pe.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_pe.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_pe.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_pe.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", + "theiaprok_illumina_pe.gambit.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json index 126792e6d..c0aacc2bd 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json @@ -2,6 +2,7 @@ "theiaprok_illumina_se.samplename": "test", "theiaprok_illumina_se.read1_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R1.fastq.gz", "theiaprok_illumina_se.skip_screen": true, + "theiaprok_illumina_se.call_kmerfinder": false, "theiaprok_illumina_se.read_QC_trim.read_processing": "trimmomatic", "theiaprok_illumina_se.read_QC_trim.call_midas": false, "theiaprok_illumina_se.read_QC_trim.midas.midas_db" : "./tests/inputs/empty-for-test.txt", @@ -35,5 +36,6 @@ "theiaprok_illumina_se.merlin_magic.cladetyper.ref_clade5_annotated" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", + "theiaprok_illumina_pe.gambit.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } From 2bdc5eea36a18013b00600e4a1c9607478ae8472 Mon Sep 17
00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 08:31:20 +0000 Subject: [PATCH 12/24] fix input json --- tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json | 2 +- tests/inputs/theiaprok/wf_theiaprok_illumina_se.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json index 143c2737a..e66701e50 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json @@ -38,5 +38,5 @@ "theiaprok_illumina_pe.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_pe.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", "theiaprok_illumina_pe.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_pe.gambit.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_pe.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json index c0aacc2bd..fbaface2e 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json @@ -37,5 +37,5 @@ "theiaprok_illumina_se.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_pe.gambit.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_se.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } From 97842168fe5f5c21011f819434d59dd4776ebd06 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 08:37:40 +0000 Subject: [PATCH 13/24] add kmerfinder query coverage to
theiaprok SE, PE, FASTA and ONT --- tasks/taxon_id/task_kmerfinder.wdl | 5 +++++ tasks/utilities/task_broad_terra_tools.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_ont.wdl | 2 ++ 6 files changed, 15 insertions(+) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 5e9c14f0e..1b0265d37 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -27,19 +27,24 @@ task kmerfinder_bacteria { # parse outputs if [ ! -f ~{samplename}/results.txt ]; then PF="No hit detected in database" + QC="No hit detected in database" else PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" + QC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 6)" if [ "$PF" == "" ]; then PF="No hit detected in database" + QC="No hit detected in database" fi mv -v ~{samplename}/results.txt ~{samplename}_kmerfinder.tsv fi echo $PF | tee TOP_HIT + echo $QC | tee QC_METRIC >>> output { String kmerfinder_docker = docker File? kmerfinder_results_tsv = "~{samplename}_kmerfinder.tsv" String kmerfinder_top_hit = read_string("TOP_HIT") + String kmerfinder_query_coverage = read_string("QC_METRIC") } runtime { docker: docker diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index f8715322b..711ebcdf7 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -95,6 +95,7 @@ task export_taxon_tables { String? kmerfinder_docker File? kmerfinder_results_tsv String? kmerfinder_top_hit + String? kmerfinder_query_coverage File? amrfinderplus_all_report File? amrfinderplus_amr_report File? 
amrfinderplus_stress_report @@ -593,6 +594,7 @@ task export_taxon_tables { "kmerfinder_docker": "~{kmerfinder_docker}", "kmerfinder_results_tsv": "~{kmerfinder_results_tsv}", "kmerfinder_top_hit": "~{kmerfinder_top_hit}", + "kmerfinder_query_coverage": "~{kmerfinder_query_coverage}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index 82f2d0f8a..db3682394 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -178,6 +178,7 @@ workflow theiaprok_fasta { kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -427,6 +428,7 @@ workflow theiaprok_fasta { String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? 
kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 51af2f0e8..4f56f085a 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -303,6 +303,7 @@ workflow theiaprok_illumina_pe { kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -621,6 +622,7 @@ workflow theiaprok_illumina_pe { String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index cf93aec8d..599b902c5 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -276,6 +276,7 @@ workflow theiaprok_illumina_se { kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -573,6 +574,7 @@ workflow theiaprok_illumina_se { String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index e8421283c..ed96dcedc 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -274,6 +274,7 @@ workflow theiaprok_ont { kmerfinder_docker = kmerfinder.kmerfinder_docker, kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, + kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -547,6 +548,7 @@ workflow theiaprok_ont { String? kmerfinder_docker = kmerfinder.kmerfinder_docker File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit + String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From 3381183b8ef87f02009523646ff3fffd1bcb8e23 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 08:38:30 +0000 Subject: [PATCH 14/24] remove undeeded skip on kmerfinder (set to false by default) --- tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json | 1 - tests/inputs/theiaprok/wf_theiaprok_illumina_se.json | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json index e66701e50..3dd186232 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_pe.json @@ -3,7 +3,6 @@ "theiaprok_illumina_pe.read1_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R1.fastq.gz", "theiaprok_illumina_pe.read2_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R2.fastq.gz", "theiaprok_illumina_pe.skip_screen": true, - "theiaprok_illumina_pe.call_kmerfinder": false, "theiaprok_illumina_pe.read_QC_trim.read_processing": "trimmomatic", "theiaprok_illumina_pe.read_QC_trim.call_midas": false, "theiaprok_illumina_pe.read_QC_trim.midas.midas_db" : "./tests/inputs/empty-for-test.txt", diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json index fbaface2e..1eb3f13d6 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json @@ -2,7 +2,6 @@ "theiaprok_illumina_se.samplename": "test", "theiaprok_illumina_se.read1_raw": "bactopia-tests/data/species/portiera/illumina/SRR2838702_R1.fastq.gz", "theiaprok_illumina_se.skip_screen": true, - "theiaprok_illumina_pe.call_kmerfinder": false, "theiaprok_illumina_se.read_QC_trim.read_processing": "trimmomatic", "theiaprok_illumina_se.read_QC_trim.call_midas": false, "theiaprok_illumina_se.read_QC_trim.midas.midas_db" : "./tests/inputs/empty-for-test.txt", From 
e2dfb57dc527cb81f7af6f16d4b0145422328734 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 08:45:25 +0000 Subject: [PATCH 15/24] fix ops --- tests/inputs/theiaprok/wf_theiaprok_illumina_se.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json index 1eb3f13d6..9e0c42ed7 100644 --- a/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json +++ b/tests/inputs/theiaprok/wf_theiaprok_illumina_se.json @@ -36,5 +36,5 @@ "theiaprok_illumina_se.bakta.bakta_db" : "./tests/inputs/empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_signatures" : "./tests/inputs/completely-empty-for-test.txt", "theiaprok_illumina_se.gambit.gambit_db_genomes" : "./tests/inputs/completely-empty-for-test.txt", - "theiaprok_illumina_pe.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" + "theiaprok_illumina_se.kmerfinder.kmerfinder_db" : "./tests/inputs/completely-empty-for-test.txt" } From b6dd70969fff98cbd15a53be76b328c6a8e920b0 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 09:01:45 +0000 Subject: [PATCH 16/24] update md5sum (part1) --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 4 ++-- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 5fa38e056..ae62062bb 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 0236363c7f0694cd3f96416aa43e2f91 + md5sum: 3c45d7016af73f22e16984c0e6693662 - path: 
miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: a76d59109075ce8b861e63ffe70d7c77 + md5sum: 43b2512c04abd5bd2907c2e9f59666dd - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index dcb4661b5..15a6a8ef3 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -600,7 +600,7 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 0236363c7f0694cd3f96416aa43e2f91 + md5sum: 3c45d7016af73f22e16984c0e6693662 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 858d33eb64b9bda618a47a999f370df5 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl From be9a3e0828b5a49b2a097f98f81416fde85f8791 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 09:17:07 +0000 Subject: [PATCH 17/24] update md5sum (part2) --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 15a6a8ef3..145eb3705 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -602,7 +602,7 @@ - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl md5sum: 3c45d7016af73f22e16984c0e6693662 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 858d33eb64b9bda618a47a999f370df5 + md5sum: 97d84dae28752f6ff49307ae6962b7f2 - path: 
miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From 38fbef075097dc4eb408bf515e676447bcee72b8 Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 17:03:21 +0000 Subject: [PATCH 18/24] fiz sister typo --- tasks/utilities/task_broad_terra_tools.wdl | 4 ++-- workflows/theiaprok/wf_theiaprok_fasta.wdl | 4 ++-- workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 4 ++-- workflows/theiaprok/wf_theiaprok_ont.wdl | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 711ebcdf7..01bd5095c 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -172,7 +172,7 @@ task export_taxon_tables { String? lissero_serotype File? sistr_results File? sistr_allele_json - File? sister_allele_fasta + File? sistr_allele_fasta File? sistr_cgmlst String? sistr_version String? 
sistr_predicted_serotype @@ -497,7 +497,7 @@ task export_taxon_tables { "lissero_serotype": "~{lissero_serotype}", "sistr_results": "~{sistr_results}", "sistr_allele_json": "~{sistr_allele_json}", - "sister_allele_fasta": "~{sister_allele_fasta}", + "sistr_allele_fasta": "~{sistr_allele_fasta}", "sistr_cgmlst": "~{sistr_cgmlst}", "sistr_version": "~{sistr_version}", "sistr_predicted_serotype": "~{sistr_predicted_serotype}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index db3682394..d4ddd54af 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -255,7 +255,7 @@ workflow theiaprok_fasta { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -515,7 +515,7 @@ workflow theiaprok_fasta { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? 
sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 4f56f085a..c917d6ed2 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -380,7 +380,7 @@ workflow theiaprok_illumina_pe { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -737,7 +737,7 @@ workflow theiaprok_illumina_pe { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? 
sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index ed96dcedc..043d6a98e 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -341,7 +341,7 @@ workflow theiaprok_ont { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -651,7 +651,7 @@ workflow theiaprok_ont { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? 
sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype From 5c7b44e4af4d89862d8e6617cf6c77ecb4a6b3cb Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 17:06:52 +0000 Subject: [PATCH 19/24] missed a file --- workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index 599b902c5..b4822d18d 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -353,7 +353,7 @@ workflow theiaprok_illumina_se { lissero_serotype = merlin_magic.lissero_serotype, sistr_results = merlin_magic.sistr_results, sistr_allele_json = merlin_magic.sistr_allele_json, - sister_allele_fasta = merlin_magic.sistr_allele_fasta, + sistr_allele_fasta = merlin_magic.sistr_allele_fasta, sistr_cgmlst = merlin_magic.sistr_cgmlst, sistr_version = merlin_magic.sistr_version, sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype, @@ -689,7 +689,7 @@ workflow theiaprok_illumina_se { # Salmonella Typing File? sistr_results = merlin_magic.sistr_results File? sistr_allele_json = merlin_magic.sistr_allele_json - File? sister_allele_fasta = merlin_magic.sistr_allele_fasta + File? sistr_allele_fasta = merlin_magic.sistr_allele_fasta File? sistr_cgmlst = merlin_magic.sistr_cgmlst String? sistr_version = merlin_magic.sistr_version String? 
sistr_predicted_serotype = merlin_magic.sistr_predicted_serotype From 53d0683248ee382cb82d33644fb10f0d8178cbeb Mon Sep 17 00:00:00 2001 From: cimendes Date: Tue, 19 Sep 2023 17:26:25 +0000 Subject: [PATCH 20/24] update CI --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 4 ++-- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index ae62062bb..0a21761c7 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 3c45d7016af73f22e16984c0e6693662 + md5sum: 49b238cfa9148f6b3e05b11287489277 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 43b2512c04abd5bd2907c2e9f59666dd + md5sum: 1e243db7cc93ab641e46c369f9b512c4 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 145eb3705..8fe6c9926 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 3c45d7016af73f22e16984c0e6693662 + md5sum: 49b238cfa9148f6b3e05b11287489277 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 97d84dae28752f6ff49307ae6962b7f2 + 
md5sum: 764fa10ded5db9358d683d513390c934 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: 53555c2f3e144e55f362080c5e75e434 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From 05c2f960e7d9b68a622c2fabb8aef0a565c9b69e Mon Sep 17 00:00:00 2001 From: cimendes Date: Fri, 22 Sep 2023 11:39:16 +0000 Subject: [PATCH 21/24] add kmerfinder_template_coverage to theiaprok output --- tasks/taxon_id/task_kmerfinder.wdl | 5 +++++ tasks/utilities/task_broad_terra_tools.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_ont.wdl | 2 ++ 6 files changed, 15 insertions(+) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 1b0265d37..827025b49 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -28,23 +28,28 @@ task kmerfinder_bacteria { if [ ! -f ~{samplename}/results.txt ]; then PF="No hit detected in database" QC="No hit detected in database" + TC="No hit detected in database" else PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" QC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 6)" + TC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 7)" if [ "$PF" == "" ]; then PF="No hit detected in database" QC="No hit detected in database" + TC="No hit detected in database" fi mv -v ~{samplename}/results.txt ~{samplename}_kmerfinder.tsv fi echo $PF | tee TOP_HIT echo $QC | tee QC_METRIC + echo $TC | tee TEMPLATE_COVERAGE >>> output { String kmerfinder_docker = docker File? 
kmerfinder_results_tsv = "~{samplename}_kmerfinder.tsv" String kmerfinder_top_hit = read_string("TOP_HIT") String kmerfinder_query_coverage = read_string("QC_METRIC") + String kmerfinder_template_coverage = read_string("TEMPLATE_COVERAGE") } runtime { docker: docker diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 01bd5095c..50e3c23e7 100644 --- a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -96,6 +96,7 @@ task export_taxon_tables { File? kmerfinder_results_tsv String? kmerfinder_top_hit String? kmerfinder_query_coverage + String? kmerfinder_template_coverage File? amrfinderplus_all_report File? amrfinderplus_amr_report File? amrfinderplus_stress_report @@ -595,6 +596,7 @@ task export_taxon_tables { "kmerfinder_results_tsv": "~{kmerfinder_results_tsv}", "kmerfinder_top_hit": "~{kmerfinder_top_hit}", "kmerfinder_query_coverage": "~{kmerfinder_query_coverage}", + "kmerfinder_template_coverage": "~{kmerfinder_template_coverage}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index d4ddd54af..185e158b7 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -179,6 +179,7 @@ workflow theiaprok_fasta { kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -429,6 +430,7 @@ workflow 
theiaprok_fasta { File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index c917d6ed2..e347ea403 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -304,6 +304,7 @@ workflow theiaprok_illumina_pe { kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -623,6 +624,7 @@ workflow theiaprok_illumina_pe { File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index b4822d18d..de1f82abe 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -277,6 +277,7 @@ workflow theiaprok_illumina_se { kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -575,6 +576,7 @@ workflow theiaprok_illumina_se { File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index 043d6a98e..d9c5527d3 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -275,6 +275,7 @@ workflow theiaprok_ont { kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv, kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, + kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -549,6 +550,7 @@ workflow theiaprok_ont { File? kmerfinder_results_tsv = kmerfinder.kmerfinder_results_tsv String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage + String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From 65ed7cfd23c8fca487e0b2db19a0afbb31f61d7d Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 28 Sep 2023 10:01:54 +0000 Subject: [PATCH 22/24] expose database name on kmerfinder outputs --- tasks/taxon_id/task_kmerfinder.wdl | 7 ++++++- tasks/utilities/task_broad_terra_tools.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_fasta.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_pe.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_illumina_se.wdl | 2 ++ workflows/theiaprok/wf_theiaprok_ont.wdl | 2 ++ 6 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 827025b49..75df08df0 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -4,7 +4,7 @@ task kmerfinder_bacteria { input { File assembly String samplename - File kmerfinder_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria.tar.gz" + File kmerfinder_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/kmerfinder_bacteria_20230911.tar.gz" String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/kmerfinder:3.0.2--hdfd78af_0" Int memory = 32 Int cpu = 4 @@ -43,6 +43,10 @@ task kmerfinder_bacteria { echo $PF | tee TOP_HIT echo $QC | tee QC_METRIC echo $TC | tee TEMPLATE_COVERAGE + + # extract database name + DB=$(basename ~{kmerfinder_db} | sed 's/\.tar\.gz$//') + echo $DB | tee DATABASE >>> output { String kmerfinder_docker = docker @@ -50,6 +54,7 @@ task kmerfinder_bacteria { String kmerfinder_top_hit = read_string("TOP_HIT") String kmerfinder_query_coverage = read_string("QC_METRIC") String kmerfinder_template_coverage = read_string("TEMPLATE_COVERAGE") + String kmerfinder_database = read_string("DATABASE") } runtime { docker: docker diff --git a/tasks/utilities/task_broad_terra_tools.wdl b/tasks/utilities/task_broad_terra_tools.wdl index 50e3c23e7..0f430a08e 100644 --- 
a/tasks/utilities/task_broad_terra_tools.wdl +++ b/tasks/utilities/task_broad_terra_tools.wdl @@ -97,6 +97,7 @@ task export_taxon_tables { String? kmerfinder_top_hit String? kmerfinder_query_coverage String? kmerfinder_template_coverage + String? kmerfinder_database File? amrfinderplus_all_report File? amrfinderplus_amr_report File? amrfinderplus_stress_report @@ -597,6 +598,7 @@ task export_taxon_tables { "kmerfinder_top_hit": "~{kmerfinder_top_hit}", "kmerfinder_query_coverage": "~{kmerfinder_query_coverage}", "kmerfinder_template_coverage": "~{kmerfinder_template_coverage}", + "kmerfinder_database": "~{kmerfinder_database}", "resfinder_pheno_table": "~{resfinder_pheno_table}", "resfinder_pheno_table_species": "~{resfinder_pheno_table_species}", "resfinder_seqs": "~{resfinder_seqs}", diff --git a/workflows/theiaprok/wf_theiaprok_fasta.wdl b/workflows/theiaprok/wf_theiaprok_fasta.wdl index 185e158b7..e26894185 100644 --- a/workflows/theiaprok/wf_theiaprok_fasta.wdl +++ b/workflows/theiaprok/wf_theiaprok_fasta.wdl @@ -180,6 +180,7 @@ workflow theiaprok_fasta { kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -431,6 +432,7 @@ workflow theiaprok_fasta { String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? 
kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index e347ea403..9565bf6e3 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -305,6 +305,7 @@ workflow theiaprok_illumina_pe { kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -625,6 +626,7 @@ workflow theiaprok_illumina_pe { String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index de1f82abe..d806c6ea4 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -278,6 +278,7 @@ workflow theiaprok_illumina_se { kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus_task.amrfinderplus_stress_report, @@ -577,6 +578,7 @@ workflow theiaprok_illumina_se { String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus_task.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus_task.amrfinderplus_amr_report diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index d9c5527d3..b81560080 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -276,6 +276,7 @@ workflow theiaprok_ont { kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit, kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage, kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage, + kmerfinder_database = kmerfinder.kmerfinder_database, amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report, amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report, amrfinderplus_stress_report = amrfinderplus.amrfinderplus_stress_report, @@ -551,6 +552,7 @@ workflow theiaprok_ont { String? kmerfinder_top_hit = kmerfinder.kmerfinder_top_hit String? kmerfinder_query_coverage = kmerfinder.kmerfinder_query_coverage String? kmerfinder_template_coverage = kmerfinder.kmerfinder_template_coverage + String? kmerfinder_database = kmerfinder.kmerfinder_database # NCBI-AMRFinderPlus Outputs File? amrfinderplus_all_report = amrfinderplus.amrfinderplus_all_report File? 
amrfinderplus_amr_report = amrfinderplus.amrfinderplus_amr_report From 3c4bbfabd561f85409f78271a06d225fae9897d5 Mon Sep 17 00:00:00 2001 From: cimendes Date: Thu, 28 Sep 2023 10:23:46 +0000 Subject: [PATCH 23/24] update md5sum --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 4 ++-- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 7883ece6c..198209aa4 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -632,9 +632,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 49b238cfa9148f6b3e05b11287489277 + md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: 1e243db7cc93ab641e46c369f9b512c4 + md5sum: 6dc6c393281a19e8dcbcc15964b8e08a - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: c4861a59d49b13b67706631a0e1246c4 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 31e42ae47..9aebb81ba 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -600,9 +600,9 @@ - path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl md5sum: 024971d1439dff7d59c0a26a824bd2c6 - path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl - md5sum: 49b238cfa9148f6b3e05b11287489277 + md5sum: 43ef050bde1fb8755f38e697a1794918 - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 
764fa10ded5db9358d683d513390c934 + md5sum: 24bfd35867f4ae864364e24195bf7f6f - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl md5sum: c4861a59d49b13b67706631a0e1246c4 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl From da98fa63652c59e1f23a0d00ecee6df57f586f84 Mon Sep 17 00:00:00 2001 From: cimendes Date: Mon, 2 Oct 2023 11:10:32 +0000 Subject: [PATCH 24/24] fix bug - headers being output to datatable --- tasks/taxon_id/task_kmerfinder.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/taxon_id/task_kmerfinder.wdl b/tasks/taxon_id/task_kmerfinder.wdl index 75df08df0..61b3bed78 100644 --- a/tasks/taxon_id/task_kmerfinder.wdl +++ b/tasks/taxon_id/task_kmerfinder.wdl @@ -33,7 +33,8 @@ task kmerfinder_bacteria { PF="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 19)" QC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 6)" TC="$(cat ~{samplename}/results.txt | head -n 2 | tail -n 1 | cut -f 7)" - if [ "$PF" == "" ]; then + # String is empty or just contains the header + if [ "$PF" == "" ] || [ "$PF" == "Species" ]; then PF="No hit detected in database" QC="No hit detected in database" TC="No hit detected in database"