diff --git a/tasks/utilities/task_kmc.wdl b/tasks/utilities/task_kmc.wdl
index a8f32be4e..9307acad6 100644
--- a/tasks/utilities/task_kmc.wdl
+++ b/tasks/utilities/task_kmc.wdl
@@ -17,6 +17,9 @@ task kmc {
   command <<<
     kmc | head -n 1 | tee VERSION
 
+    # initialize kmc output
+    echo 0 > UNIQUE_COUNTED
+
     # run kmc
     # kmc [options]
     # -sm - uses strict memory mode (memory from -m switch will not be exceeded)
@@ -38,13 +41,14 @@ task kmc {
     # kmc_outputs is a mess of files that are not human readable
     # however, the stdout does produce some useful stats.
     # the no. of unique counted k-mers can be used as an estimate of genome size
-    grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 > UNIQUE_COUNTED
+    # overwrite (never append) so read_int sees exactly one integer; keep 0 when the log has no numeric count
+    grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 | grep -E '^[0-9]+$' > UNIQUE_COUNTED || echo 0 > UNIQUE_COUNTED
 
     # extracting only the kmer statistics and writing to file:
     tail -n8 LOG > ~{samplename}_kmer_stats.txt
   >>>
   output {
-    String est_genome_size = read_string("UNIQUE_COUNTED")
+    Int est_genome_size = read_int("UNIQUE_COUNTED")
     File kmer_stats = "~{samplename}_kmer_stats.txt"
     String kmc_version = read_string("VERSION")
   }
diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl
index 06628be0a..6057ddce0 100644
--- a/workflows/theiaprok/wf_theiaprok_ont.wdl
+++ b/workflows/theiaprok/wf_theiaprok_ont.wdl
@@ -489,7 +489,7 @@ workflow theiaprok_ont {
     String? nanoplot_version = nanoplot_raw.nanoplot_version
     String? nanoplot_docker = nanoplot_raw.nanoplot_docker
     # Read QC - kmc outputs
-    String? kmc_est_genome_size = read_qc_trim.est_genome_size
+    Int? kmc_est_genome_size = read_qc_trim.est_genome_size
     File? kmc_kmer_stats = read_qc_trim.kmc_kmer_stats
     String? kmc_version = read_qc_trim.kmc_version
     # Read QC - rasusa outputs
diff --git a/workflows/utilities/wf_read_QC_trim_ont.wdl b/workflows/utilities/wf_read_QC_trim_ont.wdl
index 779d479c0..9bbbf1d36 100644
--- a/workflows/utilities/wf_read_QC_trim_ont.wdl
+++ b/workflows/utilities/wf_read_QC_trim_ont.wdl
@@ -27,12 +27,6 @@ workflow read_QC_trim_ont {
     # kraken inputs
     String? target_org
   }
-  # kmc for genome size estimation
-  call kmc_task.kmc {
-    input:
-      read1 = read1,
-      samplename = samplename
-  }
   if ("~{workflow_series}" == "theiacov") {
     call ncbi_scrub.ncbi_scrub_se {
       input:
@@ -61,6 +55,12 @@ workflow read_QC_trim_ont {
     }
   }
   if ("~{workflow_series}" == "theiaprok") {
+    # kmc for genome size estimation
+    call kmc_task.kmc {
+      input:
+        read1 = read1,
+        samplename = samplename
+    }
     # rasusa for random downsampling
     call rasusa_task.rasusa {
       input:
@@ -83,11 +83,6 @@ workflow read_QC_trim_ont {
     }
   }
   output {
-    # kmc outputs
-    String est_genome_size = kmc.est_genome_size
-    File kmc_kmer_stats = kmc.kmer_stats
-    String kmc_version = kmc.kmc_version
-
     # theiacov outputs
     # ncbi scrub outputs
     File? read1_dehosted = ncbi_scrub_se.read1_dehosted
@@ -109,6 +104,11 @@ workflow read_QC_trim_ont {
     File? kraken_report_dehosted = kraken2_dehosted.kraken_report
 
     # theiaprok outputs
+    # kmc outputs
+    Int? est_genome_size = kmc.est_genome_size
+    File? kmc_kmer_stats = kmc.kmer_stats
+    String? kmc_version = kmc.kmc_version
+
     # nanoq outputs
     File read1_clean = select_first([nanoq.filtered_read1, read_filtering.filtered_reads])
     String? nanoq_version = nanoq.version