theiagen · sage-wright · Sep 22, 2023 · Sep 20, 2023
@@ -17,6 +17,9 @@ task kmc {
   command <<<
     kmc | head -n 1 | tee VERSION
 
+    # initialize kmc output
+    echo 0 > UNIQUE_COUNTED
+
     # run kmc
     # kmc [options] <input_file> <output_file> <working_dir>
     # -sm - uses strict memory mode (memory from -m<size> switch will not be exceeded)
@@ -38,13 +41,16 @@ task kmc {
     # kmc_outputs is a mess of files that are not human readable
     # however, the stdout does produce some useful stats. 
     #  the no. of unique counted k-mers can be used as an estimate of genome size
-    grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 > UNIQUE_COUNTED
+    # read_int() below needs UNIQUE_COUNTED to hold exactly one integer line, so
+    # overwrite (never append) and fall back to 0 when no count is found in LOG
+    unique_kmers=$(grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 | tail -n 1)
+    echo "${unique_kmers:-0}" > UNIQUE_COUNTED
 
     # extracting only the kmer statistics and writing to file:
     tail -n8 LOG > ~{samplename}_kmer_stats.txt
   >>>
   output {
-    String est_genome_size = read_string("UNIQUE_COUNTED") 
+    Int est_genome_size = read_int("UNIQUE_COUNTED") 
     File kmer_stats = "~{samplename}_kmer_stats.txt"
     String kmc_version = read_string("VERSION")
   }

@@ -489,7 +489,7 @@ workflow theiaprok_ont {
     String? nanoplot_version = nanoplot_raw.nanoplot_version
     String? nanoplot_docker = nanoplot_raw.nanoplot_docker
     # Read QC - kmc outputs
-    String? kmc_est_genome_size = read_qc_trim.est_genome_size
+    Int? kmc_est_genome_size = read_qc_trim.est_genome_size
     File? kmc_kmer_stats = read_qc_trim.kmc_kmer_stats
     String? kmc_version = read_qc_trim.kmc_version
     # Read QC - rasusa outputs

@@ -27,12 +27,6 @@ workflow read_QC_trim_ont {
     # kraken inputs
     String? target_org
   }
-  # kmc for genome size estimation
-  call kmc_task.kmc {
-    input:
-      read1 = read1,
-      samplename = samplename
-  }
   if ("~{workflow_series}" == "theiacov") {
     call ncbi_scrub.ncbi_scrub_se {
       input:
@@ -61,6 +55,12 @@ workflow read_QC_trim_ont {
     }
   }
   if ("~{workflow_series}" == "theiaprok") {
+    # kmc for genome size estimation
+    call kmc_task.kmc {
+      input:
+        read1 = read1,
+        samplename = samplename
+    }
     # rasusa for random downsampling
     call rasusa_task.rasusa {
       input:
@@ -83,11 +83,6 @@ workflow read_QC_trim_ont {
     }
   }
   output { 
-    # kmc outputs
-    String est_genome_size = kmc.est_genome_size
-    File kmc_kmer_stats = kmc.kmer_stats
-    String kmc_version = kmc.kmc_version
-
     # theiacov outputs
     # ncbi scrub outputs
     File? read1_dehosted = ncbi_scrub_se.read1_dehosted
@@ -109,6 +104,11 @@ workflow read_QC_trim_ont {
     File? kraken_report_dehosted = kraken2_dehosted.kraken_report
 
     # theiaprok outputs
+    # kmc outputs
+    Int? est_genome_size = kmc.est_genome_size
+    File? kmc_kmer_stats = kmc.kmer_stats
+    String? kmc_version = kmc.kmc_version
+
     # nanoq outputs
     File read1_clean = select_first([nanoq.filtered_read1, read_filtering.filtered_reads])
     String? nanoq_version = nanoq.version