Skip to content

Commit

Permalink
Make kmc run only for the theiaprok_ont workflow; change the variable type of estimated genome size from String to Int (#193)
Browse files Browse the repository at this point in the history
  • Loading branch information
cimendes authored Sep 22, 2023
1 parent df342b9 commit aa1af2c
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 14 deletions.
7 changes: 5 additions & 2 deletions tasks/utilities/task_kmc.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ task kmc {
command <<<
kmc | head -n 1 | tee VERSION

# initialize kmc output
echo 0 > UNIQUE_COUNTED

# run kmc
# kmc [options] <input_file> <output_file> <working_dir>
# -sm - uses strict memory mode (memory from -m<size> switch will not be exceeded)
Expand All @@ -38,13 +41,13 @@ task kmc {
# kmc_outputs is a mess of files that are not human readable
# however, the stdout does produce some useful stats.
# the no. of unique counted k-mers can be used as an estimate of genome size
grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 > UNIQUE_COUNTED
grep "unique counted k" LOG | tr -s ' ' | cut -d ' ' -f8 >> UNIQUE_COUNTED

# extracting only the kmer statistics and writing to file:
tail -n8 LOG > ~{samplename}_kmer_stats.txt
>>>
output {
String est_genome_size = read_string("UNIQUE_COUNTED")
Int est_genome_size = read_int("UNIQUE_COUNTED")
File kmer_stats = "~{samplename}_kmer_stats.txt"
String kmc_version = read_string("VERSION")
}
Expand Down
2 changes: 1 addition & 1 deletion workflows/theiaprok/wf_theiaprok_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ workflow theiaprok_ont {
String? nanoplot_version = nanoplot_raw.nanoplot_version
String? nanoplot_docker = nanoplot_raw.nanoplot_docker
# Read QC - kmc outputs
String? kmc_est_genome_size = read_qc_trim.est_genome_size
Int? kmc_est_genome_size = read_qc_trim.est_genome_size
File? kmc_kmer_stats = read_qc_trim.kmc_kmer_stats
String? kmc_version = read_qc_trim.kmc_version
# Read QC - rasusa outputs
Expand Down
22 changes: 11 additions & 11 deletions workflows/utilities/wf_read_QC_trim_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,6 @@ workflow read_QC_trim_ont {
# kraken inputs
String? target_org
}
# kmc for genome size estimation
call kmc_task.kmc {
input:
read1 = read1,
samplename = samplename
}
if ("~{workflow_series}" == "theiacov") {
call ncbi_scrub.ncbi_scrub_se {
input:
Expand Down Expand Up @@ -61,6 +55,12 @@ workflow read_QC_trim_ont {
}
}
if ("~{workflow_series}" == "theiaprok") {
# kmc for genome size estimation
call kmc_task.kmc {
input:
read1 = read1,
samplename = samplename
}
# rasusa for random downsampling
call rasusa_task.rasusa {
input:
Expand All @@ -83,11 +83,6 @@ workflow read_QC_trim_ont {
}
}
output {
# kmc outputs
String est_genome_size = kmc.est_genome_size
File kmc_kmer_stats = kmc.kmer_stats
String kmc_version = kmc.kmc_version

# theiacov outputs
# ncbi scrub outputs
File? read1_dehosted = ncbi_scrub_se.read1_dehosted
Expand All @@ -109,6 +104,11 @@ workflow read_QC_trim_ont {
File? kraken_report_dehosted = kraken2_dehosted.kraken_report

# theiaprok outputs
# kmc outputs
Int? est_genome_size = kmc.est_genome_size
File? kmc_kmer_stats = kmc.kmer_stats
String? kmc_version = kmc.kmc_version

# nanoq outputs
File read1_clean = select_first([nanoq.filtered_read1, read_filtering.filtered_reads])
String? nanoq_version = nanoq.version
Expand Down

0 comments on commit aa1af2c

Please sign in to comment.