Skip to content

Commit

Permalink
Merge pull request #447 from broadinstitute/dp-meta
Browse files Browse the repository at this point in the history
update classify_single to accept multiple bams
  • Loading branch information
dpark01 authored Dec 21, 2022
2 parents e2391a0 + e986cc1 commit 5c0ebd7
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 39 deletions.
66 changes: 33 additions & 33 deletions pipes/WDL/tasks/tasks_metagenomics.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ task kraken2 {
Float? confidence_threshold
Int? min_base_qual

Int? machine_mem_gb
Int machine_mem_gb = 72
String docker = "quay.io/broadinstitute/viral-classify:2.1.33.0"
}

Expand Down Expand Up @@ -237,7 +237,7 @@ task kraken2 {
String out_basename = basename(basename(reads_bam, '.bam'), '.fasta')
Int disk_size = 750

command {
command <<<
set -ex -o pipefail

if [ -z "$TMPDIR" ]; then
Expand All @@ -248,72 +248,72 @@ task kraken2 {

# decompress DB to $DB_DIR
read_utils.py extract_tarball \
${kraken2_db_tgz} $DB_DIR/kraken2 \
"~{kraken2_db_tgz}" $DB_DIR/kraken2 \
--loglevel=DEBUG
du -hs $DB_DIR/kraken2

# unpack krona taxonomy.tab
if [[ ${krona_taxonomy_db_tgz} == *.tar.* ]]; then
if [[ "~{krona_taxonomy_db_tgz}" == *.tar.* ]]; then
read_utils.py extract_tarball \
${krona_taxonomy_db_tgz} $DB_DIR/krona \
"~{krona_taxonomy_db_tgz}" $DB_DIR/krona \
--loglevel=DEBUG & # we don't need this until later
else
if [[ "${krona_taxonomy_db_tgz}" == *.zst ]]; then
cat "${krona_taxonomy_db_tgz}" | zstd -d > $DB_DIR/krona/taxonomy.tab &
elif [[ "${krona_taxonomy_db_tgz}" == *.gz ]]; then
cat "${krona_taxonomy_db_tgz}" | pigz -dc > $DB_DIR/krona/taxonomy.tab &
elif [[ "${krona_taxonomy_db_tgz}" == *.bz2 ]]; then
cat "${krona_taxonomy_db_tgz}" | bzip -dc > $DB_DIR/krona/taxonomy.tab &
if [[ "~{krona_taxonomy_db_tgz}" == *.zst ]]; then
cat "~{krona_taxonomy_db_tgz}" | zstd -d > $DB_DIR/krona/taxonomy.tab &
elif [[ "~{krona_taxonomy_db_tgz}" == *.gz ]]; then
cat "~{krona_taxonomy_db_tgz}" | pigz -dc > $DB_DIR/krona/taxonomy.tab &
elif [[ "~{krona_taxonomy_db_tgz}" == *.bz2 ]]; then
cat "~{krona_taxonomy_db_tgz}" | bzip2 -dc > $DB_DIR/krona/taxonomy.tab &
else
cp "${krona_taxonomy_db_tgz}" $DB_DIR/krona/taxonomy.tab &
cp "~{krona_taxonomy_db_tgz}" $DB_DIR/krona/taxonomy.tab &
fi
fi

metagenomics.py --version | tee VERSION

if [[ ${reads_bam} == *.bam ]]; then
if [[ "~{reads_bam}" == *.bam ]]; then
metagenomics.py kraken2 \
$DB_DIR/kraken2 \
${reads_bam} \
--outReads "${out_basename}".kraken2.reads.txt \
--outReports "${out_basename}".kraken2.report.txt \
${"--confidence " + confidence_threshold} \
${"--min_base_qual " + min_base_qual} \
"~{reads_bam}" \
--outReads "~{out_basename}".kraken2.reads.txt \
--outReports "~{out_basename}".kraken2.report.txt \
~{"--confidence " + confidence_threshold} \
~{"--min_base_qual " + min_base_qual} \
--loglevel=DEBUG
else # fasta input file: call kraken2 directly
kraken2 \
--db $DB_DIR/kraken2 \
${reads_bam} \
--output "${out_basename}".kraken2.reads.txt \
--report "${out_basename}".kraken2.report.txt \
${"--confidence " + confidence_threshold} \
${"--min_base_qual " + min_base_qual}
"~{reads_bam}" \
--output "~{out_basename}".kraken2.reads.txt \
--report "~{out_basename}".kraken2.report.txt \
~{"--confidence " + confidence_threshold} \
~{"--min_base_qual " + min_base_qual}
fi

wait # for krona_taxonomy_db_tgz to download and extract
pigz "${out_basename}".kraken2.reads.txt &
pigz "~{out_basename}".kraken2.reads.txt &

metagenomics.py krona \
"${out_basename}".kraken2.report.txt \
"~{out_basename}".kraken2.report.txt \
$DB_DIR/krona \
"${out_basename}".kraken2.krona.html \
--sample_name "${out_basename}" \
"~{out_basename}".kraken2.krona.html \
--sample_name "~{out_basename}" \
--noRank --noHits --inputType kraken2 \
--loglevel=DEBUG

wait # pigz reads.txt
}
>>>

output {
File kraken2_reads_report = "${out_basename}.kraken2.reads.txt.gz"
File kraken2_summary_report = "${out_basename}.kraken2.report.txt"
File krona_report_html = "${out_basename}.kraken2.krona.html"
File kraken2_reads_report = "~{out_basename}.kraken2.reads.txt.gz"
File kraken2_summary_report = "~{out_basename}.kraken2.report.txt"
File krona_report_html = "~{out_basename}.kraken2.krona.html"
String viralngs_version = read_string("VERSION")
}

runtime {
docker: "${docker}"
memory: select_first([machine_mem_gb, 52]) + " GB"
docker: docker
memory: machine_mem_gb + " GB"
cpu: 8
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TESs
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_read_utils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ task merge_and_reheader_bams {
Array[File]+ in_bams
String? sample_name
File? reheader_table
String out_basename
String out_basename = basename(in_bams[0], ".bam")
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
}
Expand Down
17 changes: 12 additions & 5 deletions pipes/WDL/workflows/classify_single.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ workflow classify_single {
}

input {
File reads_bam
Array[File]+ reads_bams

File ncbi_taxdump_tgz

Expand All @@ -26,8 +26,8 @@ workflow classify_single {
}

parameter_meta {
reads_bam: {
description: "Reads to classify. May be unmapped or mapped or both, paired-end or single-end.",
reads_bams: {
description: "Reads to classify. May be unmapped or mapped or both, paired-end or single-end. Multiple input files will be merged first.",
patterns: ["*.bam"]
}
spikein_db: {
Expand All @@ -52,9 +52,12 @@ workflow classify_single {
}
}

call reports.fastqc as fastqc_raw {
input: reads_bam = reads_bam
call read_utils.merge_and_reheader_bams as merge_raw_reads {
input:
in_bams = reads_bams
}
File reads_bam = merge_raw_reads.out_bam

call reports.align_and_count as spikein {
input:
reads_bam = reads_bam,
Expand Down Expand Up @@ -110,6 +113,10 @@ workflow classify_single {

File kraken2_summary_report = kraken2.kraken2_summary_report
File kraken2_krona_plot = kraken2.krona_report_html
File raw_fastqc = merge_raw_reads.fastqc
File cleaned_fastqc = fastqc_cleaned.fastqc_html
File spikein_report = spikein.report
String spikein_tophit = spikein.top_hit_id

String kraken2_viral_classify_version = kraken2.viralngs_version
String deplete_viral_classify_version = deplete.viralngs_version
Expand Down

0 comments on commit 5c0ebd7

Please sign in to comment.