diff --git a/tasks/assembly/task_maxbin.wdl b/tasks/assembly/task_maxbin.wdl
new file mode 100644
index 000000000..6f4aae615
--- /dev/null
+++ b/tasks/assembly/task_maxbin.wdl
@@ -0,0 +1,54 @@
+version 1.0
+
+# Runs MaxBin2 (run_MaxBin.pl) on an assembly and a pair of read files, then
+# packs the resulting bin FASTA files (<samplename>.*.fasta) into a tarball.
+# Outputs: the tarball, the <samplename>.summary file, the tool version and the docker image used.
+task maxbin2 {
+  input {
+    File assembly
+    File read1
+    File read2
+    String samplename
+    Int min_contig_length = 1000
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/maxbin2:2.2.7--hdbdd923_5"
+    Int disk_size = 100
+    Int cpu = 4
+    Int memory = 16
+    # optional extra arguments, appended verbatim to the run_MaxBin.pl command line
+    String? maxbin2_opts
+  }
+  command <<<
+    # version capture
+    run_MaxBin.pl -v | head -1 | cut -d ' ' -f 2 | tee VERSION
+
+    # every option line except the last must end with a backslash so that the
+    # whole invocation is passed to the shell as one command
+    run_MaxBin.pl \
+      -reads ~{read1} \
+      -reads2 ~{read2} \
+      -min_contig_length ~{min_contig_length} \
+      -thread ~{cpu} \
+      -contig ~{assembly} \
+      -out ~{samplename} \
+      ~{maxbin2_opts}
+
+    # pack bins into a gzip-compressed tarball (-z so the content matches the .tar.gz name)
+    tar -czvf ~{samplename}_bins.tar.gz ~{samplename}.*.fasta
+  >>>
+  output {
+    File maxbin2_bins_fasta = "~{samplename}_bins.tar.gz"
+    File maxbin2_summary = "~{samplename}.summary"
+    String maxbin2_version = read_string("VERSION")
+    String maxbin2_docker = '~{docker}'
+  }
+  runtime {
+    docker: "~{docker}"
+    memory: "~{memory} GB"
+    cpu: "~{cpu}"
+    disks: "local-disk " + disk_size + " SSD"
+    disk: disk_size + " GB"
+    maxRetries: 3
+    preemptible: 0
+  }
+}
+
diff --git a/tasks/quality_control/task_hostile.wdl b/tasks/quality_control/task_hostile.wdl
new file mode 100644
index 000000000..675df9c89
--- /dev/null
+++ b/tasks/quality_control/task_hostile.wdl
@@ -0,0 +1,45 @@
+version 1.0
+
+# Runs `hostile clean` on a pair of read files and renames the cleaned reads to
+# <samplename>_R1_dehosted.fastq.gz and <samplename>_R2_dehosted.fastq.gz.
+task hostile_pe {
+  input {
+    File read1
+    File read2
+    String aligner = "bowtie2"
+    String samplename
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/hostile:0.1.0--pyhdfd78af_0"
+    Int disk_size = 100
+  }
+  command <<<
+    # date and version control
+    date | tee DATE
+    hostile --version | tee VERSION
+
+    # run hostile
+    hostile clean \
+      --fastq1 ~{read1} \
+      --fastq2 ~{read2} \
+      --aligner ~{aligner}
+
+    # rename output reads; matching on the .clean_N.fastq.gz suffix works whether the
+    # inputs ended in .fastq.gz or .fq.gz (assumes one cleaned pair in the working directory)
+    mv ./*.clean_1.fastq.gz "~{samplename}_R1_dehosted.fastq.gz"
+    mv ./*.clean_2.fastq.gz "~{samplename}_R2_dehosted.fastq.gz"
+  >>>
+  output {
+    File read1_dehosted = "~{samplename}_R1_dehosted.fastq.gz"
+    File read2_dehosted = "~{samplename}_R2_dehosted.fastq.gz"
+    String hostile_version = read_string("VERSION")
+    String hostile_docker = docker
+  }
+  runtime {
+    docker: "~{docker}"
+    memory: "8 GB"
+    cpu: 4
+    disks: "local-disk " + disk_size + " SSD"
+    disk: disk_size + " GB" # TES
+    preemptible: 0
+    maxRetries: 3
+  }
+}
diff --git a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
index 5115c9a2e..08216db25 100644
--- a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
+++ b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
@@ -6,6 +6,7 @@ import "../../tasks/taxon_id/task_kraken2.wdl" as kraken_task
 import "../../tasks/alignment/task_minimap2.wdl" as minimap2_task
 import "../../tasks/utilities/task_parse_mapping.wdl" as parse_mapping_task
 import "../../tasks/quality_control/task_quast.wdl" as quast_task
+import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus_task
 import "../../tasks/task_versioning.wdl" as versioning
 
 workflow theiameta_illumina_pe {
@@ -19,6 +20,7 @@ workflow theiameta_illumina_pe {
     File? reference
     File kraken2_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/k2_standard_8gb_20210517.tar.gz"
     Boolean output_additional_files = false
+    String read_scrubber = "ncbi"
   }
   call kraken_task.kraken2_standalone as kraken2_raw {
     input:
@@ -32,7 +34,8 @@ workflow theiameta_illumina_pe {
       samplename = samplename,
       read1_raw = read1,
       read2_raw = read2,
-      workflow_series = "theiameta"
+      workflow_series = "theiameta",
+      read_scrubber = read_scrubber
   }
   call kraken_task.kraken2_standalone as kraken2_clean {
     input:
@@ -110,6 +113,42 @@ workflow theiameta_illumina_pe {
         input:
           bam = sam_to_sorted_bam.bam,
       }
+    call amrfinderplus_task.amrfinderplus_nuc {
+      input:
+        assembly = select_first([retrieve_aligned_contig_paf.final_assembly, metaspades.assembly_fasta]),
+        samplename = samplename
+    }
+    call minimap2_task.minimap2 as minimap2_reads {
+      input:
+        query1 = read_QC_trim.read1_clean,
+        query2 = read_QC_trim.read2_clean,
+        reference = select_first([retrieve_aligned_contig_paf.final_assembly, metaspades.assembly_fasta]),
+        samplename = samplename,
+        mode = "sr",
+        output_sam = true
+    }
+    call parse_mapping_task.sam_to_sorted_bam {
+      input:
+        sam = minimap2_reads.minimap2_out,
+        samplename = samplename
+    }
+    call parse_mapping_task.calculate_coverage {
+      input:
+        bam = sam_to_sorted_bam.bam,
+        bai = sam_to_sorted_bam.bai
+    }
+    call parse_mapping_task.retrieve_pe_reads_bam as retrieve_unaligned_pe_reads_sam {
+      input:
+        bam = sam_to_sorted_bam.bam,
+        samplename = samplename,
+        prefix = "unassembled"
+    }
+    call parse_mapping_task.retrieve_pe_reads_bam as retrieve_aligned_pe_reads_sam {
+      input:
+        bam = sam_to_sorted_bam.bam,
+        samplename = samplename,
+        sam_flag = 2,
+        prefix = "assembled"
     }
   call versioning.version_capture{
     input:
@@ -166,6 +205,19 @@ workflow theiameta_illumina_pe {
     Int largest_contig = quast.largest_contig
     String quast_version = quast.version
     String quast_docker = quast.quast_docker
+    # NCBI-AMRFinderPlus Outputs
+    File? amrfinderplus_all_report = amrfinderplus_nuc.amrfinderplus_all_report
+    File? amrfinderplus_amr_report = amrfinderplus_nuc.amrfinderplus_amr_report
+    File? amrfinderplus_stress_report = amrfinderplus_nuc.amrfinderplus_stress_report
+    File? amrfinderplus_virulence_report = amrfinderplus_nuc.amrfinderplus_virulence_report
+    String? amrfinderplus_amr_core_genes = amrfinderplus_nuc.amrfinderplus_amr_core_genes
+    String? amrfinderplus_amr_plus_genes = amrfinderplus_nuc.amrfinderplus_amr_plus_genes
+    String? amrfinderplus_stress_genes = amrfinderplus_nuc.amrfinderplus_stress_genes
+    String? amrfinderplus_virulence_genes = amrfinderplus_nuc.amrfinderplus_virulence_genes
+    String? amrfinderplus_amr_classes = amrfinderplus_nuc.amrfinderplus_amr_classes
+    String? amrfinderplus_amr_subclasses = amrfinderplus_nuc.amrfinderplus_amr_subclasses
+    String? amrfinderplus_version = amrfinderplus_nuc.amrfinderplus_version
+    String? amrfinderplus_db_version = amrfinderplus_nuc.amrfinderplus_db_version
     # Assembly QC - minimap2
     Float? percent_coverage = calculate_coverage_paf.percent_coverage
     # Assembly QC - bedtools
diff --git a/workflows/utilities/wf_read_QC_trim_pe.wdl b/workflows/utilities/wf_read_QC_trim_pe.wdl
index e31fea2ed..b3c4235f0 100644
--- a/workflows/utilities/wf_read_QC_trim_pe.wdl
+++ b/workflows/utilities/wf_read_QC_trim_pe.wdl
@@ -3,6 +3,7 @@ version 1.0
 import "../../tasks/quality_control/task_fastq_scan.wdl" as fastq_scan
 import "../../tasks/quality_control/task_trimmomatic.wdl" as trimmomatic
 import "../../tasks/quality_control/task_ncbi_scrub.wdl" as ncbi_scrub
+import "../../tasks/quality_control/task_hostile.wdl" as hostile_task
 import "../../tasks/quality_control/task_bbduk.wdl" as bbduk_task
 import "../../tasks/quality_control/task_readlength.wdl" as readlength_task
 import "../../tasks/quality_control/task_fastp.wdl" as fastp_task
@@ -28,15 +29,26 @@ workflow read_QC_trim_pe {
     File? phix
     String? workflow_series
     String read_processing = "trimmomatic"
+    String read_scrubber = "ncbi"
     String? trimmomatic_args
     String fastp_args = "--detect_adapter_for_pe -g -5 20 -3 20"
   }
   if (("~{workflow_series}" == "theiacov") || ("~{workflow_series}" == "theiameta")) {
-    call ncbi_scrub.ncbi_scrub_pe {
-      input:
-        samplename = samplename,
-        read1 = read1_raw,
-        read2 = read2_raw
+    if (read_scrubber == "ncbi"){
+      call ncbi_scrub.ncbi_scrub_pe {
+        input:
+          samplename = samplename,
+          read1 = read1_raw,
+          read2 = read2_raw
+      }
+    }
+    if (read_scrubber == "hostile"){
+      call hostile_task.hostile_pe {
+        input:
+          samplename = samplename,
+          read1 = read1_raw,
+          read2 = read2_raw
+      }
     }
   }
   if ("~{workflow_series}" == "theiacov") {
@@ -50,8 +62,8 @@ workflow read_QC_trim_pe {
     call kraken.kraken2_theiacov as kraken2_theiacov_dehosted {
       input:
         samplename = samplename,
-        read1 = select_first([ncbi_scrub_pe.read1_dehosted]),
-        read2 = ncbi_scrub_pe.read2_dehosted,
+        read1 = select_first([ncbi_scrub_pe.read1_dehosted, hostile_pe.read1_dehosted]),
+        read2 = select_first([ncbi_scrub_pe.read2_dehosted, hostile_pe.read2_dehosted]),
         target_org = target_org
     }
   }
@@ -59,8 +71,8 @@ workflow read_QC_trim_pe {
     call trimmomatic.trimmomatic_pe {
       input:
         samplename = samplename,
-        read1 = select_first([ncbi_scrub_pe.read1_dehosted, read1_raw]),
-        read2 = select_first([ncbi_scrub_pe.read2_dehosted, read2_raw]),
+        read1 = select_first([ncbi_scrub_pe.read1_dehosted, hostile_pe.read1_dehosted, read1_raw]),
+        read2 = select_first([ncbi_scrub_pe.read2_dehosted, hostile_pe.read2_dehosted, read2_raw]),
         trimmomatic_window_size = trim_window_size,
         trimmomatic_quality_trim_score = trim_quality_trim_score,
         trimmomatic_minlen = trim_minlen,
@@ -115,12 +127,13 @@ workflow read_QC_trim_pe {
     }
   }
   output {
-    # NCBI scrubber
-    File? read1_dehosted = ncbi_scrub_pe.read1_dehosted
-    File? read2_dehosted = ncbi_scrub_pe.read2_dehosted
+    # NCBI scrubber / hostile - both calls are conditional, so these stay undefined when neither ran (select_first would raise in that case)
+    File? read1_dehosted = if (defined(hostile_pe.read1_dehosted)) then hostile_pe.read1_dehosted else ncbi_scrub_pe.read1_dehosted
+    File? read2_dehosted = if (defined(hostile_pe.read2_dehosted)) then hostile_pe.read2_dehosted else ncbi_scrub_pe.read2_dehosted
     Int? read1_human_spots_removed = ncbi_scrub_pe.read1_human_spots_removed
     Int? read2_human_spots_removed = ncbi_scrub_pe.read2_human_spots_removed
     String? ncbi_scrub_docker = ncbi_scrub_pe.ncbi_scrub_docker
+    String? hostile_docker = hostile_pe.hostile_docker
     # bbduk
     File read1_clean = bbduk.read1_clean