From 6a54e5041cd1016a06fe084f6bcee356bd60dbc3 Mon Sep 17 00:00:00 2001
From: cimendes
Date: Thu, 3 Aug 2023 10:03:43 +0000
Subject: [PATCH 1/7] add amrfinderplus to theiameta

---
 .../metagenomics/wf_theiameta_illumina_pe.wdl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
index e341f5f45..7bb6924ec 100644
--- a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
+++ b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
@@ -6,6 +6,7 @@ import "../../tasks/taxon_id/task_kraken2.wdl" as kraken_task
 import "../../tasks/alignment/task_minimap2.wdl" as minimap2_task
 import "../../tasks/utilities/task_parse_mapping.wdl" as parse_mapping_task
 import "../../tasks/quality_control/task_quast.wdl" as quast_task
+import "../../tasks/gene_typing/task_amrfinderplus.wdl" as amrfinderplus_task
 import "../../tasks/task_versioning.wdl" as versioning
 
 workflow theiameta_illumina_pe {
@@ -72,6 +73,11 @@ workflow theiameta_illumina_pe {
       samplename = samplename,
       min_contig_len = 1
   }
+  call amrfinderplus_task.amrfinderplus_nuc {
+    input:
+      assembly = select_first([retrieve_aligned_contig_paf.final_assembly, metaspades.assembly_fasta]),
+      samplename = samplename
+  }
   call minimap2_task.minimap2 as minimap2_reads {
     input:
       query1 = read_QC_trim.read1_clean,
@@ -159,6 +165,19 @@ workflow theiameta_illumina_pe {
     Int largest_contig = quast.largest_contig
     String quast_version = quast.version
     String quast_docker = quast.quast_docker
+    # NCBI-AMRFinderPlus Outputs
+    File? amrfinderplus_all_report = amrfinderplus_nuc.amrfinderplus_all_report
+    File? amrfinderplus_amr_report = amrfinderplus_nuc.amrfinderplus_amr_report
+    File? amrfinderplus_stress_report = amrfinderplus_nuc.amrfinderplus_stress_report
+    File? amrfinderplus_virulence_report = amrfinderplus_nuc.amrfinderplus_virulence_report
+    String? amrfinderplus_amr_core_genes = amrfinderplus_nuc.amrfinderplus_amr_core_genes
+    String? amrfinderplus_amr_plus_genes = amrfinderplus_nuc.amrfinderplus_amr_plus_genes
+    String? amrfinderplus_stress_genes = amrfinderplus_nuc.amrfinderplus_stress_genes
+    String? amrfinderplus_virulence_genes = amrfinderplus_nuc.amrfinderplus_virulence_genes
+    String? amrfinderplus_amr_classes = amrfinderplus_nuc.amrfinderplus_amr_classes
+    String? amrfinderplus_amr_subclasses = amrfinderplus_nuc.amrfinderplus_amr_subclasses
+    String? amrfinderplus_version = amrfinderplus_nuc.amrfinderplus_version
+    String? amrfinderplus_db_version = amrfinderplus_nuc.amrfinderplus_db_version
     # Assembly QC - minimap2
     Float? percent_coverage = calculate_coverage_paf.percent_coverage
     # Assembly QC - bedtools

From 4fad9b190da1dd404a30e464b50e6b09d83f2646 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?In=C3=AAs=20Mendes?=
Date: Thu, 3 Aug 2023 13:02:05 +0100
Subject: [PATCH 2/7] update container to use google registry (#138)

---
 tasks/alignment/task_minimap2.wdl         |  2 +-
 tasks/quality_control/task_pilon.wdl      |  2 +-
 tasks/quality_control/task_readlength.wdl |  2 +-
 tasks/utilities/task_parse_mapping.wdl    | 10 +++++-----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tasks/alignment/task_minimap2.wdl b/tasks/alignment/task_minimap2.wdl
index c075be748..74e6a202f 100644
--- a/tasks/alignment/task_minimap2.wdl
+++ b/tasks/alignment/task_minimap2.wdl
@@ -9,7 +9,7 @@ task minimap2 {
     File? query2
     File reference
     String samplename
-    String docker = "staphb/minimap2:2.22" # newer versions seem to be bugged (infinite loop)
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/minimap2:2.22" # newer versions seem to be bugged (infinite loop)
     String mode = "asm20"
     Boolean output_sam = false
     Int disk_size = 100
diff --git a/tasks/quality_control/task_pilon.wdl b/tasks/quality_control/task_pilon.wdl
index 486c0bf32..b71f64a9a 100644
--- a/tasks/quality_control/task_pilon.wdl
+++ b/tasks/quality_control/task_pilon.wdl
@@ -6,7 +6,7 @@ task pilon {
     File bam
     File bai
     String samplename
-    String docker = "quay.io/biocontainers/pilon:1.24--hdfd78af_0"
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/pilon:1.24--hdfd78af_0"
     Int cpu = 4
     Int memory = 8
     Int disk_size = 100
diff --git a/tasks/quality_control/task_readlength.wdl b/tasks/quality_control/task_readlength.wdl
index d76b266bd..fd8e59e3f 100644
--- a/tasks/quality_control/task_readlength.wdl
+++ b/tasks/quality_control/task_readlength.wdl
@@ -5,7 +5,7 @@ task readlength {
     File read1
     File read2
     Int memory = 8
-    String docker = "quay.io/staphb/bbtools:38.76"
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/bbtools:38.76"
     Int disk_size = 100
   }
   command <<<
diff --git a/tasks/utilities/task_parse_mapping.wdl b/tasks/utilities/task_parse_mapping.wdl
index f5d8447b6..a084bb0b8 100644
--- a/tasks/utilities/task_parse_mapping.wdl
+++ b/tasks/utilities/task_parse_mapping.wdl
@@ -8,7 +8,7 @@ task retrieve_aligned_contig_paf {
     File paf
     File assembly
     String samplename
-    String docker = "quay.io/biocontainers/seqkit:2.4.0--h9ee0642_0"
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/seqkit:2.4.0--h9ee0642_0"
     Int disk_size = 100
     Int cpu = 2
     Int mem = 8
@@ -51,7 +51,7 @@ task calculate_coverage_paf {
   }
   input {
     File paf
-    String docker = "quay.io/quay/ubuntu"
+    String docker = "us-docker.pkg.dev/general-theiagen/quay/ubuntu:latest"
     Int disk_size = 100
     Int cpu = 2
     Int mem = 8
@@ -89,7 +89,7 @@ task sam_to_sorted_bam {
   input {
     File sam
     String samplename
-    String docker = "quay.io/staphb/samtools:1.17"
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/samtools:1.17"
     Int disk_size = 100
     Int cpu = 2
     Int mem = 8
@@ -131,7 +131,7 @@ task retrieve_pe_reads_bam {
     String samplename
     String prefix = ""
     Int sam_flag = "4" # unmapped reads (SAM flag 4)
-    String docker = "quay.io/staphb/samtools:1.17"
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/samtools:1.17"
     Int disk_size = 100
     Int cpu = 2
     Int mem = 8
@@ -163,7 +163,7 @@ task calculate_coverage {
   input {
     File bam
     File bai
-    String docker = "quay.io/staphb/bedtools:2.31.0"
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/bedtools:2.31.0"
     Int disk_size = 100
     Int cpu = 2
     Int mem = 8

From 25059453fe57b8cb685f7e028b4a777da7591767 Mon Sep 17 00:00:00 2001
From: cimendes
Date: Thu, 3 Aug 2023 14:34:36 +0000
Subject: [PATCH 3/7] add hostile task

---
 tasks/quality_control/task_hostile.wdl | 46 ++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 tasks/quality_control/task_hostile.wdl

diff --git a/tasks/quality_control/task_hostile.wdl b/tasks/quality_control/task_hostile.wdl
new file mode 100644
index 000000000..b97bbf630
--- /dev/null
+++ b/tasks/quality_control/task_hostile.wdl
@@ -0,0 +1,46 @@
+version 1.0
+
+task hostile_pe {
+  input {
+    File read1
+    File read2
+    String aligner = "bowtie2"
+    String samplename
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/hostile:0.1.0--pyhdfd78af_0"
+    Int disk_size = 100
+  }
+  command <<<
+    # date and version control
+    date | tee DATE
+    hostile --version | tee Version
+
+    # run hostile
+    hostile clean \
+      --fastq1 ~{read1} \
+      --fastq2 ~{read2} \
+      --aligner ~{aligner}
+
+    # rename output reads
+    filename_without_extension_1=$(basename "~{read1}" .fastq.gz)
+    filename_without_extension_2=$(basename "~{read2}" .fastq.gz)
+
+    mv "${filename_without_extension_1}.clean_1.fastq.gz" "~{samplename}_R1_dehosted.fastq.gz"
+    mv "${filename_without_extension_2}.clean_2.fastq.gz" "~{samplename}_R2_dehosted.fastq.gz"
+
+  >>>
+  output {
+    File read1_dehosted = "~{samplename}_R1_dehosted.fastq.gz"
+    File read2_dehosted = "~{samplename}_R2_dehosted.fastq.gz"
+    String hostile_docker = docker
+
+  }
+  runtime {
+    docker: "~{docker}"
+    memory: "8 GB"
+    cpu: 4
+    disks: "local-disk " + disk_size + " SSD"
+    disk: disk_size + " GB" # TES
+    preemptible: 0
+    maxRetries: 3
+  }
+}

From b351889a076130adf746e1ed8dc96dcdb8efd712 Mon Sep 17 00:00:00 2001
From: cimendes
Date: Thu, 3 Aug 2023 14:35:58 +0000
Subject: [PATCH 4/7] add option to choose either ncbi or hostile for human read scrubbing

---
Note: the read1_dehosted/read2_dehosted workflow outputs use if/then/else
rather than select_first(). Both scrubber calls are conditional, and
select_first() aborts the workflow when neither of them ran (any
workflow_series other than theiacov/theiameta, or an unrecognised
read_scrubber value). With if/then/else the optional outputs are simply
left undefined in that case.

 .../metagenomics/wf_theiameta_illumina_pe.wdl |  4 +-
 workflows/utilities/wf_read_QC_trim_pe.wdl    | 37 +++++++++++++------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
index 7bb6924ec..8ef1d7982 100644
--- a/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
+++ b/workflows/metagenomics/wf_theiameta_illumina_pe.wdl
@@ -19,6 +19,7 @@ workflow theiameta_illumina_pe {
     String samplename
     File? reference
     File kraken2_db = "gs://theiagen-public-files-rp/terra/theiaprok-files/k2_standard_8gb_20210517.tar.gz"
+    String read_scrubber = "ncbi"
   }
   call kraken_task.kraken2_standalone as kraken2_raw {
     input:
@@ -32,7 +33,8 @@ workflow theiameta_illumina_pe {
       samplename = samplename,
       read1_raw = read1,
       read2_raw = read2,
-      workflow_series = "theiameta"
+      workflow_series = "theiameta",
+      read_scrubber = read_scrubber
   }
   call kraken_task.kraken2_standalone as kraken2_clean {
     input:
diff --git a/workflows/utilities/wf_read_QC_trim_pe.wdl b/workflows/utilities/wf_read_QC_trim_pe.wdl
index e31fea2ed..b3c4235f0 100644
--- a/workflows/utilities/wf_read_QC_trim_pe.wdl
+++ b/workflows/utilities/wf_read_QC_trim_pe.wdl
@@ -3,6 +3,7 @@ version 1.0
 import "../../tasks/quality_control/task_fastq_scan.wdl" as fastq_scan
 import "../../tasks/quality_control/task_trimmomatic.wdl" as trimmomatic
 import "../../tasks/quality_control/task_ncbi_scrub.wdl" as ncbi_scrub
+import "../../tasks/quality_control/task_hostile.wdl" as hostile_task
 import "../../tasks/quality_control/task_bbduk.wdl" as bbduk_task
 import "../../tasks/quality_control/task_readlength.wdl" as readlength_task
 import "../../tasks/quality_control/task_fastp.wdl" as fastp_task
@@ -28,15 +29,26 @@ workflow read_QC_trim_pe {
     File? phix
     String? workflow_series
     String read_processing = "trimmomatic"
+    String read_scrubber = "ncbi"
     String? trimmomatic_args
     String fastp_args = "--detect_adapter_for_pe -g -5 20 -3 20"
   }
   if (("~{workflow_series}" == "theiacov") || ("~{workflow_series}" == "theiameta")) {
-    call ncbi_scrub.ncbi_scrub_pe {
-      input:
-        samplename = samplename,
-        read1 = read1_raw,
-        read2 = read2_raw
+    if (read_scrubber == "ncbi"){
+      call ncbi_scrub.ncbi_scrub_pe {
+        input:
+          samplename = samplename,
+          read1 = read1_raw,
+          read2 = read2_raw
+      }
+    }
+    if (read_scrubber == "hostile"){
+      call hostile_task.hostile_pe {
+        input:
+          samplename = samplename,
+          read1 = read1_raw,
+          read2 = read2_raw
+      }
     }
   }
   if ("~{workflow_series}" == "theiacov") {
@@ -50,8 +62,8 @@ workflow read_QC_trim_pe {
     call kraken.kraken2_theiacov as kraken2_theiacov_dehosted {
       input:
         samplename = samplename,
-        read1 = select_first([ncbi_scrub_pe.read1_dehosted]),
-        read2 = ncbi_scrub_pe.read2_dehosted,
+        read1 = select_first([ncbi_scrub_pe.read1_dehosted, hostile_pe.read1_dehosted]),
+        read2 = select_first([ncbi_scrub_pe.read2_dehosted, hostile_pe.read2_dehosted]),
         target_org = target_org
     }
   }
@@ -59,8 +71,8 @@ workflow read_QC_trim_pe {
     call trimmomatic.trimmomatic_pe {
       input:
         samplename = samplename,
-        read1 = select_first([ncbi_scrub_pe.read1_dehosted, read1_raw]),
-        read2 = select_first([ncbi_scrub_pe.read2_dehosted, read2_raw]),
+        read1 = select_first([ncbi_scrub_pe.read1_dehosted, hostile_pe.read1_dehosted, read1_raw]),
+        read2 = select_first([ncbi_scrub_pe.read2_dehosted, hostile_pe.read2_dehosted, read2_raw]),
         trimmomatic_window_size = trim_window_size,
         trimmomatic_quality_trim_score = trim_quality_trim_score,
         trimmomatic_minlen = trim_minlen,
@@ -115,12 +127,13 @@ workflow read_QC_trim_pe {
     }
   }
   output {
-    # NCBI scrubber
-    File? read1_dehosted = ncbi_scrub_pe.read1_dehosted
-    File? read2_dehosted = ncbi_scrub_pe.read2_dehosted
+    # NCBI scrubber / hostile (outputs stay undefined when no scrubber ran)
+    File? read1_dehosted = if (defined(hostile_pe.read1_dehosted)) then hostile_pe.read1_dehosted else ncbi_scrub_pe.read1_dehosted
+    File? read2_dehosted = if (defined(hostile_pe.read2_dehosted)) then hostile_pe.read2_dehosted else ncbi_scrub_pe.read2_dehosted
     Int? read1_human_spots_removed = ncbi_scrub_pe.read1_human_spots_removed
     Int? read2_human_spots_removed = ncbi_scrub_pe.read2_human_spots_removed
     String? ncbi_scrub_docker = ncbi_scrub_pe.ncbi_scrub_docker
+    String? hostile_docker = hostile_pe.hostile_docker
 
     # bbduk
     File read1_clean = bbduk.read1_clean

From d65466c124f6dc76447ad94058a8cc4f3cbcaec9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?In=C3=AAs=20Mendes?=
Date: Thu, 3 Aug 2023 17:02:00 +0100
Subject: [PATCH 5/7] Im metagenomics workflow (#140)

* update container to use google registry

* update metaspades docker
---
 tasks/assembly/task_metaspades.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/assembly/task_metaspades.wdl b/tasks/assembly/task_metaspades.wdl
index 8a376cce4..a6473ae14 100644
--- a/tasks/assembly/task_metaspades.wdl
+++ b/tasks/assembly/task_metaspades.wdl
@@ -5,7 +5,7 @@ task metaspades_pe {
     File read1_cleaned
     File read2_cleaned
     String samplename
-    String docker = "quay.io/biocontainers/spades:3.12.0--h9ee0642_3"
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/spades:3.12.0--h9ee0642_3"
     Int disk_size = 100
     Int cpu = 4
     Int memory = 16

From 25862ddc85635b5d86fa2f601b40dbde5bc04365 Mon Sep 17 00:00:00 2001
From: cimendes
Date: Thu, 3 Aug 2023 16:56:02 +0000
Subject: [PATCH 6/7] add maxbin2 task

---
Note: every option line of the run_MaxBin.pl invocation ends with a
backslash so the shell sees one command; without it the call stops after
-min_contig_length and "-thread ..." is executed as a separate command.
The bins are archived with "tar -czvf" so the content is actually gzipped,
matching the .tar.gz file name.

 tasks/assembly/task_maxbin.wdl | 46 ++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 tasks/assembly/task_maxbin.wdl

diff --git a/tasks/assembly/task_maxbin.wdl b/tasks/assembly/task_maxbin.wdl
new file mode 100644
index 000000000..6f4aae615
--- /dev/null
+++ b/tasks/assembly/task_maxbin.wdl
@@ -0,0 +1,46 @@
+version 1.0
+
+task maxbin2 {
+  input {
+    File assembly
+    File read1
+    File read2
+    String samplename
+    Int min_contig_length = 1000
+    String docker = "us-docker.pkg.dev/general-theiagen/biocontainers/maxbin2:2.2.7--hdbdd923_5"
+    Int disk_size = 100
+    Int cpu = 4
+    Int memory = 16
+    String? maxbin2_opts
+  }
+  command <<<
+    run_MaxBin.pl -v | head -1 | cut -d ' ' -f 2 | tee VERSION
+    run_MaxBin.pl \
+      -reads ~{read1} \
+      -reads2 ~{read2} \
+      -min_contig_length ~{min_contig_length} \
+      -thread ~{cpu} \
+      -contig ~{assembly} \
+      -out ~{samplename} \
+      ~{maxbin2_opts}
+
+    # bundle all bins into a single gzip-compressed tarball
+    tar -czvf ~{samplename}_bins.tar.gz ~{samplename}.*.fasta
+  >>>
+  output {
+    File maxbin2_bins_fasta = "~{samplename}_bins.tar.gz"
+    File maxbin2_summary = "~{samplename}.summary"
+    String maxbin2_version = read_string("VERSION")
+    String maxbin2_docker = '~{docker}'
+  }
+  runtime {
+    docker: "~{docker}"
+    memory: "~{memory} GB"
+    cpu: "~{cpu}"
+    disks: "local-disk " + disk_size + " SSD"
+    disk: disk_size + " GB"
+    maxRetries: 3
+    preemptible: 0
+  }
+}
+

From 4c5f0701f37b402fa9d6151d9f6b5a44b03a41aa Mon Sep 17 00:00:00 2001
From: cimendes
Date: Tue, 8 Aug 2023 14:54:56 +0000
Subject: [PATCH 7/7] add comment

---
 tasks/quality_control/task_hostile.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/quality_control/task_hostile.wdl b/tasks/quality_control/task_hostile.wdl
index b97bbf630..675df9c89 100644
--- a/tasks/quality_control/task_hostile.wdl
+++ b/tasks/quality_control/task_hostile.wdl
@@ -20,7 +20,7 @@ task hostile_pe {
       --fastq2 ~{read2} \
       --aligner ~{aligner}
 
-    # rename output reads
+    # rename output reads - to do: fastq.gz or fq.gz termination
     filename_without_extension_1=$(basename "~{read1}" .fastq.gz)
     filename_without_extension_2=$(basename "~{read2}" .fastq.gz)
 