From 34cdc54514bd70da3028ab1c1f1042a8e5dde640 Mon Sep 17 00:00:00 2001 From: golu099 Date: Thu, 23 Mar 2023 01:54:13 -0400 Subject: [PATCH 01/12] Seperating 16S train classifier and annotations into different branches. --- pipes/WDL/workflows/16S_train_classifier.wdl | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 pipes/WDL/workflows/16S_train_classifier.wdl diff --git a/pipes/WDL/workflows/16S_train_classifier.wdl b/pipes/WDL/workflows/16S_train_classifier.wdl new file mode 100644 index 000000000..7efd7a524 --- /dev/null +++ b/pipes/WDL/workflows/16S_train_classifier.wdl @@ -0,0 +1,26 @@ +version 1.0 + +import "../tasks/tasks_16S_amplicon.wdl" as qiime + +workflow train_classifier_16S { + meta { + description: "User imports OTU database that will be trained on your primer sequences." + author: "Broad Viral Genomics" + email: "viral_ngs@broadinstitue.org" + allowNestedInputs: true + } + input { + File otu_ref + File taxanomy_ref + String forward_adapter + String reverse_adapter + } + + call qiime.train_classifier { + input: + otu_ref = otu_ref, + taxanomy_ref = taxanomy_ref, + forward_adapter = forward_adapter, + reverse_adapter = reverse_adapter + } +} From 67ebdc8d4d23a6bf4d15e4c802d36a3ca1982fa2 Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 24 Mar 2023 10:49:49 -0400 Subject: [PATCH 02/12] Update pipes/WDL/workflows/16S_train_classifier.wdl Co-authored-by: Daniel Park --- pipes/WDL/workflows/16S_train_classifier.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/16S_train_classifier.wdl b/pipes/WDL/workflows/16S_train_classifier.wdl index 7efd7a524..f64d87782 100644 --- a/pipes/WDL/workflows/16S_train_classifier.wdl +++ b/pipes/WDL/workflows/16S_train_classifier.wdl @@ -6,7 +6,7 @@ workflow train_classifier_16S { meta { description: "User imports OTU database that will be trained on your primer sequences." 
author: "Broad Viral Genomics" - email: "viral_ngs@broadinstitue.org" + email: "viral-ngs@broadinstitue.org" allowNestedInputs: true } input { From 88d5ab4b155dbe4e5dfe5cc12f48f83c80aad28a Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 24 Mar 2023 11:14:56 -0400 Subject: [PATCH 03/12] Adding parameter meta to tasks_16S_amplicon.wdl --- .dockstore.yml | 5 + pipes/WDL/tasks/tasks_16S_amplicon.wdl | 166 +++++++++++++++++- ...lassifier.wdl => train_16S_classifier.wdl} | 2 +- 3 files changed, 165 insertions(+), 8 deletions(-) rename pipes/WDL/workflows/{16S_train_classifier.wdl => train_16S_classifier.wdl} (87%) diff --git a/.dockstore.yml b/.dockstore.yml index 76e333a4e..f4753dc49 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -357,5 +357,10 @@ workflows: - name: qiime_import_bam subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/qiime_import_bam.wdl + testParameterFiles: + - empty.json + - name: train_16S_classifier + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/train_16S_classifier.wdl testParameterFiles: - empty.json \ No newline at end of file diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 0d123d28e..5d3c79dcf 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -6,14 +6,27 @@ task qiime_import_from_bam { } input { Array[File] reads_bam - Int memory_mb = 7000 + Int memory_mb = 7000 Int cpu = 5 Int disk_size_gb = ceil(2*20) + 5 String docker = "quay.io/broadinstitute/qiime2" } parameter_meta { - reads_bam: {description: "Unaligned reads in BAM format, one sample per BAM file."} - reads_qza: {description: "All unaligned reads in a single QZA (QIIME) file"} + reads_bam: { + description: "Unaligned reads in BAM format, one sample per BAM file." + pattern: ["*.bam"] + category: "required" + } + reads_qza: { + description: "All unaligned reads in a single QZA (QIIME) file." + pattern: ["*.qza"] + category: "other" + } + samplename_master_sheet: { + description: "File contains all samples names." + pattern:["*.txt"] + category: "other" + } } command <<< @@ -69,17 +82,48 @@ task trim_reads { input { File reads_qza - #Boolean not_default = false String forward_adapter = "CTGCTGCCTCCCGTAGGAGT" String reverse_adapter = "AGAGTTTGATCCTGGCTCAG" - Int min_length = 1 + Int min_length = 1 Boolean keep_untrimmed_reads = false Int memory_mb = 2000 Int cpu = 4 Int disk_size_gb = ceil(2*size(reads_qza, "GiB")) + 5 String docker = "quay.io/broadinstitute/qiime2" } - + parameter_meta { + reads_qza: { + description: "All unaligned reads in a single QZA (QIIME) file." + pattern: ["*.qza"] + category: "required" + } + forward_adapter: { + description: "Forward amplicon primer sequence." + category: "advanced" + } + reverse_adapter: { + description: "Reverse amplicon primer sequence." + category: "advanced" + } + min_length: { + description: "Minimum length of the read, cutadapt will discard anything that is shorter than n bp AFTER trimming." + category: "other" + } + keep_untrimmed_reads: { + description: "Allows you to choose whether or not to discard untrimmed reads." + category: "advanced" + } + trimmed_reads_qza: { + description: "Trimmed reads data file." + pattern: ["*.qza"] + category: "other" + } + trimmed_visualization: { + description: "A diagram that compares your demuxed reads before and after cutting (i.e. length of reads, how many reads were retained)." 
+ pattern: ["*.qzv"] + category: "other" + } + } command <<< set -ex -o pipefail qiime cutadapt trim-paired \ @@ -124,7 +168,23 @@ task join_paired_ends { Int disk_size_gb = ceil(2*size(trimmed_reads_qza, "GiB")) + 50 String docker = "quay.io/broadinstitute/qiime2" } - + parameter_meta{ + trimmed_reads_qza: { + description:"Trimmed reads data file." + patterns:[ '*.qza'] + category: "required" + } + joined_end_reads_qza:{ + description: "Merge paired read file." + patterns:[ '*.qza'] + category: "required" + } + joined_end_visualization: { + description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position. " + patterns:[ '*.qzv'] + catgeory: "other" + } + } command <<< set -ex -o pipefail qiime vsearch join-pairs \ @@ -160,6 +220,38 @@ task deblur { Int cpu = 1 Int disk_size_gb = ceil(2*size(joined_end_reads_qza, "GiB")) + 5 String docker = "quay.io/broadinstitute/qiime2" + } + parameter_meta { + joined_end_reads_qza: { + description: "Merge paired read file." + patterns:[ '*.qza'] + category: "required" + + } + trim_length_var: { + description: "Length that all seqeuences will be trimmed, and discard any sequences that are not at least this long. Default = 300 bp" + category: "advanced" + } + representative_seqs_qza: { + description: "Generate a list of the representative sequences. May be useful to the user if they want to blast these sequences or check for correct trimming." + patterns:[ '*.qza'] + category: "other" + } + representative_table_qza: { + description: "Generate a table of the representaitve sequences." + patterns:[ '*.qza'] + category: "other" + } + feature_table: { + description: "A table that represent the number of of features per sample, the number of samples a given feature is found in." + patterns:[ '*.qzv'] + category: "other" + } + visualize_stats:{ + description: "Generate visualization of deblur stats. " + patterns:[ '*.qzv'] + category: "other" + } } command <<< set -ex -o pipefail @@ -213,6 +305,39 @@ task train_classifier { Int disk_size_gb = ceil(2*size(otu_ref, "GiB")) + 5 String docker = "quay.io/broadinstitute/qiime2" } + parameter_meta{ + otu_ref: { + description: "Operational taxonomic units (OTUs) sequences imported as FASTA file." + pattern: ["*.fasta"] + category: "required" + } + taxanomy_ref: { + description: "Reference taxonomy file that list the sequence ID's of the OTUs in the reference file." + category: "required" + } + forward_adapter: { + description: "The forward primer sequence for the amplicon target." + category: "other" + } + reverse_adapter: { + description: "The reverse primer sequence for the amplicon target." + category: "other" + } + min_length: { + description: "Minimum length of amplicon sequences." + category: "other" + } + max_length: { + description: "Maximum length of amplicon sequences." + category: "other" + } + trained_classifier: { + description: "Trained taxonomic classifier on target amplicon sequences." + pattern: ["*.qza"] + category: "other" + } + } + command <<< set -ex -o pipefail CONDA_ENV_NAME=$(conda info --envs -q | awk -F" " '/qiime.*/{ print $1 }') @@ -266,6 +391,33 @@ task tax_analysis { Int disk_size_gb = 375 String docker = "quay.io/broadinstitute/qiime2" } + parameter_meta{ + trained_classifier: { + description: "Trained taxonomic classifier on target amplicon sequences." + pattern: ["*.qza"] + category: "required" + } + representative_seqs_qza: { + description: "List of representative sequences." 
+ pattern: ["*.qza"] + category: "required" + } + representative_table_qza: { + description: "Table of representative sequences." + pattern: ["*.qza"] + category: "required" + } + rep_seq_list: { + description: "Generate list of representative sequences." + pattern: ["*.qzv"] + category: "other" + } + tax_classification_graph: { + description: "Create a bar graph of your taxonomic classification." + pattern: ["*.qzv"] + category: "other" + } + } command <<< set -ex -o pipefail qiime feature-classifier classify-sklearn \ diff --git a/pipes/WDL/workflows/16S_train_classifier.wdl b/pipes/WDL/workflows/train_16S_classifier.wdl similarity index 87% rename from pipes/WDL/workflows/16S_train_classifier.wdl rename to pipes/WDL/workflows/train_16S_classifier.wdl index f64d87782..137589792 100644 --- a/pipes/WDL/workflows/16S_train_classifier.wdl +++ b/pipes/WDL/workflows/train_16S_classifier.wdl @@ -4,7 +4,7 @@ import "../tasks/tasks_16S_amplicon.wdl" as qiime workflow train_classifier_16S { meta { - description: "User imports OTU database that will be trained on your primer sequences." + description: "User imports OTU database that will be trained on your primer sequences. All outputs can be visualized using https://view.qiime2.org/" author: "Broad Viral Genomics" email: "viral-ngs@broadinstitue.org" allowNestedInputs: true From 64f53a441be267ddab1a158303100edeeb53005f Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 24 Mar 2023 12:09:39 -0400 Subject: [PATCH 04/12] Addresing bugs --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 42 +++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 5d3c79dcf..50e2bd62e 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -14,17 +14,17 @@ task qiime_import_from_bam { parameter_meta { reads_bam: { description: "Unaligned reads in BAM format, one sample per BAM file." - pattern: ["*.bam"] + patterns: ["*.bam"] category: "required" } reads_qza: { description: "All unaligned reads in a single QZA (QIIME) file." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "other" } samplename_master_sheet: { description: "File contains all samples names." - pattern:["*.txt"] + patterns:["*.txt"] category: "other" } } @@ -94,7 +94,7 @@ task trim_reads { parameter_meta { reads_qza: { description: "All unaligned reads in a single QZA (QIIME) file." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "required" } forward_adapter: { @@ -115,12 +115,12 @@ task trim_reads { } trimmed_reads_qza: { description: "Trimmed reads data file." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "other" } trimmed_visualization: { description: "A diagram that compares your demuxed reads before and after cutting (i.e. length of reads, how many reads were retained)." - pattern: ["*.qzv"] + patterns: ["*.qzv"] category: "other" } } @@ -171,17 +171,17 @@ task join_paired_ends { parameter_meta{ trimmed_reads_qza: { description:"Trimmed reads data file." - patterns:[ '*.qza'] + patternss:[ '*.qza'] category: "required" } joined_end_reads_qza:{ description: "Merge paired read file." - patterns:[ '*.qza'] + patternss:[ '*.qza'] category: "required" } joined_end_visualization: { description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position. 
" - patterns:[ '*.qzv'] + patternss:[ '*.qzv'] catgeory: "other" } } @@ -224,7 +224,7 @@ task deblur { parameter_meta { joined_end_reads_qza: { description: "Merge paired read file." - patterns:[ '*.qza'] + patternss:[ '*.qza'] category: "required" } @@ -234,22 +234,22 @@ task deblur { } representative_seqs_qza: { description: "Generate a list of the representative sequences. May be useful to the user if they want to blast these sequences or check for correct trimming." - patterns:[ '*.qza'] + patternss:[ '*.qza'] category: "other" } representative_table_qza: { description: "Generate a table of the representaitve sequences." - patterns:[ '*.qza'] + patternss:[ '*.qza'] category: "other" } feature_table: { description: "A table that represent the number of of features per sample, the number of samples a given feature is found in." - patterns:[ '*.qzv'] + patternss:[ '*.qzv'] category: "other" } visualize_stats:{ description: "Generate visualization of deblur stats. " - patterns:[ '*.qzv'] + patternss:[ '*.qzv'] category: "other" } } @@ -308,7 +308,7 @@ task train_classifier { parameter_meta{ otu_ref: { description: "Operational taxonomic units (OTUs) sequences imported as FASTA file." - pattern: ["*.fasta"] + patterns: ["*.fasta"] category: "required" } taxanomy_ref: { @@ -333,7 +333,7 @@ task train_classifier { } trained_classifier: { description: "Trained taxonomic classifier on target amplicon sequences." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "other" } } @@ -394,27 +394,27 @@ task tax_analysis { parameter_meta{ trained_classifier: { description: "Trained taxonomic classifier on target amplicon sequences." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "required" } representative_seqs_qza: { description: "List of representative sequences." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "required" } representative_table_qza: { description: "Table of representative sequences." - pattern: ["*.qza"] + patterns: ["*.qza"] category: "required" } rep_seq_list: { description: "Generate list of representative sequences." - pattern: ["*.qzv"] + patterns: ["*.qzv"] category: "other" } tax_classification_graph: { description: "Create a bar graph of your taxonomic classification." - pattern: ["*.qzv"] + patterns: ["*.qzv"] category: "other" } } From 3d395333b4861e5be9bde9a041595d4cfe32ed85 Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 24 Mar 2023 12:52:14 -0400 Subject: [PATCH 05/12] addressing bug fixes --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 50e2bd62e..2d3ac5c32 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -13,18 +13,18 @@ task qiime_import_from_bam { } parameter_meta { reads_bam: { - description: "Unaligned reads in BAM format, one sample per BAM file." - patterns: ["*.bam"] + description: "Unaligned reads in BAM format, one sample per BAM file.", + patterns: ["*.bam"], category: "required" } reads_qza: { - description: "All unaligned reads in a single QZA (QIIME) file." - patterns: ["*.qza"] + description: "All unaligned reads in a single QZA (QIIME) file.", + patterns: ["*.qza"], category: "other" } samplename_master_sheet: { - description: "File contains all samples names." 
- patterns:["*.txt"] + description: "File contains all samples names.", + patterns:["*.txt"], category: "other" } } From 069cff6168c84955e61ed90ff525b2bc2e04989f Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 24 Mar 2023 15:38:50 -0400 Subject: [PATCH 06/12] addressing rbracket --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 91 +++++++++++++------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 2d3ac5c32..3bea59879 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -28,7 +28,6 @@ task qiime_import_from_bam { category: "other" } } - command <<< set -ex -o pipefail @@ -93,34 +92,34 @@ task trim_reads { } parameter_meta { reads_qza: { - description: "All unaligned reads in a single QZA (QIIME) file." - patterns: ["*.qza"] + description: "All unaligned reads in a single QZA (QIIME) file.", + patterns: ["*.qza"], category: "required" } forward_adapter: { - description: "Forward amplicon primer sequence." + description: "Forward amplicon primer sequence.", category: "advanced" } reverse_adapter: { - description: "Reverse amplicon primer sequence." + description: "Reverse amplicon primer sequence.", category: "advanced" } min_length: { - description: "Minimum length of the read, cutadapt will discard anything that is shorter than n bp AFTER trimming." + description: "Minimum length of the read, cutadapt will discard anything that is shorter than n bp AFTER trimming.", category: "other" } keep_untrimmed_reads: { - description: "Allows you to choose whether or not to discard untrimmed reads." + description: "Allows you to choose whether or not to discard untrimmed reads.", category: "advanced" } trimmed_reads_qza: { - description: "Trimmed reads data file." - patterns: ["*.qza"] + description: "Trimmed reads data file.", + patterns: ["*.qza"], category: "other" } trimmed_visualization: { - description: "A diagram that compares your demuxed reads before and after cutting (i.e. length of reads, how many reads were retained)." - patterns: ["*.qzv"] + description: "A diagram that compares your demuxed reads before and after cutting (i.e. length of reads, how many reads were retained).", + patterns: ["*.qzv"], category: "other" } } @@ -170,18 +169,18 @@ task join_paired_ends { } parameter_meta{ trimmed_reads_qza: { - description:"Trimmed reads data file." - patternss:[ '*.qza'] + description:"Trimmed reads data file.", + patternss:[ '*.qza'], category: "required" } joined_end_reads_qza:{ - description: "Merge paired read file." - patternss:[ '*.qza'] + description: "Merge paired read file.", + patternss:[ '*.qza'], category: "required" } joined_end_visualization: { - description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position. " - patternss:[ '*.qzv'] + description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position.", + patternss:[ '*.qzv'], catgeory: "other" } } @@ -223,33 +222,33 @@ task deblur { } parameter_meta { joined_end_reads_qza: { - description: "Merge paired read file." - patternss:[ '*.qza'] + description: "Merge paired read file.", + patternss:[ '*.qza'], category: "required" } trim_length_var: { - description: "Length that all seqeuences will be trimmed, and discard any sequences that are not at least this long. 
Default = 300 bp" + description: "Length that all seqeuences will be trimmed, and discard any sequences that are not at least this long. Default = 300 bp", category: "advanced" } representative_seqs_qza: { - description: "Generate a list of the representative sequences. May be useful to the user if they want to blast these sequences or check for correct trimming." - patternss:[ '*.qza'] + description: "Generate a list of the representative sequences. May be useful to the user if they want to blast these sequences or check for correct trimming.", + patternss:[ '*.qza'], category: "other" } representative_table_qza: { - description: "Generate a table of the representaitve sequences." - patternss:[ '*.qza'] + description: "Generate a table of the representaitve sequences.", + patternss:[ '*.qza'], category: "other" } feature_table: { - description: "A table that represent the number of of features per sample, the number of samples a given feature is found in." - patternss:[ '*.qzv'] + description: "A table that represent the number of of features per sample, the number of samples a given feature is found in.", + patternss:[ '*.qzv'], category: "other" } visualize_stats:{ - description: "Generate visualization of deblur stats. " - patternss:[ '*.qzv'] + description: "Generate visualization of deblur stats.", + patternss:[ '*.qzv'], category: "other" } } @@ -307,32 +306,32 @@ task train_classifier { } parameter_meta{ otu_ref: { - description: "Operational taxonomic units (OTUs) sequences imported as FASTA file." - patterns: ["*.fasta"] + description: "Operational taxonomic units (OTUs) sequences imported as FASTA file.", + patterns: ["*.fasta"], category: "required" } taxanomy_ref: { - description: "Reference taxonomy file that list the sequence ID's of the OTUs in the reference file." + description: "Reference taxonomy file that list the sequence ID's of the OTUs in the reference file.", category: "required" } forward_adapter: { - description: "The forward primer sequence for the amplicon target." + description: "The forward primer sequence for the amplicon target.", category: "other" } reverse_adapter: { - description: "The reverse primer sequence for the amplicon target." + description: "The reverse primer sequence for the amplicon target.", category: "other" } min_length: { - description: "Minimum length of amplicon sequences." + description: "Minimum length of amplicon sequences.", category: "other" } max_length: { - description: "Maximum length of amplicon sequences." + description: "Maximum length of amplicon sequences.", category: "other" } trained_classifier: { - description: "Trained taxonomic classifier on target amplicon sequences." + description: "Trained taxonomic classifier on target amplicon sequences.", patterns: ["*.qza"] category: "other" } @@ -393,28 +392,28 @@ task tax_analysis { } parameter_meta{ trained_classifier: { - description: "Trained taxonomic classifier on target amplicon sequences." - patterns: ["*.qza"] + description: "Trained taxonomic classifier on target amplicon sequences.", + patterns: ["*.qza"], category: "required" } representative_seqs_qza: { - description: "List of representative sequences." - patterns: ["*.qza"] + description: "List of representative sequences.", + patterns: ["*.qza"], category: "required" } representative_table_qza: { - description: "Table of representative sequences." 
- patterns: ["*.qza"] + description: "Table of representative sequences.", + patterns: ["*.qza"], category: "required" } rep_seq_list: { - description: "Generate list of representative sequences." - patterns: ["*.qzv"] + description: "Generate list of representative sequences.", + patterns: ["*.qzv"], category: "other" } tax_classification_graph: { - description: "Create a bar graph of your taxonomic classification." - patterns: ["*.qzv"] + description: "Create a bar graph of your taxonomic classification.", + patterns: ["*.qzv"], category: "other" } } From 53b2e6bb751c08ada9a7faa136e82850bb6978c1 Mon Sep 17 00:00:00 2001 From: Flavia Negrete Date: Mon, 3 Apr 2023 00:45:36 -0400 Subject: [PATCH 07/12] fixing bugs --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 61 +++++++------------- pipes/WDL/workflows/16S_train_classifier.wdl | 26 +++++++++ 2 files changed, 46 insertions(+), 41 deletions(-) create mode 100644 pipes/WDL/workflows/16S_train_classifier.wdl diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 3bea59879..e68231da4 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -14,20 +14,18 @@ task qiime_import_from_bam { parameter_meta { reads_bam: { description: "Unaligned reads in BAM format, one sample per BAM file.", - patterns: ["*.bam"], category: "required" } reads_qza: { description: "All unaligned reads in a single QZA (QIIME) file.", - patterns: ["*.qza"], category: "other" } samplename_master_sheet: { description: "File contains all samples names.", - patterns:["*.txt"], category: "other" } } + command <<< set -ex -o pipefail @@ -93,8 +91,7 @@ task trim_reads { parameter_meta { reads_qza: { description: "All unaligned reads in a single QZA (QIIME) file.", - patterns: ["*.qza"], - category: "required" + cateogry: "required" } forward_adapter: { description: "Forward amplicon primer sequence.", @@ -102,10 +99,10 @@ task trim_reads { } reverse_adapter: { description: "Reverse amplicon primer sequence.", - category: "advanced" + cateogry: "advanced" } min_length: { - description: "Minimum length of the read, cutadapt will discard anything that is shorter than n bp AFTER trimming.", + description: "Minimum length of the read, cutadapt will discard anything that is shorter than n bp AFTER trimming.Set to default.", category: "other" } keep_untrimmed_reads: { @@ -114,13 +111,11 @@ task trim_reads { } trimmed_reads_qza: { description: "Trimmed reads data file.", - patterns: ["*.qza"], - category: "other" + category: "advanced" } trimmed_visualization: { description: "A diagram that compares your demuxed reads before and after cutting (i.e. length of reads, how many reads were retained).", - patterns: ["*.qzv"], - category: "other" + category: "advanced" } } command <<< @@ -170,18 +165,15 @@ task join_paired_ends { parameter_meta{ trimmed_reads_qza: { description:"Trimmed reads data file.", - patternss:[ '*.qza'], category: "required" } joined_end_reads_qza:{ description: "Merge paired read file.", - patternss:[ '*.qza'], - category: "required" + category: "other" } joined_end_visualization: { - description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position.", - patternss:[ '*.qzv'], - catgeory: "other" + description: "This summary is especially useful for assessing the length of linked reads and the quality scores at each sequence base position. 
", + category: "other" } } command <<< @@ -223,32 +215,26 @@ task deblur { parameter_meta { joined_end_reads_qza: { description: "Merge paired read file.", - patternss:[ '*.qza'], category: "required" - } trim_length_var: { - description: "Length that all seqeuences will be trimmed, and discard any sequences that are not at least this long. Default = 300 bp", + description: "Length that all seqeuences will be trimmed, and discard any sequences that are not at least this long.", category: "advanced" } representative_seqs_qza: { description: "Generate a list of the representative sequences. May be useful to the user if they want to blast these sequences or check for correct trimming.", - patternss:[ '*.qza'], category: "other" } representative_table_qza: { description: "Generate a table of the representaitve sequences.", - patternss:[ '*.qza'], category: "other" } feature_table: { description: "A table that represent the number of of features per sample, the number of samples a given feature is found in.", - patternss:[ '*.qzv'], category: "other" } visualize_stats:{ description: "Generate visualization of deblur stats.", - patternss:[ '*.qzv'], category: "other" } } @@ -307,32 +293,30 @@ task train_classifier { parameter_meta{ otu_ref: { description: "Operational taxonomic units (OTUs) sequences imported as FASTA file.", - patterns: ["*.fasta"], - category: "required" + category:"required" } taxanomy_ref: { - description: "Reference taxonomy file that list the sequence ID's of the OTUs in the reference file.", + description: "Reference taxonomy file.", category: "required" } forward_adapter: { description: "The forward primer sequence for the amplicon target.", - category: "other" + category: "advanced" } reverse_adapter: { description: "The reverse primer sequence for the amplicon target.", - category: "other" + category:"advanced" } min_length: { description: "Minimum length of amplicon sequences.", - category: "other" + category: "advanced" } max_length: { description: "Maximum length of amplicon sequences.", - category: "other" + category:"advanced" } trained_classifier: { description: "Trained taxonomic classifier on target amplicon sequences.", - patterns: ["*.qza"] category: "other" } } @@ -393,28 +377,23 @@ task tax_analysis { parameter_meta{ trained_classifier: { description: "Trained taxonomic classifier on target amplicon sequences.", - patterns: ["*.qza"], category: "required" } representative_seqs_qza: { description: "List of representative sequences.", - patterns: ["*.qza"], - category: "required" + category:"required" } representative_table_qza: { description: "Table of representative sequences.", - patterns: ["*.qza"], - category: "required" + category:"other" } rep_seq_list: { description: "Generate list of representative sequences.", - patterns: ["*.qzv"], - category: "other" + category:"other" } tax_classification_graph: { description: "Create a bar graph of your taxonomic classification.", - patterns: ["*.qzv"], - category: "other" + category:"other" } } command <<< diff --git a/pipes/WDL/workflows/16S_train_classifier.wdl b/pipes/WDL/workflows/16S_train_classifier.wdl new file mode 100644 index 000000000..7efd7a524 --- /dev/null +++ b/pipes/WDL/workflows/16S_train_classifier.wdl @@ -0,0 +1,26 @@ +version 1.0 + +import "../tasks/tasks_16S_amplicon.wdl" as qiime + +workflow train_classifier_16S { + meta { + description: "User imports OTU database that will be trained on your primer sequences." 
+ author: "Broad Viral Genomics" + email: "viral_ngs@broadinstitue.org" + allowNestedInputs: true + } + input { + File otu_ref + File taxanomy_ref + String forward_adapter + String reverse_adapter + } + + call qiime.train_classifier { + input: + otu_ref = otu_ref, + taxanomy_ref = taxanomy_ref, + forward_adapter = forward_adapter, + reverse_adapter = reverse_adapter + } +} From 5769ccd67e19a7ffbfe27a499c393f92cd1ec6ff Mon Sep 17 00:00:00 2001 From: golu099 Date: Thu, 13 Apr 2023 14:00:47 -0400 Subject: [PATCH 08/12] Update pipes/WDL/workflows/train_16S_classifier.wdl Co-authored-by: Daniel Park --- pipes/WDL/workflows/train_16S_classifier.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/train_16S_classifier.wdl b/pipes/WDL/workflows/train_16S_classifier.wdl index 137589792..fe1180ba7 100644 --- a/pipes/WDL/workflows/train_16S_classifier.wdl +++ b/pipes/WDL/workflows/train_16S_classifier.wdl @@ -2,7 +2,7 @@ version 1.0 import "../tasks/tasks_16S_amplicon.wdl" as qiime -workflow train_classifier_16S { +workflow train_16S_classifier { meta { description: "User imports OTU database that will be trained on your primer sequences. All outputs can be visualized using https://view.qiime2.org/" author: "Broad Viral Genomics" From dc7b9483888ccb5f526fbd04fb26aeb7704564b1 Mon Sep 17 00:00:00 2001 From: golu099 Date: Thu, 13 Apr 2023 14:07:54 -0400 Subject: [PATCH 09/12] Update pipes/WDL/tasks/tasks_16S_amplicon.wdl Co-authored-by: Daniel Park --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index e68231da4..1e621535c 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -374,7 +374,7 @@ task tax_analysis { Int disk_size_gb = 375 String docker = "quay.io/broadinstitute/qiime2" } - parameter_meta{ + parameter_meta { trained_classifier: { description: "Trained taxonomic classifier on target amplicon sequences.", category: "required" From e5aca1a850371a9130651901579ba269830f554a Mon Sep 17 00:00:00 2001 From: golu099 Date: Thu, 13 Apr 2023 14:08:01 -0400 Subject: [PATCH 10/12] Update pipes/WDL/tasks/tasks_16S_amplicon.wdl Co-authored-by: Daniel Park --- pipes/WDL/tasks/tasks_16S_amplicon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_16S_amplicon.wdl b/pipes/WDL/tasks/tasks_16S_amplicon.wdl index 1e621535c..aa52d6788 100755 --- a/pipes/WDL/tasks/tasks_16S_amplicon.wdl +++ b/pipes/WDL/tasks/tasks_16S_amplicon.wdl @@ -290,7 +290,7 @@ task train_classifier { Int disk_size_gb = ceil(2*size(otu_ref, "GiB")) + 5 String docker = "quay.io/broadinstitute/qiime2" } - parameter_meta{ + parameter_meta { otu_ref: { description: "Operational taxonomic units (OTUs) sequences imported as FASTA file.", category:"required" From 307458b15334f3ba09f2032edc97afe023d598e9 Mon Sep 17 00:00:00 2001 From: golu099 Date: Thu, 20 Apr 2023 13:00:53 -0400 Subject: [PATCH 11/12] Update pipes/WDL/workflows/train_16S_classifier.wdl Co-authored-by: Daniel Park --- pipes/WDL/workflows/train_16S_classifier.wdl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pipes/WDL/workflows/train_16S_classifier.wdl b/pipes/WDL/workflows/train_16S_classifier.wdl index fe1180ba7..974f87b19 100644 --- a/pipes/WDL/workflows/train_16S_classifier.wdl +++ b/pipes/WDL/workflows/train_16S_classifier.wdl @@ -9,18 +9,5 @@ workflow train_16S_classifier { email: 
"viral-ngs@broadinstitue.org" allowNestedInputs: true } - input { - File otu_ref - File taxanomy_ref - String forward_adapter - String reverse_adapter - } - - call qiime.train_classifier { - input: - otu_ref = otu_ref, - taxanomy_ref = taxanomy_ref, - forward_adapter = forward_adapter, - reverse_adapter = reverse_adapter - } + call qiime.train_classifier } From 444f30dd1e94c81702a5628da8fecd14f666c6d3 Mon Sep 17 00:00:00 2001 From: golu099 Date: Fri, 12 May 2023 13:45:15 -0400 Subject: [PATCH 12/12] Fixing missing block in training_16S_classifier --- pipes/WDL/workflows/train_16S_classifier.wdl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/train_16S_classifier.wdl b/pipes/WDL/workflows/train_16S_classifier.wdl index 974f87b19..37614cef0 100644 --- a/pipes/WDL/workflows/train_16S_classifier.wdl +++ b/pipes/WDL/workflows/train_16S_classifier.wdl @@ -9,5 +9,12 @@ workflow train_16S_classifier { email: "viral-ngs@broadinstitue.org" allowNestedInputs: true } - call qiime.train_classifier + call qiime.train_classifier{ + input: + otu_ref = otu_ref, + taxonomy_ref = taxonomy_ref, + forward_adapter = forward_adapter, + reverse_adapter = reverse_adapter + + } }