From def7883632f9011d385a34f80130217b9d94d612 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 08:39:48 +0000 Subject: [PATCH 01/36] created new task set_mpxv_defaults --- tasks/utilities/task_augur_utilities.wdl | 38 ++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index c305222db..5ab8f68ac 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -324,6 +324,44 @@ task set_flu_defaults { # establish flu default values for augur } } +task set_mpxv_defaults { # establish mpxv default values for augur + input { + # in the future we will wget from the repo directly + File mpxv_lat_longs_tsv = "gs://theiagen-public-files-rp/terra/flu-references/lat_longs.tsv" + File mpxv_clades_tsv = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_clades.tsv" + File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1.reference.fasta" + File mpxv_reference_genbank = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" + File mpxv_auspice_config = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_auspice_config_mpxv.json" + + Int disk_size = 50 + } + command <<< + # nothing to do here for now + echo "working...very hard" + + >>> + output { + Int min_num_unambig = 150000 + File? clades_tsv = mpxv_clades_tsv + File lat_longs_tsv = mpxv_lat_longs_tsv + File reference_fasta = mpxv_reference_fasta + File reference_genbank = mpxv_reference_genbank + File auspice_config = mpxv_auspice_config + # inherited from flu defaults + Float min_date = 2020.0 + Int pivot_interval = 1 + Float narrow_bandwidth = 0.1666667 + Float proportion_wide = 0.0 + } + runtime { + docker: "us-docker.pkg.dev/general-theiagen/biocontainers/augur:22.0.2--pyhdfd78af_0" + memory: "1 GB" + cpu: 1 + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" + } +} + task prep_augur_metadata { input { File assembly From 1b70ce9f6512aea79c7114091718b3a9b2afed26 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 08:50:30 +0000 Subject: [PATCH 02/36] added mpxv inputs to augur workflow --- tasks/utilities/task_augur_utilities.wdl | 2 +- workflows/phylogenetics/wf_augur.wdl | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index 5ab8f68ac..4f05553f4 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -342,7 +342,7 @@ task set_mpxv_defaults { # establish mpxv default values for augur >>> output { Int min_num_unambig = 150000 - File? clades_tsv = mpxv_clades_tsv + File clades_tsv = mpxv_clades_tsv File lat_longs_tsv = mpxv_lat_longs_tsv File reference_fasta = mpxv_reference_fasta File reference_genbank = mpxv_reference_genbank diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 8ed6948d9..0d3dd7c51 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -52,15 +52,20 @@ workflow augur { flu_subtype = flu_subtype } } + if (organism == "MPXV") { + call augur_utils.set_mpxv_defaults as mpxv_defaults { # establish default parameters for mpxv + input: + } + } call augur_utils.filter_sequences_by_length { # remove any sequences that do not meet the quality threshold input: sequences_fasta = cat_files.concatenated_files, - min_non_N = select_first([min_num_unambig, sc2_defaults.min_num_unambig, flu_defaults.min_num_unambig]) + min_non_N = select_first([min_num_unambig, sc2_defaults.min_num_unambig, flu_defaults.min_num_unambig, mpxv_defaults.min_num_unambig]), } call align_task.augur_align { # perform mafft alignment on the sequences input: assembly_fasta = filter_sequences_by_length.filtered_fasta, - reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta]) + reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), } call augur_utils.tsv_join { # merge the metadata files input: @@ -95,19 +100,19 @@ workflow augur { input: refined_tree = augur_refine.refined_tree, ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, - reference_genbank = select_first([reference_genbank, sc2_defaults.reference_genbank, flu_defaults.reference_genbank]), + reference_genbank = select_first([reference_genbank, sc2_defaults.reference_genbank, flu_defaults.reference_genbank, mpxv_defaults.reference_genbank]), build_name = build_name } if (flu_segment == "HA") { # we only have clade information for HA segments (but SC2 defaults will be selected first) - if (defined(clades_tsv) || defined(sc2_defaults.clades_tsv) || defined(flu_defaults.clades_tsv)) { # one of these must be present + if (defined(clades_tsv) || defined(sc2_defaults.clades_tsv) || defined(flu_defaults.clades_tsv) || defined(mpxv_defaults.clades_tsv) ) { # one of these must be present call clades_task.augur_clades { # assign clades to nodes based on amino-acid or nucleotide signatures input: refined_tree = augur_refine.refined_tree, ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, translated_aa_muts_json = augur_translate.translated_aa_muts_json, - reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta]), + reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), build_name = build_name, - clades_tsv = select_first([clades_tsv, sc2_defaults.clades_tsv, flu_defaults.clades_tsv]) + clades_tsv = select_first([clades_tsv, sc2_defaults.clades_tsv, flu_defaults.clades_tsv, mpxv_defaults.clades_tsv]) } } } @@ -121,8 +126,8 @@ workflow augur { augur_translate.translated_aa_muts_json, augur_clades.clade_assignments_json]), build_name = build_name, - lat_longs_tsv = select_first([sc2_defaults.lat_longs_tsv, flu_defaults.lat_longs_tsv, lat_longs_tsv]), - auspice_config = select_first([sc2_defaults.auspice_config, flu_defaults.auspice_config, auspice_config]) + lat_longs_tsv = select_first([sc2_defaults.lat_longs_tsv, flu_defaults.lat_longs_tsv, mpxv_defaults.lat_longs_tsv, lat_longs_tsv]), + auspice_config = select_first([sc2_defaults.auspice_config, flu_defaults.auspice_config, mpxv_defaults.auspice_config, auspice_config]) } } call snp_dists_task.snp_dists { # create a snp matrix from the alignment From 2dc86e6ecd78a2c4c957f1a43f2dbd1fe077a9bc Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 11:23:30 +0000 Subject: [PATCH 03/36] updated reference fasta to gb file --- tasks/utilities/task_augur_utilities.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index 4f05553f4..71a0be0b0 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -329,7 +329,7 @@ task set_mpxv_defaults { # establish mpxv default values for augur # in the future we will wget from the repo directly File mpxv_lat_longs_tsv = "gs://theiagen-public-files-rp/terra/flu-references/lat_longs.tsv" File mpxv_clades_tsv = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_clades.tsv" - File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1.reference.fasta" + File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" File mpxv_reference_genbank = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" File mpxv_auspice_config = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_auspice_config_mpxv.json" From fce14c8f7342d8a6854c8a0d6ba624257156e897 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 11:52:43 +0000 Subject: [PATCH 04/36] new workflow for adding samples to a ref tree --- .../wf_nextclade_addToRefTree.wdl | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 workflows/phylogenetics/wf_nextclade_addToRefTree.wdl diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl new file mode 100644 index 000000000..bc26b277b --- /dev/null +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -0,0 +1,50 @@ +version 1.0 + +import "../../tasks/utilities/task_file_handling.wdl" as file_handling +import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade + +workflow nextclade_addToRefTree { + meta { + description: "Nextclade workflow that adds samples to a curated JSON tree from Augur." + } + input { + Array[File]+ assembly_fastas + String build_name + File root_sequence_fasta + File reference_tree_json + File? qc_config_json + File? gene_annotations_gff + File? pcr_primers_csv + File? virus_properties + String docker = "nextstrain/nextclade:2.13.0" + String dataset_name = "MPXV" + String dataset_reference = "ancestral" + String dataset_tag = "2023-01-26T12:00:00Z" + } + call file_handling.cat_files { # concatenate all of the input fasta files together + input: + files_to_cat = assembly_fastas, + concatenated_file_name = "~{build_name}_concatenated.fasta" + } + call nextclade.nextclade { # nextclade analysis + input: + genome_fasta = cat_files.concatenated_files, + root_sequence = root_sequence_fasta, + auspice_reference_tree_json = reference_tree_json, + qc_config_json = qc_config_json, + gene_annotations_json = gene_annotations_gff, + pcr_primers_csv = pcr_primers_csv, + virus_properties = virus_properties, + docker = docker, + dataset_name = dataset_name, + dataset_reference = dataset_reference, + dataset_tag + } + output { + String treeUpdate_nextclade_version = select_first([nextclade.nextclade_version, ""]) + File treeUpdate_nextclade_json = select_first([nextclade.nextclade_json, ""]) + File treeUpdate_auspice_json = select_first([nextclade.auspice_json, ""]) + File treeUpdate_nextclade_tsv = select_first([nextclade.nextclade_tsv, ""]) + String treeUpdate_nextclade_docker = select_first([nextclade.nextclade_docker, ""]) + } +} \ No newline at end of file From c152d9f93a2eb9706a400b16868d8b04da498fe4 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 11:56:22 +0000 Subject: [PATCH 05/36] small change --- workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index bc26b277b..96b991e92 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -38,7 +38,7 @@ workflow nextclade_addToRefTree { docker = docker, dataset_name = dataset_name, dataset_reference = dataset_reference, - dataset_tag + dataset_tag = dataset_tag } output { String treeUpdate_nextclade_version = select_first([nextclade.nextclade_version, ""]) From 9aff5ba6e5eb30a6a05c571d267e12d0bac088cc Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 12:14:04 +0000 Subject: [PATCH 06/36] minor change --- workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index 96b991e92..12da50433 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,7 +1,7 @@ version 1.0 import "../../tasks/utilities/task_file_handling.wdl" as file_handling -import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade +import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade_analysis workflow nextclade_addToRefTree { meta { @@ -11,9 +11,9 @@ workflow nextclade_addToRefTree { Array[File]+ assembly_fastas String build_name File root_sequence_fasta + #File? gene_annotations_gff File reference_tree_json File? qc_config_json - File? gene_annotations_gff File? pcr_primers_csv File? virus_properties String docker = "nextstrain/nextclade:2.13.0" @@ -26,13 +26,13 @@ workflow nextclade_addToRefTree { files_to_cat = assembly_fastas, concatenated_file_name = "~{build_name}_concatenated.fasta" } - call nextclade.nextclade { # nextclade analysis + call nextclade_analysis.nextclade { # nextclade analysis input: genome_fasta = cat_files.concatenated_files, root_sequence = root_sequence_fasta, auspice_reference_tree_json = reference_tree_json, qc_config_json = qc_config_json, - gene_annotations_json = gene_annotations_gff, + #gene_annotations_json = gene_annotations_gff, pcr_primers_csv = pcr_primers_csv, virus_properties = virus_properties, docker = docker, From e33db476da1b26a035e84784fd51121de98ed452 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 12:20:09 +0000 Subject: [PATCH 07/36] updated wf to dockstore --- .dockstore.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.dockstore.yml b/.dockstore.yml index 004bf9955..584123e81 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -220,3 +220,8 @@ workflows: primaryDescriptorPath: /workflows/utilities/wf_theiavalidate.wdl testParameterFiles: - empty.json + - name: Samples_to_Ref_Tree_PHB + subclass: WDL + primaryDescriptorPath: /workflows/phylogenetics/wf_nextclade_addToRef.wdl + testParameterFiles: + - empty.json From aba4cb561dc9229c56fc9b769fff72b8768411e8 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 12:41:46 +0000 Subject: [PATCH 08/36] typo --- .dockstore.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dockstore.yml b/.dockstore.yml index 584123e81..182cdd987 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -222,6 +222,6 @@ workflows: - empty.json - name: Samples_to_Ref_Tree_PHB subclass: WDL - primaryDescriptorPath: /workflows/phylogenetics/wf_nextclade_addToRef.wdl + primaryDescriptorPath: /workflows/phylogenetics/wf_nextclade_addToRefTree.wdl testParameterFiles: - empty.json From 2a2636e1734223fb90a60f43b374603503a42ae2 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 25 Jul 2023 13:37:50 +0000 Subject: [PATCH 09/36] removed concatenate task --- workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index 12da50433..abbc43072 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,6 +1,5 @@ version 1.0 -import "../../tasks/utilities/task_file_handling.wdl" as file_handling import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade_analysis workflow nextclade_addToRefTree { @@ -8,7 +7,7 @@ workflow nextclade_addToRefTree { description: "Nextclade workflow that adds samples to a curated JSON tree from Augur." } input { - Array[File]+ assembly_fastas + File assembly_fastas String build_name File root_sequence_fasta #File? gene_annotations_gff @@ -21,14 +20,9 @@ workflow nextclade_addToRefTree { String dataset_reference = "ancestral" String dataset_tag = "2023-01-26T12:00:00Z" } - call file_handling.cat_files { # concatenate all of the input fasta files together - input: - files_to_cat = assembly_fastas, - concatenated_file_name = "~{build_name}_concatenated.fasta" - } call nextclade_analysis.nextclade { # nextclade analysis input: - genome_fasta = cat_files.concatenated_files, + genome_fasta = assembly_fastas, root_sequence = root_sequence_fasta, auspice_reference_tree_json = reference_tree_json, qc_config_json = qc_config_json, From bb551898402db46a7e766866fa8b49f13b29ea42 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Wed, 26 Jul 2023 07:57:28 +0000 Subject: [PATCH 10/36] updated augur mpxv ref files --- tasks/utilities/task_augur_utilities.wdl | 6 +++++- workflows/phylogenetics/wf_augur.wdl | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index 71a0be0b0..15da6bb51 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -327,11 +327,13 @@ task set_flu_defaults { # establish flu default values for augur task set_mpxv_defaults { # establish mpxv default values for augur input { # in the future we will wget from the repo directly - File mpxv_lat_longs_tsv = "gs://theiagen-public-files-rp/terra/flu-references/lat_longs.tsv" + File mpxv_lat_longs_tsv = "gs://theiagen-public-files-rp/terra/flu-references/lat_longs.tsv" #more comprehensive File mpxv_clades_tsv = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_clades.tsv" File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" File mpxv_reference_genbank = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" File mpxv_auspice_config = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_auspice_config_mpxv.json" + File mpxv_gene_annotations_gff = "gs://theiagen-public-files-rp/terra/augur-mpox-references/genemap.gff" + File mpxv_colors = "gs://theiagen-public-files-rp/terra/augur-mpox-references/colors_mpxv.tsv" Int disk_size = 50 } @@ -347,6 +349,8 @@ task set_mpxv_defaults { # establish mpxv default values for augur File reference_fasta = mpxv_reference_fasta File reference_genbank = mpxv_reference_genbank File auspice_config = mpxv_auspice_config + File genes = mpxv_gene_annotations_gff + File colors = mpxv_colors # inherited from flu defaults Float min_date = 2020.0 Int pivot_interval = 1 diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 0d3dd7c51..189820905 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -23,6 +23,8 @@ workflow augur { String build_name File? reference_fasta File? reference_genbank + File? genes + File? colors Int? min_num_unambig String organism = "sars-cov-2" # options: sars-cov-2 or flu String flu_segment = "HA" # options: HA or NA @@ -52,7 +54,7 @@ workflow augur { flu_subtype = flu_subtype } } - if (organism == "MPXV") { + if (organism == "MPXV" || organism == "mpxv" || organism == "monkeypox") { call augur_utils.set_mpxv_defaults as mpxv_defaults { # establish default parameters for mpxv input: } @@ -101,6 +103,7 @@ workflow augur { refined_tree = augur_refine.refined_tree, ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, reference_genbank = select_first([reference_genbank, sc2_defaults.reference_genbank, flu_defaults.reference_genbank, mpxv_defaults.reference_genbank]), + genes = select_first([genes, mpxv_defaults.genes]), build_name = build_name } if (flu_segment == "HA") { # we only have clade information for HA segments (but SC2 defaults will be selected first) @@ -126,6 +129,7 @@ workflow augur { augur_translate.translated_aa_muts_json, augur_clades.clade_assignments_json]), build_name = build_name, + colors_tsv = select_first([colors, mpxv_defaults.colors]), lat_longs_tsv = select_first([sc2_defaults.lat_longs_tsv, flu_defaults.lat_longs_tsv, mpxv_defaults.lat_longs_tsv, lat_longs_tsv]), auspice_config = select_first([sc2_defaults.auspice_config, flu_defaults.auspice_config, mpxv_defaults.auspice_config, auspice_config]) } From 109220ae80c77ef8163e0ccafd8ccad74154876d Mon Sep 17 00:00:00 2001 From: jrotieno Date: Wed, 26 Jul 2023 14:47:05 +0000 Subject: [PATCH 11/36] ancestral reference instead of NC_063383.fasta --- tasks/utilities/task_augur_utilities.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index 15da6bb51..d7b42d350 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -329,7 +329,7 @@ task set_mpxv_defaults { # establish mpxv default values for augur # in the future we will wget from the repo directly File mpxv_lat_longs_tsv = "gs://theiagen-public-files-rp/terra/flu-references/lat_longs.tsv" #more comprehensive File mpxv_clades_tsv = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_clades.tsv" - File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" + File mpxv_reference_fasta = "gs://theiagen-public-files-rp/terra/augur-mpox-references/reconstructed_ancestral_mpox.fasta" File mpxv_reference_genbank = "gs://theiagen-public-files-rp/terra/augur-mpox-references/NC_063383.1_reference.gb" File mpxv_auspice_config = "gs://theiagen-public-files-rp/terra/augur-mpox-references/mpox_auspice_config_mpxv.json" File mpxv_gene_annotations_gff = "gs://theiagen-public-files-rp/terra/augur-mpox-references/genemap.gff" From 61f6f49abc615419b86691be2b8e15900a7f83e5 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 10:40:45 +0000 Subject: [PATCH 12/36] adding traits task to infer ancestral traits when clades task isn't working --- .../augur/task_augur_traits.wdl | 39 +++++++++++++++++++ workflows/phylogenetics/wf_augur.wdl | 15 ++++++- 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tasks/phylogenetic_inference/augur/task_augur_traits.wdl diff --git a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl new file mode 100644 index 000000000..ec84eceb6 --- /dev/null +++ b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl @@ -0,0 +1,39 @@ +version 1.0 + +task augur_traits { + input { + File refined_tree + File metadata + File? weights + Boolean confidence = true + String? metadata_id_columns + String columns + String build_name + + Int mem_size = 30 + Int disk_size = 100 + } + command <<< + AUGUR_RECURSION_LIMIT=10000 augur traits \ + --tree "~{refined_tree}" \ + --metadata "~{metadata}" \ + --columns "~{columns}" \ + --confidence "~{confidence}" \ + ~{'--metadata-id-columns ' + metadata_id_columns} \ + ~{'--weights ' + weights} + --output-node-data "~{build_name}_traits.json" + >>> + output { + File traits_assignments_json = "~{build_name}_traits.json" + } + runtime { + docker: "us-docker.pkg.dev/general-theiagen/biocontainers/augur:22.0.2--pyhdfd78af_0" + memory: mem_size + " GB" + cpu: 4 + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" + dx_instance_type: "mem3_ssd2_x4" + preemptible: 0 + maxRetries: 3 + } +} \ No newline at end of file diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 189820905..f0484e96f 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -5,6 +5,7 @@ import "../../tasks/utilities/task_augur_utilities.wdl" as augur_utils import "../../tasks/phylogenetic_inference/augur/task_augur_align.wdl" as align_task import "../../tasks/phylogenetic_inference/augur/task_augur_ancestral.wdl" as ancestral_task +import "../../tasks/phylogenetic_inference/augur/task_augur_traits.wdl" as traits_task import "../../tasks/phylogenetic_inference/augur/task_augur_clades.wdl" as clades_task import "../../tasks/phylogenetic_inference/augur/task_augur_export.wdl" as export_task import "../../tasks/phylogenetic_inference/augur/task_augur_refine.wdl" as refine_task @@ -31,6 +32,8 @@ workflow augur { String? flu_subtype # options: "Victoria" "Yamagata" "H3N2" "H1N1" File? clades_tsv + Boolean run_traits = false # by default, do not run traits + String? augur_trait_columns # comma-separated list of columns to use for traits # these are very minimal files that hopefully will prevent workflow failure but will not provide any useful information File lat_longs_tsv = "gs://theiagen-public-files-rp/terra/augur-defaults/minimal-lat-longs.tsv" File auspice_config = "gs://theiagen-public-files-rp/terra/augur-defaults/minimal-auspice-config.json" @@ -107,9 +110,18 @@ workflow augur { build_name = build_name } if (flu_segment == "HA") { # we only have clade information for HA segments (but SC2 defaults will be selected first) + if (run_traits) { # by default do not run traits and clades will be assigned based on the clades_tsv + call traits_task.augur_traits { + input: + refined_tree = augur_refine.refined_tree, + metadata = tsv_join.out_tsv, + columns = select_first([augur_trait_columns, "lineage, clade, clade_membership"]), + build_name = build_name + } + } if (defined(clades_tsv) || defined(sc2_defaults.clades_tsv) || defined(flu_defaults.clades_tsv) || defined(mpxv_defaults.clades_tsv) ) { # one of these must be present call clades_task.augur_clades { # assign clades to nodes based on amino-acid or nucleotide signatures - input: + input: refined_tree = augur_refine.refined_tree, ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, translated_aa_muts_json = augur_translate.translated_aa_muts_json, @@ -161,6 +173,7 @@ workflow augur { File aligned_fastas = augur_align.aligned_fasta File combined_assemblies = filter_sequences_by_length.filtered_fasta File metadata_merged = tsv_join.out_tsv + File? traits_json = augur_traits.traits_assignments_json # list of samples that were kept and met the length filters File keep_list = fasta_to_ids.ids_txt From cdf84f6d2210743ed9d48c388d334271268b2a77 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 11:36:56 +0000 Subject: [PATCH 13/36] traits confidence --- tasks/phylogenetic_inference/augur/task_augur_traits.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl index ec84eceb6..dfaa712e0 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl @@ -5,7 +5,7 @@ task augur_traits { File refined_tree File metadata File? weights - Boolean confidence = true + #Boolean confidence = true String? metadata_id_columns String columns String build_name @@ -18,7 +18,7 @@ task augur_traits { --tree "~{refined_tree}" \ --metadata "~{metadata}" \ --columns "~{columns}" \ - --confidence "~{confidence}" \ + --confidence \ ~{'--metadata-id-columns ' + metadata_id_columns} \ ~{'--weights ' + weights} --output-node-data "~{build_name}_traits.json" From 6324a2492d5972aa741cee3071c490d3582b7345 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 12:35:59 +0000 Subject: [PATCH 14/36] updated traits task --- tasks/phylogenetic_inference/augur/task_augur_traits.wdl | 2 +- workflows/phylogenetics/wf_augur.wdl | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl index dfaa712e0..182ddfdba 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl @@ -20,7 +20,7 @@ task augur_traits { --columns "~{columns}" \ --confidence \ ~{'--metadata-id-columns ' + metadata_id_columns} \ - ~{'--weights ' + weights} + ~{'--weights ' + weights} \ --output-node-data "~{build_name}_traits.json" >>> output { diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index f0484e96f..c1122b8a8 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -115,7 +115,7 @@ workflow augur { input: refined_tree = augur_refine.refined_tree, metadata = tsv_join.out_tsv, - columns = select_first([augur_trait_columns, "lineage, clade, clade_membership"]), + columns = select_first([augur_trait_columns, "lineage clade clade_membership"]), # default to these columns if none are specified build_name = build_name } } @@ -139,7 +139,8 @@ workflow augur { augur_refine.branch_lengths, augur_ancestral.ancestral_nt_muts_json, augur_translate.translated_aa_muts_json, - augur_clades.clade_assignments_json]), + augur_clades.clade_assignments_json, + augur_traits.traits_assignments_json]), build_name = build_name, colors_tsv = select_first([colors, mpxv_defaults.colors]), lat_longs_tsv = select_first([sc2_defaults.lat_longs_tsv, flu_defaults.lat_longs_tsv, mpxv_defaults.lat_longs_tsv, lat_longs_tsv]), From ab0710b93110bac6e0a66f959a6a2d67dc0337bc Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 13:14:46 +0000 Subject: [PATCH 15/36] update augur prep fields --- tasks/utilities/task_augur_utilities.wdl | 2 +- workflows/phylogenetics/wf_augur.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index d7b42d350..f2022bcdb 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -394,7 +394,7 @@ task prep_augur_metadata { fi # if pango_lineage defined, add to metadata if [[ "~{nextclade_clade}" ]]; then - nextclade_header="pango_lineage" + nextclade_header="nextclade_clade" fi if [[ "~{organism}" == "sars-cov-2" ]]; then diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index c1122b8a8..062ec30ef 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -115,7 +115,7 @@ workflow augur { input: refined_tree = augur_refine.refined_tree, metadata = tsv_join.out_tsv, - columns = select_first([augur_trait_columns, "lineage clade clade_membership"]), # default to these columns if none are specified + columns = select_first([augur_trait_columns, "pango_lineage nextclade_clade"]), # default to these columns if none are specified build_name = build_name } } From 18a61856931ca338e9b06845c7cf2310f91f41d9 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 13:46:44 +0000 Subject: [PATCH 16/36] modifying column input --- tasks/phylogenetic_inference/augur/task_augur_traits.wdl | 2 +- workflows/phylogenetics/wf_augur.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl index 182ddfdba..8955a6c73 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl @@ -17,7 +17,7 @@ task augur_traits { AUGUR_RECURSION_LIMIT=10000 augur traits \ --tree "~{refined_tree}" \ --metadata "~{metadata}" \ - --columns "~{columns}" \ + ~{'--columns ' + columns} \ --confidence \ ~{'--metadata-id-columns ' + metadata_id_columns} \ ~{'--weights ' + weights} \ diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 062ec30ef..d14c51ddd 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -115,7 +115,7 @@ workflow augur { input: refined_tree = augur_refine.refined_tree, metadata = tsv_join.out_tsv, - columns = select_first([augur_trait_columns, "pango_lineage nextclade_clade"]), # default to these columns if none are specified + columns = select_first([augur_trait_columns, "pango_lineage,nextclade_clade"]), # default to these columns if none are specified build_name = build_name } } From 5827a6094f7c2701774ae323e43238d72b9c29e1 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 15:58:57 +0000 Subject: [PATCH 17/36] changes to traits task and augur prep --- tasks/phylogenetic_inference/augur/task_augur_traits.wdl | 2 +- tasks/utilities/task_augur_utilities.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl index 8955a6c73..3f3f1414b 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_traits.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_traits.wdl @@ -17,7 +17,7 @@ task augur_traits { AUGUR_RECURSION_LIMIT=10000 augur traits \ --tree "~{refined_tree}" \ --metadata "~{metadata}" \ - ~{'--columns ' + columns} \ + ~{'--columns {' + columns + '}'} \ --confidence \ ~{'--metadata-id-columns ' + metadata_id_columns} \ ~{'--weights ' + weights} \ diff --git a/tasks/utilities/task_augur_utilities.wdl b/tasks/utilities/task_augur_utilities.wdl index f2022bcdb..575c0f6e6 100644 --- a/tasks/utilities/task_augur_utilities.wdl +++ b/tasks/utilities/task_augur_utilities.wdl @@ -394,7 +394,7 @@ task prep_augur_metadata { fi # if pango_lineage defined, add to metadata if [[ "~{nextclade_clade}" ]]; then - nextclade_header="nextclade_clade" + nextclade_header="clade_membership" fi if [[ "~{organism}" == "sars-cov-2" ]]; then From c011bf2d0ac9f0307941dad9ea5f12b1698c8cec Mon Sep 17 00:00:00 2001 From: jrotieno Date: Tue, 1 Aug 2023 17:11:24 +0000 Subject: [PATCH 18/36] updated conditional for traits vs clades --- workflows/phylogenetics/wf_augur.wdl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index d14c51ddd..1e1de8095 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -115,19 +115,21 @@ workflow augur { input: refined_tree = augur_refine.refined_tree, metadata = tsv_join.out_tsv, - columns = select_first([augur_trait_columns, "pango_lineage,nextclade_clade"]), # default to these columns if none are specified + columns = select_first([augur_trait_columns, "pango_lineage,clade_membership"]), # default to these columns if none are specified build_name = build_name } } - if (defined(clades_tsv) || defined(sc2_defaults.clades_tsv) || defined(flu_defaults.clades_tsv) || defined(mpxv_defaults.clades_tsv) ) { # one of these must be present - call clades_task.augur_clades { # assign clades to nodes based on amino-acid or nucleotide signatures - input: - refined_tree = augur_refine.refined_tree, - ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, - translated_aa_muts_json = augur_translate.translated_aa_muts_json, - reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), - build_name = build_name, - clades_tsv = select_first([clades_tsv, sc2_defaults.clades_tsv, flu_defaults.clades_tsv, mpxv_defaults.clades_tsv]) + if (! run_traits) { + if (defined(clades_tsv) || defined(sc2_defaults.clades_tsv) || defined(flu_defaults.clades_tsv) || defined(mpxv_defaults.clades_tsv) ) { # one of these must be present + call clades_task.augur_clades { # assign clades to nodes based on amino-acid or nucleotide signatures + input: + refined_tree = augur_refine.refined_tree, + ancestral_nt_muts_json = augur_ancestral.ancestral_nt_muts_json, + translated_aa_muts_json = augur_translate.translated_aa_muts_json, + reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), + build_name = build_name, + clades_tsv = select_first([clades_tsv, sc2_defaults.clades_tsv, flu_defaults.clades_tsv, mpxv_defaults.clades_tsv]) + } } } } From ee3b711bac0d31448eea9f8aaf262a9ac99b0ef6 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Wed, 16 Aug 2023 17:36:33 +0000 Subject: [PATCH 19/36] adding the option to skip augur align when user has an alignment already --- .../augur/task_augur_tree.wdl | 4 ++ workflows/phylogenetics/wf_augur.wdl | 52 +++++++++++-------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl index e18b5c703..9aad90396 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl @@ -14,6 +14,9 @@ task augur_tree { Int disk_size = 750 } command <<< + # capture version information + augur version > VERSION + AUGUR_RECURSION_LIMIT=10000 augur tree \ --alignment "~{aligned_fasta}" \ --output "~{build_name}_~{method}.nwk" \ @@ -26,6 +29,7 @@ task augur_tree { >>> output { File aligned_tree = "~{build_name}_~{method}.nwk" + String augur_version = read_string("VERSION") } runtime { docker: "us-docker.pkg.dev/general-theiagen/biocontainers/augur:22.0.2--pyhdfd78af_0" diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 1e1de8095..1e209c698 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -27,9 +27,11 @@ workflow augur { File? genes File? colors Int? min_num_unambig - String organism = "sars-cov-2" # options: sars-cov-2 or flu + String organism = "sars-cov-2" # options: sars-cov-2 or flu or mpxv String flu_segment = "HA" # options: HA or NA String? flu_subtype # options: "Victoria" "Yamagata" "H3N2" "H1N1" + Boolean skip_alignment = false # by default, do not skip alignment + File? alignment_fasta # if alignment is skipped, provide an alignment File? clades_tsv Boolean run_traits = false # by default, do not run traits @@ -40,11 +42,6 @@ workflow augur { Boolean distance_tree_only = false # by default, do not skip making a time tree } - call file_handling.cat_files { # concatenate all of the input fasta files together - input: - files_to_cat = assembly_fastas, - concatenated_file_name = "~{build_name}_concatenated.fasta" - } if (organism == "sars-cov-2") { call augur_utils.set_sc2_defaults as sc2_defaults { # establish default parameters for sars-cov-2 input: @@ -62,35 +59,44 @@ workflow augur { input: } } - call augur_utils.filter_sequences_by_length { # remove any sequences that do not meet the quality threshold - input: - sequences_fasta = cat_files.concatenated_files, - min_non_N = select_first([min_num_unambig, sc2_defaults.min_num_unambig, flu_defaults.min_num_unambig, mpxv_defaults.min_num_unambig]), - } - call align_task.augur_align { # perform mafft alignment on the sequences - input: - assembly_fasta = filter_sequences_by_length.filtered_fasta, - reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), - } call augur_utils.tsv_join { # merge the metadata files input: input_tsvs = sample_metadata_tsvs, id_col = "strain", out_basename = "metadata-merged" } + if (! skip_alignment) { # by default, continue + call file_handling.cat_files { # concatenate all of the input fasta files together + input: + files_to_cat = assembly_fastas, + concatenated_file_name = "~{build_name}_concatenated.fasta" + } + } + call augur_utils.filter_sequences_by_length { # remove any sequences that do not meet the quality threshold + input: + sequences_fasta = select_first([cat_files.concatenated_files, alignment_fasta]), + min_non_N = select_first([min_num_unambig, sc2_defaults.min_num_unambig, flu_defaults.min_num_unambig, mpxv_defaults.min_num_unambig]), + } + if (! skip_alignment) { # by default, continue + call align_task.augur_align { # perform mafft alignment on the sequences + input: + assembly_fasta = filter_sequences_by_length.filtered_fasta, + reference_fasta = select_first([reference_fasta, sc2_defaults.reference_fasta, flu_defaults.reference_fasta, mpxv_defaults.reference_fasta]), + } + } call augur_utils.fasta_to_ids { # extract list of remaining sequences (so we know which ones were dropped) input: - sequences_fasta = augur_align.aligned_fasta + sequences_fasta = select_first([augur_align.aligned_fasta, filter_sequences_by_length.filtered_fasta]) } call tree_task.augur_tree { # create a "draft" (or distance) augur tree input: - aligned_fasta = augur_align.aligned_fasta, + aligned_fasta = select_first([augur_align.aligned_fasta, filter_sequences_by_length.filtered_fasta]), build_name = build_name } if (! distance_tree_only) { # by default, continue call refine_task.augur_refine { # create a timetree (aka, refine augur tree) input: - aligned_fasta = augur_align.aligned_fasta, + aligned_fasta = select_first([augur_align.aligned_fasta, filter_sequences_by_length.filtered_fasta]), draft_augur_tree = augur_tree.aligned_tree, metadata = tsv_join.out_tsv, build_name = build_name @@ -98,7 +104,7 @@ workflow augur { call ancestral_task.augur_ancestral { # infer ancestral sequences input: refined_tree = augur_refine.refined_tree, - aligned_fasta = augur_align.aligned_fasta, + aligned_fasta = select_first([augur_align.aligned_fasta, filter_sequences_by_length.filtered_fasta]), build_name = build_name } call translate_task.augur_translate { # translate gene regions from nucleotides to amino acids @@ -152,7 +158,7 @@ workflow augur { call snp_dists_task.snp_dists { # create a snp matrix from the alignment input: cluster_name = build_name, - alignment = augur_align.aligned_fasta + alignment = select_first([augur_align.aligned_fasta,filter_sequences_by_length.filtered_fasta]) } call reorder_matrix_task.reorder_matrix { # reorder snp matrix to match distance tree input: @@ -167,13 +173,13 @@ workflow augur { # version capture String augur_phb_version = version_capture.phb_version String augur_phb_analysis_date = version_capture.date - String augur_version = augur_align.augur_version + String augur_version = augur_tree.augur_version # augur outputs File? auspice_input_json = augur_export.auspice_json File? time_tree = augur_refine.refined_tree File distance_tree = augur_tree.aligned_tree - File aligned_fastas = augur_align.aligned_fasta + File aligned_fastas = select_first([augur_align.aligned_fasta, alignment_fasta]) File combined_assemblies = filter_sequences_by_length.filtered_fasta File metadata_merged = tsv_join.out_tsv File? traits_json = augur_traits.traits_assignments_json From a41c45414fb7091dd7e6d4515852badf8b8f0fb3 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 17 Aug 2023 11:57:37 +0000 Subject: [PATCH 20/36] Add memory to augur tasks --- tasks/phylogenetic_inference/augur/task_augur_translate.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_translate.wdl b/tasks/phylogenetic_inference/augur/task_augur_translate.wdl index b2efe1a7d..1dd9a65f3 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_translate.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_translate.wdl @@ -10,6 +10,7 @@ task augur_translate { File? genes # a file containing list of genes to translate (from nucleotides to amino acids) Int disk_size = 50 + Int mem_size = 32 } command <<< AUGUR_RECURSION_LIMIT=10000 augur translate \ @@ -24,7 +25,7 @@ task augur_translate { } runtime { docker: "us-docker.pkg.dev/general-theiagen/biocontainers/augur:22.0.2--pyhdfd78af_0" - memory: "2 GB" + memory: mem_size + " GB" cpu : 1 disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" From f4e1c460fca4fedf46920eec94ff8cb150029564 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 17 Aug 2023 15:53:47 +0000 Subject: [PATCH 21/36] increase memory for augur align --- tasks/phylogenetic_inference/augur/task_augur_align.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_align.wdl b/tasks/phylogenetic_inference/augur/task_augur_align.wdl index 30c8a7fff..5d9afa2a2 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_align.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_align.wdl @@ -6,7 +6,7 @@ task augur_align { File reference_fasta Boolean fill_gaps = false Int cpus = 64 - Int mem_size = 32 + Int mem_size = 128 Int disk_size = 750 } command <<< From 80a8eb92cd62f5d61f4db835ecd2b5d692d2febd Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 07:37:12 +0000 Subject: [PATCH 22/36] removed the clock filter setting from 4, making it optional input --- tasks/phylogenetic_inference/augur/task_augur_refine.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/phylogenetic_inference/augur/task_augur_refine.wdl b/tasks/phylogenetic_inference/augur/task_augur_refine.wdl index 543b5d58f..1b8982a83 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_refine.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_refine.wdl @@ -19,7 +19,7 @@ task augur_refine { String date_inference = "marginal" # assign internal nodes to their marginally most likley dates (joint, marginal) String? branch_length_inference # branch length mode of treetime to use (auto, joint, marginal, input; default: auto) String? coalescent # coalescent time scale in units of inverse clock rate (float), optimize as scalar ("opt") or skyline (skyline) - Int clock_filter_iqd = 4 # remove tips that deviate more than n_iqd interquartile ranges from the root-to-tip vs time regression + Int? clock_filter_iqd # remove tips that deviate more than n_iqd interquartile ranges from the root-to-tip vs time regression String divergence_units = "mutations" # units in which sequence divergences is exported ("mutations" or "mutations-per-site") Int disk_size = 100 From ddfe22d4358c708ed7595e8ae5692bf98b9de238 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 16:54:43 +0000 Subject: [PATCH 23/36] nextclade task to use dataset json as reference --- tasks/taxon_id/task_nextclade_addSamples.wdl | 68 +++++++++++++++++++ .../wf_nextclade_addToRefTree.wdl | 21 +++--- 2 files changed, 77 insertions(+), 12 deletions(-) create mode 100644 tasks/taxon_id/task_nextclade_addSamples.wdl diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl new file mode 100644 index 000000000..6c68ad8d9 --- /dev/null +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -0,0 +1,68 @@ +version 1.0 + +task nextclade { + meta { + description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults." + } + input { + File genome_fasta + File? root_sequence + File? reference_tree_json + File? qc_config_json + File? gene_annotations_gff + File? pcr_primers_csv + File? virus_properties + String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0" + String dataset_name + String? dataset_reference + String? dataset_tag + Int disk_size = 50 + } + String basename = basename(genome_fasta, ".fasta") + command <<< + NEXTCLADE_VERSION="$(nextclade --version)" + echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION + + nextclade dataset get \ + --name="~{dataset_name}" \ + ~{"--reference " + dataset_reference} \ + ~{"--tag " + dataset_tag} \ + -o nextclade_dataset_dir \ + --verbose + + # If no referece sequence is provided, use the reference tree from the dataset + if [ -z "~{reference_tree_json}" ]; then + reference_tree_json=nextclade_dataset_dir/tree.json + fi + set -e + nextclade run \ + --input-dataset=nextclade_dataset_dir/ \ + ~{"--input-root-seq " + root_sequence} \ + --input-tree ~{reference_tree_json} \ + ~{"--input-qc-config " + qc_config_json} \ + ~{"--input-gene-map " + gene_annotations_gff} \ + ~{"--input-pcr-primers " + pcr_primers_csv} \ + ~{"--input-virus-properties " + virus_properties} \ + --output-json "~{basename}".nextclade.json \ + --output-tsv "~{basename}".nextclade.tsv \ + --output-tree "~{basename}".nextclade.auspice.json \ + --output-all=. \ + "~{genome_fasta}" + >>> + runtime { + docker: "~{docker}" + memory: "8 GB" + cpu: 2 + disks: "local-disk " + disk_size + " SSD" + disk: disk_size + " GB" # TES + dx_instance_type: "mem1_ssd1_v2_x2" + maxRetries: 3 + } + output { + String nextclade_version = read_string("NEXTCLADE_VERSION") + File nextclade_json = "~{basename}.nextclade.json" + File auspice_json = "~{basename}.nextclade.auspice.json" + File nextclade_tsv = "~{basename}.nextclade.tsv" + String nextclade_docker = docker + } +} \ No newline at end of file diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index abbc43072..616423d38 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,6 +1,6 @@ version 1.0 -import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade_analysis +import "../../tasks/taxon_id/task_nextclade_addSamples.wdl" as nextclade_analysis workflow nextclade_addToRefTree { meta { @@ -8,28 +8,25 @@ workflow nextclade_addToRefTree { } input { File assembly_fastas - String build_name - File root_sequence_fasta - #File? gene_annotations_gff - File reference_tree_json + File? root_sequence_fasta + File? gene_annotations_gff + File? reference_tree_json File? qc_config_json File? pcr_primers_csv File? virus_properties - String docker = "nextstrain/nextclade:2.13.0" - String dataset_name = "MPXV" - String dataset_reference = "ancestral" - String dataset_tag = "2023-01-26T12:00:00Z" + String dataset_name + String? dataset_reference + String? dataset_tag } call nextclade_analysis.nextclade { # nextclade analysis input: genome_fasta = assembly_fastas, root_sequence = root_sequence_fasta, - auspice_reference_tree_json = reference_tree_json, + reference_tree_json = reference_tree_json, qc_config_json = qc_config_json, - #gene_annotations_json = gene_annotations_gff, + gene_annotations_gff = gene_annotations_gff, pcr_primers_csv = pcr_primers_csv, virus_properties = virus_properties, - docker = docker, dataset_name = dataset_name, dataset_reference = dataset_reference, dataset_tag = dataset_tag From f0c0ae1c504240c9a801f0d6e23c539b7c5fd53c Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 17:50:07 +0000 Subject: [PATCH 24/36] fix input tree --- tasks/taxon_id/task_nextclade_addSamples.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index 6c68ad8d9..784b90e5d 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -31,14 +31,17 @@ task nextclade { --verbose # If no referece sequence is provided, use the reference tree from the dataset - if [ -z "~{reference_tree_json}" ]; then + if [[ ! -z "~{reference_tree_json}" ]]; then reference_tree_json=nextclade_dataset_dir/tree.json + else + reference_tree_json="~{reference_tree_json}" fi + set -e nextclade run \ --input-dataset=nextclade_dataset_dir/ \ ~{"--input-root-seq " + root_sequence} \ - --input-tree ~{reference_tree_json} \ + --input-tree ${reference_tree_json} \ ~{"--input-qc-config " + qc_config_json} \ ~{"--input-gene-map " + gene_annotations_gff} \ ~{"--input-pcr-primers " + pcr_primers_csv} \ From c0483b3e441efe88bc57132b182a20e57008fa35 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 18:44:17 +0000 Subject: [PATCH 25/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 38 +++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index 784b90e5d..e496499be 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -31,26 +31,29 @@ task nextclade { --verbose # If no referece sequence is provided, use the reference tree from the dataset - if [[ ! -z "~{reference_tree_json}" ]]; then - reference_tree_json=nextclade_dataset_dir/tree.json + if [ ! -z "~{reference_tree_json}" ]; then + echo "Default reference tree JSON will be used" + cp nextclade_dataset_dir/tree.json reference_tree.json + tree_json="reference_tree.json" else - reference_tree_json="~{reference_tree_json}" + echo "User reference tree JSON will be used" + tree_json="~{reference_tree_json}" fi - set -e - nextclade run \ - --input-dataset=nextclade_dataset_dir/ \ - ~{"--input-root-seq " + root_sequence} \ - --input-tree ${reference_tree_json} \ - ~{"--input-qc-config " + qc_config_json} \ - ~{"--input-gene-map " + gene_annotations_gff} \ - ~{"--input-pcr-primers " + pcr_primers_csv} \ - ~{"--input-virus-properties " + virus_properties} \ - --output-json "~{basename}".nextclade.json \ - --output-tsv "~{basename}".nextclade.tsv \ - --output-tree "~{basename}".nextclade.auspice.json \ - --output-all=. \ - "~{genome_fasta}" + # set -e + # nextclade run \ + # --input-dataset=nextclade_dataset_dir/ \ + # ~{"--input-root-seq " + root_sequence} \ + # --input-tree ${reference_tree_json} \ + # ~{"--input-qc-config " + qc_config_json} \ + # ~{"--input-gene-map " + gene_annotations_gff} \ + # ~{"--input-pcr-primers " + pcr_primers_csv} \ + # ~{"--input-virus-properties " + virus_properties} \ + # --output-json "~{basename}".nextclade.json \ + # --output-tsv "~{basename}".nextclade.tsv \ + # --output-tree "~{basename}".nextclade.auspice.json \ + # --output-all=. \ + # "~{genome_fasta}" >>> runtime { docker: "~{docker}" @@ -67,5 +70,6 @@ task nextclade { File auspice_json = "~{basename}.nextclade.auspice.json" File nextclade_tsv = "~{basename}.nextclade.tsv" String nextclade_docker = docker + File nextclade_ref_tree_json = "reference_tree.json" } } \ No newline at end of file From c81b9262c08f7841e91c23447126a517af943caa Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:04:38 +0000 Subject: [PATCH 26/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index e496499be..c82271572 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -31,8 +31,8 @@ task nextclade { --verbose # If no referece sequence is provided, use the reference tree from the dataset - if [ ! -z "~{reference_tree_json}" ]; then - echo "Default reference tree JSON will be used" + if [ -z "~{reference_tree_json}" ]; then + echo "Default dataset reference tree JSON will be used" cp nextclade_dataset_dir/tree.json reference_tree.json tree_json="reference_tree.json" else @@ -66,10 +66,10 @@ task nextclade { } output { String nextclade_version = read_string("NEXTCLADE_VERSION") - File nextclade_json = "~{basename}.nextclade.json" - File auspice_json = "~{basename}.nextclade.auspice.json" - File nextclade_tsv = "~{basename}.nextclade.tsv" + #File nextclade_json = "~{basename}.nextclade.json" + #File auspice_json = "~{basename}.nextclade.auspice.json" + #File nextclade_tsv = "~{basename}.nextclade.tsv" String nextclade_docker = docker - File nextclade_ref_tree_json = "reference_tree.json" + #File nextclade_ref_tree_json = select_first(["~{reference_tree_json}","reference_tree.json"]) } } \ No newline at end of file From 369d52447d0d1e5a7af678911f38ddbfb7592ec8 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:08:35 +0000 Subject: [PATCH 27/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index c82271572..f017cf0dc 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -69,6 +69,9 @@ task nextclade { #File nextclade_json = "~{basename}.nextclade.json" #File auspice_json = "~{basename}.nextclade.auspice.json" #File nextclade_tsv = "~{basename}.nextclade.tsv" + File nextclade_json = read_string("test") + File auspice_json = read_string("test") + File nextclade_tsv = read_string("test") String nextclade_docker = docker #File nextclade_ref_tree_json = select_first(["~{reference_tree_json}","reference_tree.json"]) } From 635377672257bbcbcb913205a50017e2722f9b36 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:10:48 +0000 Subject: [PATCH 28/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index f017cf0dc..8a57cd485 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -69,9 +69,9 @@ task nextclade { #File nextclade_json = "~{basename}.nextclade.json" #File auspice_json = "~{basename}.nextclade.auspice.json" #File nextclade_tsv = "~{basename}.nextclade.tsv" - File nextclade_json = read_string("test") - File auspice_json = read_string("test") - File nextclade_tsv = read_string("test") + File nextclade_json = "reference_tree.json" + File auspice_json = "reference_tree.json" + File nextclade_tsv = "reference_tree.json" String nextclade_docker = docker #File nextclade_ref_tree_json = select_first(["~{reference_tree_json}","reference_tree.json"]) } From d4570774127903cb82198610f8af8f063cd55192 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:26:29 +0000 Subject: [PATCH 29/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 41 ++++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index 8a57cd485..a79deadb9 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -40,20 +40,20 @@ task nextclade { tree_json="~{reference_tree_json}" fi - # set -e - # nextclade run \ - # --input-dataset=nextclade_dataset_dir/ \ - # ~{"--input-root-seq " + root_sequence} \ - # --input-tree ${reference_tree_json} \ - # ~{"--input-qc-config " + qc_config_json} \ - # ~{"--input-gene-map " + gene_annotations_gff} \ - # ~{"--input-pcr-primers " + pcr_primers_csv} \ - # ~{"--input-virus-properties " + virus_properties} \ - # --output-json "~{basename}".nextclade.json \ - # --output-tsv "~{basename}".nextclade.tsv \ - # --output-tree "~{basename}".nextclade.auspice.json \ - # --output-all=. \ - # "~{genome_fasta}" + set -e + nextclade run \ + --input-dataset=nextclade_dataset_dir/ \ + ~{"--input-root-seq " + root_sequence} \ + --input-tree ${tree_json} \ + ~{"--input-qc-config " + qc_config_json} \ + ~{"--input-gene-map " + gene_annotations_gff} \ + ~{"--input-pcr-primers " + pcr_primers_csv} \ + ~{"--input-virus-properties " + virus_properties} \ + --output-json "~{basename}".nextclade.json \ + --output-tsv "~{basename}".nextclade.tsv \ + --output-tree "~{basename}".nextclade.auspice.json \ + --output-all=. \ + "~{genome_fasta}" >>> runtime { docker: "~{docker}" @@ -66,13 +66,12 @@ task nextclade { } output { String nextclade_version = read_string("NEXTCLADE_VERSION") - #File nextclade_json = "~{basename}.nextclade.json" - #File auspice_json = "~{basename}.nextclade.auspice.json" - #File nextclade_tsv = "~{basename}.nextclade.tsv" - File nextclade_json = "reference_tree.json" - File auspice_json = "reference_tree.json" - File nextclade_tsv = "reference_tree.json" + File nextclade_json = "~{basename}.nextclade.json" + File auspice_json = "~{basename}.nextclade.auspice.json" + File nextclade_tsv = "~{basename}.nextclade.tsv" + #File nextclade_json = "reference_tree.json" + #File auspice_json = "reference_tree.json" + #File nextclade_tsv = "reference_tree.json" String nextclade_docker = docker - #File nextclade_ref_tree_json = select_first(["~{reference_tree_json}","reference_tree.json"]) } } \ No newline at end of file From 17ca4557b33f5f95640b0b3a97d316d1342649c9 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:50:06 +0000 Subject: [PATCH 30/36] . --- tasks/taxon_id/task_nextclade_addSamples.wdl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_addSamples.wdl index a79deadb9..07f04b242 100644 --- a/tasks/taxon_id/task_nextclade_addSamples.wdl +++ b/tasks/taxon_id/task_nextclade_addSamples.wdl @@ -34,12 +34,13 @@ task nextclade { if [ -z "~{reference_tree_json}" ]; then echo "Default dataset reference tree JSON will be used" cp nextclade_dataset_dir/tree.json reference_tree.json - tree_json="reference_tree.json" else echo "User reference tree JSON will be used" - tree_json="~{reference_tree_json}" + cp ~{reference_tree_json} reference_tree.json fi + tree_json="reference_tree.json" + set -e nextclade run \ --input-dataset=nextclade_dataset_dir/ \ @@ -69,9 +70,7 @@ task nextclade { File nextclade_json = "~{basename}.nextclade.json" File auspice_json = "~{basename}.nextclade.auspice.json" File nextclade_tsv = "~{basename}.nextclade.tsv" - #File nextclade_json = "reference_tree.json" - #File auspice_json = "reference_tree.json" - #File nextclade_tsv = "reference_tree.json" String nextclade_docker = docker + File netclade_ref_tree = "reference_tree.json" } } \ No newline at end of file From 88846aa2e109a3c8c7e73c6dbe67eb0af44785d0 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 19:53:05 +0000 Subject: [PATCH 31/36] . --- ...task_nextclade_addSamples.wdl => task_nextclade_add_ref.wdl} | 0 workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tasks/taxon_id/{task_nextclade_addSamples.wdl => task_nextclade_add_ref.wdl} (100%) diff --git a/tasks/taxon_id/task_nextclade_addSamples.wdl b/tasks/taxon_id/task_nextclade_add_ref.wdl similarity index 100% rename from tasks/taxon_id/task_nextclade_addSamples.wdl rename to tasks/taxon_id/task_nextclade_add_ref.wdl diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index 616423d38..ffbc66c53 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,6 +1,6 @@ version 1.0 -import "../../tasks/taxon_id/task_nextclade_addSamples.wdl" as nextclade_analysis +import "../../tasks/taxon_id/task_nextclade_add_ref.wdl" as nextclade_analysis workflow nextclade_addToRefTree { meta { From 802e41436c7cc1d6067151039f397db3f1792e39 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Fri, 18 Aug 2023 20:48:36 +0000 Subject: [PATCH 32/36] . --- tasks/taxon_id/task_nextclade_add_ref.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/taxon_id/task_nextclade_add_ref.wdl b/tasks/taxon_id/task_nextclade_add_ref.wdl index 07f04b242..9ffa2faeb 100644 --- a/tasks/taxon_id/task_nextclade_add_ref.wdl +++ b/tasks/taxon_id/task_nextclade_add_ref.wdl @@ -2,7 +2,7 @@ version 1.0 task nextclade { meta { - description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults." + description: "Nextclade task to add samples to either a user specified or a nextclade reference tree." } input { File genome_fasta From f2a740c6abb2f4fb44c4017168de3ba4c377229d Mon Sep 17 00:00:00 2001 From: jrotieno Date: Sat, 19 Aug 2023 05:50:55 +0000 Subject: [PATCH 33/36] use existing nextclade task --- tasks/taxon_id/task_nextclade.wdl | 75 +++++++++++++++++++ .../wf_nextclade_addToRefTree.wdl | 14 ++-- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/tasks/taxon_id/task_nextclade.wdl b/tasks/taxon_id/task_nextclade.wdl index aace367a4..5a54865a6 100644 --- a/tasks/taxon_id/task_nextclade.wdl +++ b/tasks/taxon_id/task_nextclade.wdl @@ -165,4 +165,79 @@ task nextclade_output_parser { String nextclade_aa_dels = read_string("NEXTCLADE_AADELS") String nextclade_lineage = read_string("NEXTCLADE_LINEAGE") } +} + +task nextclade_add_ref { + meta { + description: "Nextclade task to add samples to either a user specified or a nextclade reference tree." + } + input { + File genome_fasta + File? root_sequence + File? reference_tree_json + File? qc_config_json + File? gene_annotations_gff + File? pcr_primers_csv + File? virus_properties + String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0" + String dataset_name + String? dataset_reference + String? dataset_tag + Int disk_size = 50 + } + String basename = basename(genome_fasta, ".fasta") + command <<< + NEXTCLADE_VERSION="$(nextclade --version)" + echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION + + nextclade dataset get \ + --name="~{dataset_name}" \ + ~{"--reference " + dataset_reference} \ + ~{"--tag " + dataset_tag} \ + -o nextclade_dataset_dir \ + --verbose + + # If no referece sequence is provided, use the reference tree from the dataset + if [ -z "~{reference_tree_json}" ]; then + echo "Default dataset reference tree JSON will be used" + cp nextclade_dataset_dir/tree.json reference_tree.json + else + echo "User reference tree JSON will be used" + cp ~{reference_tree_json} reference_tree.json + fi + + tree_json="reference_tree.json" + + set -e + nextclade run \ + --input-dataset=nextclade_dataset_dir/ \ + ~{"--input-root-seq " + root_sequence} \ + --input-tree ${tree_json} \ + ~{"--input-qc-config " + qc_config_json} \ + ~{"--input-gene-map " + gene_annotations_gff} \ + ~{"--input-pcr-primers " + pcr_primers_csv} \ + ~{"--input-virus-properties " + virus_properties} \ + --output-json "~{basename}".nextclade.json \ + --output-tsv "~{basename}".nextclade.tsv \ + --output-tree "~{basename}".nextclade.auspice.json \ + --output-all=. \ + "~{genome_fasta}" + >>> + runtime { + docker: "~{docker}" + memory: "8 GB" + cpu: 2 + disks: "local-disk " + disk_size + " SSD" + disk: disk_size + " GB" # TES + dx_instance_type: "mem1_ssd1_v2_x2" + maxRetries: 3 + } + output { + String nextclade_version = read_string("NEXTCLADE_VERSION") + File nextclade_json = "~{basename}.nextclade.json" + File auspice_json = "~{basename}.nextclade.auspice.json" + File nextclade_tsv = "~{basename}.nextclade.tsv" + String nextclade_docker = docker + File netclade_ref_tree = "reference_tree.json" + } } \ No newline at end of file diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index ffbc66c53..3deb1eee1 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,6 +1,6 @@ version 1.0 -import "../../tasks/taxon_id/task_nextclade_add_ref.wdl" as nextclade_analysis +import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade_analysis workflow nextclade_addToRefTree { meta { @@ -18,7 +18,7 @@ workflow nextclade_addToRefTree { String? dataset_reference String? dataset_tag } - call nextclade_analysis.nextclade { # nextclade analysis + call nextclade_analysis.nextclade_add_ref { # nextclade analysis input: genome_fasta = assembly_fastas, root_sequence = root_sequence_fasta, @@ -32,10 +32,10 @@ workflow nextclade_addToRefTree { dataset_tag = dataset_tag } output { - String treeUpdate_nextclade_version = select_first([nextclade.nextclade_version, ""]) - File treeUpdate_nextclade_json = select_first([nextclade.nextclade_json, ""]) - File treeUpdate_auspice_json = select_first([nextclade.auspice_json, ""]) - File treeUpdate_nextclade_tsv = select_first([nextclade.nextclade_tsv, ""]) - String treeUpdate_nextclade_docker = select_first([nextclade.nextclade_docker, ""]) + String treeUpdate_nextclade_version = select_first([nextclade_add_ref.nextclade_version, ""]) + File treeUpdate_nextclade_json = select_first([nextclade_add_ref.nextclade_json, ""]) + File treeUpdate_auspice_json = select_first([nextclade_add_ref.auspice_json, ""]) + File treeUpdate_nextclade_tsv = select_first([nextclade_add_ref.nextclade_tsv, ""]) + String treeUpdate_nextclade_docker = select_first([nextclade_add_ref.nextclade_docker, ""]) } } \ No newline at end of file From f38024684b1638da4cf71376737f0a92a438b73e Mon Sep 17 00:00:00 2001 From: jrotieno Date: Sat, 19 Aug 2023 06:09:02 +0000 Subject: [PATCH 34/36] delete temp task --- tasks/taxon_id/task_nextclade_add_ref.wdl | 76 ----------------------- 1 file changed, 76 deletions(-) delete mode 100644 tasks/taxon_id/task_nextclade_add_ref.wdl diff --git a/tasks/taxon_id/task_nextclade_add_ref.wdl b/tasks/taxon_id/task_nextclade_add_ref.wdl deleted file mode 100644 index 9ffa2faeb..000000000 --- a/tasks/taxon_id/task_nextclade_add_ref.wdl +++ /dev/null @@ -1,76 +0,0 @@ -version 1.0 - -task nextclade { - meta { - description: "Nextclade task to add samples to either a user specified or a nextclade reference tree." - } - input { - File genome_fasta - File? root_sequence - File? reference_tree_json - File? qc_config_json - File? gene_annotations_gff - File? pcr_primers_csv - File? virus_properties - String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0" - String dataset_name - String? dataset_reference - String? dataset_tag - Int disk_size = 50 - } - String basename = basename(genome_fasta, ".fasta") - command <<< - NEXTCLADE_VERSION="$(nextclade --version)" - echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION - - nextclade dataset get \ - --name="~{dataset_name}" \ - ~{"--reference " + dataset_reference} \ - ~{"--tag " + dataset_tag} \ - -o nextclade_dataset_dir \ - --verbose - - # If no referece sequence is provided, use the reference tree from the dataset - if [ -z "~{reference_tree_json}" ]; then - echo "Default dataset reference tree JSON will be used" - cp nextclade_dataset_dir/tree.json reference_tree.json - else - echo "User reference tree JSON will be used" - cp ~{reference_tree_json} reference_tree.json - fi - - tree_json="reference_tree.json" - - set -e - nextclade run \ - --input-dataset=nextclade_dataset_dir/ \ - ~{"--input-root-seq " + root_sequence} \ - --input-tree ${tree_json} \ - ~{"--input-qc-config " + qc_config_json} \ - ~{"--input-gene-map " + gene_annotations_gff} \ - ~{"--input-pcr-primers " + pcr_primers_csv} \ - ~{"--input-virus-properties " + virus_properties} \ - --output-json "~{basename}".nextclade.json \ - --output-tsv "~{basename}".nextclade.tsv \ - --output-tree "~{basename}".nextclade.auspice.json \ - --output-all=. \ - "~{genome_fasta}" - >>> - runtime { - docker: "~{docker}" - memory: "8 GB" - cpu: 2 - disks: "local-disk " + disk_size + " SSD" - disk: disk_size + " GB" # TES - dx_instance_type: "mem1_ssd1_v2_x2" - maxRetries: 3 - } - output { - String nextclade_version = read_string("NEXTCLADE_VERSION") - File nextclade_json = "~{basename}.nextclade.json" - File auspice_json = "~{basename}.nextclade.auspice.json" - File nextclade_tsv = "~{basename}.nextclade.tsv" - String nextclade_docker = docker - File netclade_ref_tree = "reference_tree.json" - } -} \ No newline at end of file From 45fdc304f77e4ea68eb5458eb4cfd590b4a67154 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 21 Sep 2023 14:57:40 +0000 Subject: [PATCH 35/36] changing input "dataset_name" to "organism" to be more intuitive --- workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index 3deb1eee1..80678001f 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -14,7 +14,7 @@ workflow nextclade_addToRefTree { File? qc_config_json File? pcr_primers_csv File? virus_properties - String dataset_name + String organism String? dataset_reference String? dataset_tag } @@ -27,7 +27,7 @@ workflow nextclade_addToRefTree { gene_annotations_gff = gene_annotations_gff, pcr_primers_csv = pcr_primers_csv, virus_properties = virus_properties, - dataset_name = dataset_name, + dataset_name = organism, dataset_reference = dataset_reference, dataset_tag = dataset_tag } From ef3e182ae72eb8fe8a4c37003ed982fb65a8bda7 Mon Sep 17 00:00:00 2001 From: jrotieno Date: Thu, 21 Sep 2023 15:11:50 +0000 Subject: [PATCH 36/36] adding wf date and version capture --- workflows/phylogenetics/wf_nextclade_addToRefTree.wdl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index 80678001f..bd832ba98 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -1,6 +1,7 @@ version 1.0 import "../../tasks/taxon_id/task_nextclade.wdl" as nextclade_analysis +import "../../tasks/task_versioning.wdl" as versioning workflow nextclade_addToRefTree { meta { @@ -31,11 +32,17 @@ workflow nextclade_addToRefTree { dataset_reference = dataset_reference, dataset_tag = dataset_tag } + call versioning.version_capture{ + input: + } output { String treeUpdate_nextclade_version = select_first([nextclade_add_ref.nextclade_version, ""]) File treeUpdate_nextclade_json = select_first([nextclade_add_ref.nextclade_json, ""]) File treeUpdate_auspice_json = select_first([nextclade_add_ref.auspice_json, ""]) File treeUpdate_nextclade_tsv = select_first([nextclade_add_ref.nextclade_tsv, ""]) String treeUpdate_nextclade_docker = select_first([nextclade_add_ref.nextclade_docker, ""]) + # Version Capture + String samples_to_ref_tree_version = version_capture.phb_version + String samples_to_ref_tree_analysis_date = version_capture.date } } \ No newline at end of file