From ad49a36614ab3a5a4713318c429ccd07f5486318 Mon Sep 17 00:00:00 2001 From: Michal Babinski Date: Thu, 14 Nov 2024 14:46:44 -0500 Subject: [PATCH] [Augur] Add augur tree iqtree model type to output (#674) * Add iqtree model extraction to augur_tree task and update workflow output * Refactor augur_tree task to correctly derive FASTA basename and directory for iqtree model extraction * Add iqtree model used field to augur workflow documentation * Handle empty substitution model case in augur_tree task * Ensure iqtree_model_used is a non-nullable String in augur_tree task and workflow * Update augur workflow documentation to include model options for substitution model and rename iqtree model variable for clarity * Add augur_iqtree_model_used variable to documentation for clarity --- .../workflows/phylogenetic_construction/augur.md | 3 ++- .../augur/task_augur_tree.wdl | 16 ++++++++++++++++ workflows/phylogenetics/wf_augur.wdl | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md index 7ccf78d56..d8eb10f9f 100644 --- a/docs/workflows/phylogenetic_construction/augur.md +++ b/docs/workflows/phylogenetic_construction/augur.md @@ -220,7 +220,7 @@ This workflow runs on the set level. Please note that for every task, runtime pa | augur_tree | **exclude_sites** | File | File of one-based sites to exclude for raw tree building (BED format in .bed files, DRM format in tab-delimited files, or one position per line) | | Optional | | augur_tree | **method** | String | Which method to use to build the tree; options: "fasttree", "raxml", "iqtree" | iqtree | Optional | | augur_tree | **override_default_args** | Boolean | If true, override default tree builder arguments instead of augmenting them | FALSE | Optional | -| augur_tree | **substitution_model** | String | The substitution model to use; only available for iqtree. 
Specify "auto" to run ModelTest; options: "GTR" | GTR | Optional | +| augur_tree | **substitution_model** | String | The substitution model to use; only available for iqtree. Specify "auto" to run ModelTest; model options can be found [here](http://www.iqtree.org/doc/Substitution-Models) | GTR | Optional | | augur_tree | **tree_builder_args** | String | Additional tree builder arguments either augmenting or overriding the default arguments. FastTree defaults: "-nt -nosupport". RAxML defaults: "-f d -m GTRCAT -c 25 -p 235813". IQ-TREE defaults: "-ninit 2 -n 2 -me 0.05 -nt AUTO -redo" | | Optional | | sc2_defaults | **nextstrain_ncov_repo_commit** | String | The version of the from which to draw default values for SARS-CoV-2. | `23d1243127e8838a61b7e5c1a72bc419bf8c5a0d` | Optional | | organism_parameters | **gene_locations_bed_file** | File | Use to provide locations of interest where average coverage will be calculated | Defaults are organism-specific. Please find default values for some organisms here: . For an organism without set defaults, an empty file is provided, "gs://theiagen-public-files/terra/theiacov-files/empty.bed", but will not be as useful as an organism specific gene locations bed file. 
| Optional | @@ -284,6 +284,7 @@ The Nextstrain team hosts documentation surrounding the Augur workflow → Auspi | **Variable** | **Type** | **Description** | | --- | --- | --- | | aligned_fastas | File | A FASTA file of the aligned genomes | +| augur_iqtree_model_used | String | The substitution model used by IQ-TREE during augur tree (the best-fit model selected when substitution_model is "auto"); empty if the tree-building method is not iqtree | | augur_phb_analysis_date | String | The date the analysis was run | | augur_phb_version | String | The version of the Public Health Bioinformatics (PHB) repository used | | augur_version | String | Version of Augur used | diff --git a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl index f16c73618..22bd469e7 100644 --- a/tasks/phylogenetic_inference/augur/task_augur_tree.wdl +++ b/tasks/phylogenetic_inference/augur/task_augur_tree.wdl @@ -28,10 +28,26 @@ task augur_tree { ~{"--tree-builder-args " + tree_builder_args} \ ~{true="--override-default-args" false="" override_default_args} \ --nthreads auto + + # If iqtree, get the model used + if [ "~{method}" == "iqtree" ]; then + if [ "~{substitution_model}" == "auto" ]; then + FASTA_BASENAME=$(basename ~{aligned_fasta} .fasta) + FASTA_DIR=$(dirname ~{aligned_fasta}) + MODEL=$(grep "Best-fit model:" ${FASTA_DIR}/${FASTA_BASENAME}-delim.iqtree.log | sed 's|Best-fit model: ||g;s|chosen.*||' | tr -d '\n\r') + else + MODEL="~{substitution_model}" + fi + echo "$MODEL" > FINAL_MODEL.txt + else + echo "" > FINAL_MODEL.txt + fi >>> + output { File aligned_tree = "~{build_name}_~{method}.nwk" String augur_version = read_string("VERSION") + String iqtree_model_used = read_string("FINAL_MODEL.txt") } runtime { docker: docker diff --git a/workflows/phylogenetics/wf_augur.wdl b/workflows/phylogenetics/wf_augur.wdl index 3398d430f..bb003b705 100644 --- a/workflows/phylogenetics/wf_augur.wdl +++ b/workflows/phylogenetics/wf_augur.wdl @@ -203,6 +203,7 @@ workflow augur { File? auspice_input_json = augur_export.auspice_json File? 
time_tree = augur_refine.refined_tree File distance_tree = augur_tree.aligned_tree + String augur_iqtree_model_used = augur_tree.iqtree_model_used File aligned_fastas = select_first([augur_align.aligned_fasta, alignment_fasta]) File combined_assemblies = filter_sequences_by_length.filtered_fasta File? metadata_merged = tsv_join.out_tsv