Skip to content

Commit

Permalink
Move rules for annotating phylogeny to its own smk file
Browse files Browse the repository at this point in the history
Part of work to update this repo to match the pathogen-repo-template.
  • Loading branch information
j23414 committed Dec 18, 2023
1 parent a87ad7e commit 2ed2103
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 61 deletions.
62 changes: 1 addition & 61 deletions phylogenetic/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,67 +19,7 @@ files = rules.files.params
include: "workflow/snakemake_rules/usvi.smk"
include: "workflow/snakemake_rules/prepare_sequences.smk"
include: "workflow/snakemake_rules/construct_phylogeny.smk"

rule ancestral:
    """
    Reconstruct ancestral sequences and mutations.

    Runs `augur ancestral` on the tree and the alignment using the inference
    mode set in `params.inference`, and writes the result as node data JSON.
    """
    input:
        tree = "results/tree.nwk",
        alignment = "results/aligned.fasta",
    output:
        node_data = "results/nt_muts.json",
    params:
        inference = "joint",
    shell:
        """
        augur ancestral \
            --tree {input.tree} \
            --alignment {input.alignment} \
            --output-node-data {output.node_data} \
            --inference {params.inference}
        """

rule translate:
    """
    Translate amino acid sequences.

    Runs `augur translate` with the tree, the nucleotide mutation node data
    (`results/nt_muts.json`) and the reference sequence, and writes the
    result to `results/aa_muts.json`.
    """
    input:
        tree = "results/tree.nwk",
        node_data = "results/nt_muts.json",
        reference = files.reference,
    output:
        node_data = "results/aa_muts.json",
    # The final argument deliberately has no trailing backslash: a dangling
    # line continuation would silently fuse with any line added after it.
    shell:
        """
        augur translate \
            --tree {input.tree} \
            --ancestral-sequences {input.node_data} \
            --reference-sequence {input.reference} \
            --output {output.node_data}
        """

rule traits:
    """
    Infer ancestral traits for the metadata columns listed in `params.columns`.

    `params.sampling_bias_correction` is passed to
    `augur traits --sampling-bias-correction` to increase the uncertainty of
    the reconstruction and partially account for sampling bias.
    """
    input:
        tree = "results/tree.nwk",
        metadata = "data/metadata_all.tsv",
    output:
        node_data = "results/traits.json",
    params:
        columns = "region country",
        sampling_bias_correction = 3,
        # Metadata ID column: read from config key `strain_id_field`,
        # falling back to "strain" when the key is absent.
        strain_id = config.get("strain_id_field", "strain"),
    shell:
        """
        augur traits \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --metadata-id-columns {params.strain_id} \
            --output {output.node_data} \
            --columns {params.columns} \
            --confidence \
            --sampling-bias-correction {params.sampling_bias_correction}
        """
include: "workflow/snakemake_rules/annotate_phylogeny.smk"

rule export:
"""Exporting data files for for auspice"""
Expand Down
93 changes: 93 additions & 0 deletions phylogenetic/workflow/snakemake_rules/annotate_phylogeny.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
This part of the workflow creates additional annotations for the phylogenetic tree.
REQUIRED INPUTS:
metadata = data/metadata_all.tsv
prepared_sequences = results/aligned.fasta
tree = results/tree.nwk
OUTPUTS:
node_data = results/*.json
There are no required outputs for this part of the workflow as it depends
on which annotations are created. All outputs are expected to be node data
JSON files that can be fed into `augur export`.
See Nextstrain's data format docs for more details on node data JSONs:
https://docs.nextstrain.org/page/reference/data-formats.html
This part of the workflow usually includes the following steps:
- augur traits
- augur ancestral
- augur translate
- augur clades
See Augur's usage docs for these commands for more details.
Custom node data files can also be produced by build-specific scripts in addition
to the ones produced by Augur commands.
"""

rule ancestral:
    """
    Reconstruct ancestral sequences and mutations.

    Runs `augur ancestral` on the tree and the alignment using the inference
    mode set in `params.inference`, and writes the result as node data JSON.
    """
    input:
        tree = "results/tree.nwk",
        alignment = "results/aligned.fasta",
    output:
        node_data = "results/nt_muts.json",
    params:
        inference = "joint",
    shell:
        """
        augur ancestral \
            --tree {input.tree} \
            --alignment {input.alignment} \
            --output-node-data {output.node_data} \
            --inference {params.inference}
        """

rule translate:
    """
    Translate amino acid sequences.

    Runs `augur translate` with the tree, the nucleotide mutation node data
    (`results/nt_muts.json`) and the reference sequence, and writes the
    result to `results/aa_muts.json`.
    """
    input:
        tree = "results/tree.nwk",
        node_data = "results/nt_muts.json",
        reference = files.reference,
    output:
        node_data = "results/aa_muts.json",
    # The final argument deliberately has no trailing backslash: a dangling
    # line continuation would silently fuse with any line added after it.
    shell:
        """
        augur translate \
            --tree {input.tree} \
            --ancestral-sequences {input.node_data} \
            --reference-sequence {input.reference} \
            --output {output.node_data}
        """

rule traits:
    """
    Infer ancestral traits for the metadata columns listed in `params.columns`.

    `params.sampling_bias_correction` is passed to
    `augur traits --sampling-bias-correction` to increase the uncertainty of
    the reconstruction and partially account for sampling bias.
    """
    input:
        tree = "results/tree.nwk",
        metadata = "data/metadata_all.tsv",
    output:
        node_data = "results/traits.json",
    params:
        columns = "region country",
        sampling_bias_correction = 3,
        # Metadata ID column: read from config key `strain_id_field`,
        # falling back to "strain" when the key is absent.
        strain_id = config.get("strain_id_field", "strain"),
    shell:
        """
        augur traits \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --metadata-id-columns {params.strain_id} \
            --output {output.node_data} \
            --columns {params.columns} \
            --confidence \
            --sampling-bias-correction {params.sampling_bias_correction}
        """

0 comments on commit 2ed2103

Please sign in to comment.