diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
index e74575c..51dbb2d 100644
--- a/phylogenetic/Snakefile
+++ b/phylogenetic/Snakefile
@@ -20,60 +20,7 @@ include: "workflow/snakemake_rules/usvi.smk"
 include: "workflow/snakemake_rules/prepare_sequences.smk"
 include: "workflow/snakemake_rules/construct_phylogeny.smk"
 include: "workflow/snakemake_rules/annotate_phylogeny.smk"
-
-rule export:
-    """Exporting data files for for auspice"""
-    input:
-        tree = "results/tree.nwk",
-        metadata = "data/metadata_all.tsv",
-        branch_lengths = "results/branch_lengths.json",
-        traits = "results/traits.json",
-        nt_muts = "results/nt_muts.json",
-        aa_muts = "results/aa_muts.json",
-        colors = files.colors,
-        auspice_config = files.auspice_config,
-        description = files.description
-    output:
-        auspice_json = "results/raw_zika.json",
-        root_sequence = "results/raw_zika_root-sequence.json",
-    params:
-        strain_id = config.get("strain_id_field", "strain"),
-    shell:
-        """
-        augur export v2 \
-            --tree {input.tree} \
-            --metadata {input.metadata} \
-            --metadata-id-columns {params.strain_id} \
-            --node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \
-            --colors {input.colors} \
-            --auspice-config {input.auspice_config} \
-            --description {input.description} \
-            --include-root-sequence \
-            --output {output.auspice_json}
-        """
-
-rule final_strain_name:
-    input:
-        auspice_json="results/raw_zika.json",
-        metadata="data/metadata_all.tsv",
-        root_sequence="results/raw_zika_root-sequence.json",
-    output:
-        auspice_json="auspice/zika.json",
-        root_sequence="auspice/zika_root-sequence.json",
-    params:
-        strain_id=config["strain_id_field"],
-        display_strain_field=config.get("display_strain_field", "strain"),
-    shell:
-        """
-        python3 scripts/set_final_strain_name.py \
-            --metadata {input.metadata} \
-            --metadata-id-columns {params.strain_id} \
-            --input-auspice-json {input.auspice_json} \
-            --display-strain-name {params.display_strain_field} \
-            --output {output.auspice_json}
-
-        cp {input.root_sequence} {output.root_sequence}
-        """
+include: "workflow/snakemake_rules/export.smk"
 
 rule clean:
     """Removing directories: {params}"""
diff --git a/phylogenetic/workflow/snakemake_rules/export.smk b/phylogenetic/workflow/snakemake_rules/export.smk
new file mode 100644
index 0000000..7dbe431
--- /dev/null
+++ b/phylogenetic/workflow/snakemake_rules/export.smk
@@ -0,0 +1,81 @@
+"""
+This part of the workflow collects the phylogenetic tree and annotations to
+export a Nextstrain dataset.
+
+REQUIRED INPUTS:
+
+    metadata = data/metadata_all.tsv
+    tree = results/tree.nwk
+    branch_lengths = results/branch_lengths.json
+    node_data = results/*.json
+
+OUTPUTS:
+
+    auspice_json = auspice/zika.json
+
+    There are optional sidecar JSON files that can be exported as part of the dataset.
+    See Nextstrain's data format docs for more details on sidecar files:
+    https://docs.nextstrain.org/page/reference/data-formats.html
+
+This part of the workflow usually includes the following steps:
+
+    - augur export v2
+    - augur frequencies
+
+See Augur's usage docs for these commands for more details.
+"""
+
+rule export:
+    """Exporting data files for auspice"""
+    input:
+        tree = "results/tree.nwk",
+        metadata = "data/metadata_all.tsv",
+        branch_lengths = "results/branch_lengths.json",
+        traits = "results/traits.json",
+        nt_muts = "results/nt_muts.json",
+        aa_muts = "results/aa_muts.json",
+        colors = files.colors,
+        auspice_config = files.auspice_config,
+        description = files.description
+    output:
+        auspice_json = "results/raw_zika.json",
+        root_sequence = "results/raw_zika_root-sequence.json",
+    params:
+        strain_id = config.get("strain_id_field", "strain"),
+    shell:
+        """
+        augur export v2 \
+            --tree {input.tree} \
+            --metadata {input.metadata} \
+            --metadata-id-columns {params.strain_id} \
+            --node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \
+            --colors {input.colors} \
+            --auspice-config {input.auspice_config} \
+            --description {input.description} \
+            --include-root-sequence \
+            --output {output.auspice_json}
+        """
+
+rule final_strain_name:
+    """Running set_final_strain_name.py on the exported JSON and copying the root sequence"""
+    input:
+        auspice_json="results/raw_zika.json",
+        metadata="data/metadata_all.tsv",
+        root_sequence="results/raw_zika_root-sequence.json",
+    output:
+        auspice_json="auspice/zika.json",
+        root_sequence="auspice/zika_root-sequence.json",
+    params:
+        strain_id=config.get("strain_id_field", "strain"),  # same default as rule export; avoids KeyError when unset
+        display_strain_field=config.get("display_strain_field", "strain"),
+    shell:
+        """
+        python3 scripts/set_final_strain_name.py \
+            --metadata {input.metadata} \
+            --metadata-id-columns {params.strain_id} \
+            --input-auspice-json {input.auspice_json} \
+            --display-strain-name {params.display_strain_field} \
+            --output {output.auspice_json}
+
+        cp {input.root_sequence} {output.root_sequence}
+        """