diff --git a/Snakefile b/Snakefile index cdd77ab..7f703e3 100644 --- a/Snakefile +++ b/Snakefile @@ -1,5 +1,8 @@ configfile: "config/configfile.yaml" +wildcard_constraints: + a_or_b = r"a|b" + build_dir = 'results' auspice_dir = 'auspice' @@ -8,9 +11,6 @@ rule all: expand("auspice/rsv_{subtype}_{build}.json", subtype = config.get("subtypes",['a']), build = config.get("buildstorun", ['genome'])), - expand("auspice/rsv_{subtype}_{build}_root-sequence.json", - subtype = config.get("subtypes",['a']), - build = config.get("buildstorun", ['genome'])) include: "workflow/snakemake_rules/chores.smk" diff --git a/config/configfile.yaml b/config/configfile.yaml index 1d2e7cb..0241396 100644 --- a/config/configfile.yaml +++ b/config/configfile.yaml @@ -38,4 +38,14 @@ ancestral: inference: "joint" traits: - columns: "country" \ No newline at end of file + columns: "country" + +nextclade_attributes: + a: + name: "RSV-A NextClade using real-time tree" + reference_name: "hRSV/A/England/397/2017" + accession: "EPI_ISL_412866" + b: + name: "RSV-B NextClade using real-time tree" + reference_name: "hRSV/B/Australia/VIC-RCH056/2019" + accession: "EPI_ISL_1653999" diff --git a/nextclade/Snakefile b/nextclade/Snakefile index 7b34ba5..ac821da 100644 --- a/nextclade/Snakefile +++ b/nextclade/Snakefile @@ -330,6 +330,7 @@ rule export: --node-data {input.node_data}\ --auspice-config {input.auspice_config} \ --color-by-metadata {params.fields} \ + --include-root-sequence-inline \ --minify-json \ --title "Nextclade reference tree for RSV-{wildcards.a_or_b} with root {wildcards.reference} built on {params.date}" \ --output {output.auspice_json} 2>&1; diff --git a/scripts/clade_names.py b/scripts/rename_and_nextclade.py similarity index 60% rename from scripts/clade_names.py rename to scripts/rename_and_nextclade.py index ee8c836..63c1026 100644 --- a/scripts/clade_names.py +++ b/scripts/rename_and_nextclade.py @@ -17,9 +17,17 @@ def replace_clade_recursive(node): ) parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") + parser.add_argument('--pathogen-json', type=str, required=True, help="pathogen json") + parser.add_argument('--reference', type=str, required=True, help="reference") + parser.add_argument('--build-name', type=str, required=True, help="nextclade build name") + parser.add_argument('--reference-accession', type=str, required=True, help="reference accession") parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") args = parser.parse_args() + # read pathogen json + with open(args.pathogen_json, 'r') as fh: + pathogen_data = json.load(fh) + with open(args.input_auspice_json, 'r') as fh: data = json.load(fh) @@ -27,5 +35,12 @@ def replace_clade_recursive(node): if x["key"] != "genome_clade_annotation"] replace_clade_recursive(data['tree']) + # remove unneeded files structure + pathogen_data.pop("files") + + pathogen_data["attributes"] = {"reference accession": args.reference_accession, "reference name": args.reference, "name": args.build_name} + pathogen_data["experimental"] = True + data["meta"]["extensions"] = {'nextclade': {'pathogen': pathogen_data}} + with open(args.output, 'w') as fh: json.dump(data, fh, indent=0) diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index bc9cefe..c79efbc 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -213,7 +213,8 @@ rule ancestral: """ input: tree = rules.refine.output.tree, - alignment = get_alignment + alignment = get_alignment, + root_sequence = build_dir + "/{a_or_b}/{build_name}/{build_name}_reference.gbk" output: node_data = build_dir + "/{a_or_b}/{build_name}/nt_muts.json" params: @@ -224,6 +225,7 @@ rule ancestral: --tree {input.tree} \ --alignment {input.alignment} \ --output-node-data {output.node_data} \ + --root-sequence {input.root_sequence} \ --inference {params.inference} """ diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index 1766996..657f608 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -36,8 +36,7 @@ rule export: auspice_config = config["files"]["auspice_config"], description = config["description"] output: - auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json", - root_sequence = build_dir + "/{a_or_b}/{build_name}/tree_root-sequence.json" + auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json" params: title = lambda w: f"RSV-{w.a_or_b.upper()} phylogeny", strain_id=config["strain_id_field"], @@ -53,7 +52,7 @@ rule export: --description {input.description} \ --colors {input.colors} \ --auspice-config {input.auspice_config} \ - --include-root-sequence \ + --include-root-sequence-inline \ --output {output.auspice_json} """ @@ -76,18 +75,23 @@ rule final_strain_name: """ -rule rename_clade_labels: +rule rename_and_ready_for_nextclade: input: auspice_json= rules.final_strain_name.output.auspice_json, - root_sequence= rules.export.output.root_sequence + pathogen_json= "nextclade/config/pathogen.json" output: - auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json", - root_sequence= "auspice/rsv_{a_or_b}_{build_name}_root-sequence.json" + auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json" + params: + accession= lambda w: config["nextclade_attributes"][w.a_or_b]["accession"], + name= lambda w: config["nextclade_attributes"][w.a_or_b]["name"], + ref_name= lambda w: config["nextclade_attributes"][w.a_or_b]["reference_name"] shell: """ - python3 scripts/clade_names.py \ + python3 scripts/rename_and_nextclade.py \ --input-auspice-json {input.auspice_json} \ + --pathogen-json {input.pathogen_json} \ + --reference {params.ref_name:q} \ + --build-name {params.name:q} \ + --reference-accession {params.accession:q} \ --output {output.auspice_json} - - cp {input.root_sequence} {output.root_sequence} """