From d2f40342280edb0ac409dc3856f3820298201d00 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Fri, 24 May 2024 14:56:40 +0200 Subject: [PATCH 1/2] inline root sequence, add pathogen json and nextclade extension --- Snakefile | 6 +++--- config/configfile.yaml | 12 ++++++++++- nextclade/Snakefile | 1 + scripts/clade_names.py | 31 ----------------------------- workflow/snakemake_rules/core.smk | 4 +++- workflow/snakemake_rules/export.smk | 24 ++++++++++++---------- 6 files changed, 32 insertions(+), 46 deletions(-) delete mode 100644 scripts/clade_names.py diff --git a/Snakefile b/Snakefile index cdd77ab..7f703e3 100644 --- a/Snakefile +++ b/Snakefile @@ -1,5 +1,8 @@ configfile: "config/configfile.yaml" +wildcard_constraints: + a_or_b = r"a|b" + build_dir = 'results' auspice_dir = 'auspice' @@ -8,9 +11,6 @@ rule all: expand("auspice/rsv_{subtype}_{build}.json", subtype = config.get("subtypes",['a']), build = config.get("buildstorun", ['genome'])), - expand("auspice/rsv_{subtype}_{build}_root-sequence.json", - subtype = config.get("subtypes",['a']), - build = config.get("buildstorun", ['genome'])) include: "workflow/snakemake_rules/chores.smk" diff --git a/config/configfile.yaml b/config/configfile.yaml index 1d2e7cb..0241396 100644 --- a/config/configfile.yaml +++ b/config/configfile.yaml @@ -38,4 +38,14 @@ ancestral: inference: "joint" traits: - columns: "country" \ No newline at end of file + columns: "country" + +nextclade_attributes: + a: + name: "RSV-A NextClade using real-time tree" + reference_name: "hRSV/A/England/397/2017" + accession: "EPI_ISL_412866" + b: + name: "RSV-B NextClade using real-time tree" + reference_name: "hRSV/B/Australia/VIC-RCH056/2019" + accession: "EPI_ISL_1653999" diff --git a/nextclade/Snakefile b/nextclade/Snakefile index 7b34ba5..ac821da 100644 --- a/nextclade/Snakefile +++ b/nextclade/Snakefile @@ -330,6 +330,7 @@ rule export: --node-data {input.node_data}\ --auspice-config {input.auspice_config} \ --color-by-metadata 
{params.fields} \ + --include-root-sequence-inline \ --minify-json \ --title "Nextclade reference tree for RSV-{wildcards.a_or_b} with root {wildcards.reference} built on {params.date}" \ --output {output.auspice_json} 2>&1; diff --git a/scripts/clade_names.py b/scripts/clade_names.py deleted file mode 100644 index ee8c836..0000000 --- a/scripts/clade_names.py +++ /dev/null @@ -1,31 +0,0 @@ -import json, argparse - -def replace_clade_recursive(node): - if "genome_clade_annotation" in node["node_attrs"]: - if "labels" not in node["branch_attrs"]: - node["branch_attrs"]["labels"] = {} - node["branch_attrs"]["labels"]["genome_clade"] = node["node_attrs"]["genome_clade_annotation"]["value"] - node["node_attrs"].pop("genome_clade_annotation") - if "children" in node: - for child in node["children"]: - replace_clade_recursive(child) - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="fix genome clade info", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - data["meta"]["colorings"] = [x for x in data["meta"]["colorings"] - if x["key"] != "genome_clade_annotation"] - replace_clade_recursive(data['tree']) - - with open(args.output, 'w') as fh: - json.dump(data, fh, indent=0) diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index bc9cefe..c79efbc 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -213,7 +213,8 @@ rule ancestral: """ input: tree = rules.refine.output.tree, - alignment = get_alignment + alignment = get_alignment, + root_sequence = build_dir + "/{a_or_b}/{build_name}/{build_name}_reference.gbk" output: node_data = build_dir + 
"/{a_or_b}/{build_name}/nt_muts.json" params: @@ -224,6 +225,7 @@ rule ancestral: --tree {input.tree} \ --alignment {input.alignment} \ --output-node-data {output.node_data} \ + --root-sequence {input.root_sequence} \ --inference {params.inference} """ diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index 1766996..657f608 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -36,8 +36,7 @@ rule export: auspice_config = config["files"]["auspice_config"], description = config["description"] output: - auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json", - root_sequence = build_dir + "/{a_or_b}/{build_name}/tree_root-sequence.json" + auspice_json = build_dir + "/{a_or_b}/{build_name}/tree.json" params: title = lambda w: f"RSV-{w.a_or_b.upper()} phylogeny", strain_id=config["strain_id_field"], @@ -53,7 +52,7 @@ rule export: --description {input.description} \ --colors {input.colors} \ --auspice-config {input.auspice_config} \ - --include-root-sequence \ + --include-root-sequence-inline \ --output {output.auspice_json} """ @@ -76,18 +75,23 @@ rule final_strain_name: """ -rule rename_clade_labels: +rule rename_and_ready_for_nextclade: input: auspice_json= rules.final_strain_name.output.auspice_json, - root_sequence= rules.export.output.root_sequence + pathogen_json= "nextclade/config/pathogen.json" output: - auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json", - root_sequence= "auspice/rsv_{a_or_b}_{build_name}_root-sequence.json" + auspice_json= "auspice/rsv_{a_or_b}_{build_name}.json" + params: + accession= lambda w: config["nextclade_attributes"][w.a_or_b]["accession"], + name= lambda w: config["nextclade_attributes"][w.a_or_b]["name"], + ref_name= lambda w: config["nextclade_attributes"][w.a_or_b]["reference_name"] shell: """ - python3 scripts/clade_names.py \ + python3 scripts/rename_and_nextclade.py \ --input-auspice-json {input.auspice_json} \ + --pathogen-json 
"""Post-process an Auspice JSON so it can double as a Nextclade dataset tree.

The script does two things:

1. Turns every ``genome_clade_annotation`` node attribute into a
   ``genome_clade`` branch label and drops the matching coloring.
2. Embeds the Nextclade ``pathogen.json`` (minus its ``files`` section, plus
   build-specific attributes) under ``meta.extensions.nextclade.pathogen``.
"""
import argparse
import json


def replace_clade_recursive(node):
    """Move ``genome_clade_annotation`` from node attributes to branch labels.

    The tree rooted at ``node`` is modified in place: for every node carrying
    ``node_attrs["genome_clade_annotation"]``, its ``"value"`` is stored as
    ``branch_attrs["labels"]["genome_clade"]`` and the node attribute is
    removed.

    The name is kept for backward compatibility; the traversal itself uses an
    explicit stack so that very deep trees cannot exceed Python's recursion
    limit.  Nodes lacking ``node_attrs`` are skipped and a missing
    ``branch_attrs``/``labels`` mapping is created on demand.

    Args:
        node: Root of an Auspice-style tree (nested dicts linked through an
            optional ``"children"`` list).

    Returns:
        None.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        node_attrs = current.get("node_attrs", {})
        if "genome_clade_annotation" in node_attrs:
            annotation = node_attrs.pop("genome_clade_annotation")
            branch_attrs = current.setdefault("branch_attrs", {})
            branch_attrs.setdefault("labels", {})["genome_clade"] = annotation["value"]
        pending.extend(current.get("children", ()))


def _nextclade_pathogen(pathogen_data, reference, build_name, reference_accession):
    """Return a copy of ``pathogen_data`` prepared for embedding in the tree JSON."""
    pathogen = dict(pathogen_data)
    # The "files" section only makes sense for a stand-alone dataset directory;
    # tolerate pathogen JSONs that never had it.
    pathogen.pop("files", None)
    pathogen["attributes"] = {
        "reference accession": reference_accession,
        "reference name": reference,
        "name": build_name,
    }
    pathogen["experimental"] = True
    return pathogen


def _parse_args(argv=None):
    """Parse the command line (``argv`` defaults to ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser(
        description="fix genome clade info",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json")
    parser.add_argument('--pathogen-json', type=str, required=True, help="pathogen json")
    parser.add_argument('--reference', type=str, required=True, help="reference")
    parser.add_argument('--build-name', type=str, required=True, help="nextclade build name")
    parser.add_argument('--reference-accession', type=str, required=True, help="reference accession")
    parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
    return parser.parse_args(argv)


def main(argv=None):
    """Run the conversion described in the module docstring.

    Args:
        argv: Optional list of command-line arguments; ``None`` means use the
            process arguments.
    """
    args = _parse_args(argv)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(args.pathogen_json, 'r', encoding="utf-8") as fh:
        pathogen_data = json.load(fh)

    with open(args.input_auspice_json, 'r', encoding="utf-8") as fh:
        data = json.load(fh)

    meta = data["meta"]
    if "colorings" in meta:
        # The annotation becomes a branch label, so its coloring is obsolete.
        meta["colorings"] = [x for x in meta["colorings"]
                             if x["key"] != "genome_clade_annotation"]
    replace_clade_recursive(data['tree'])

    # Keep any extensions already present and only (re)place the nextclade one.
    extensions = meta.get("extensions")
    if not isinstance(extensions, dict):
        extensions = {}
    extensions["nextclade"] = {
        "pathogen": _nextclade_pathogen(
            pathogen_data, args.reference, args.build_name, args.reference_accession
        )
    }
    meta["extensions"] = extensions

    with open(args.output, 'w', encoding="utf-8") as fh:
        json.dump(data, fh, indent=0)


if __name__ == "__main__":
    main()