From bc1d2117eda585d01f23d88a7113aa63d4f762ed Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 26 Aug 2024 11:35:32 -0700 Subject: [PATCH 1/3] Convert auspice_config.json to LF line endings --- phylogenetic/defaults/auspice_config.json | 112 +++++++++++----------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index d8c251d..58656fc 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -1,56 +1,56 @@ -{ - "title": "Real-time tracking of Zika virus evolution", - "maintainers": [ - {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} - ], - "data_provenance": [ - { - "name": "GenBank", - "url": "https://www.ncbi.nlm.nih.gov/genbank/" - }, - { - "name": "USVI", - "url": "https://github.com/blab/zika-usvi/" - } - ], - "build_url": "https://github.com/nextstrain/zika", - "colorings": [ - { - "key": "gt", - "title": "genotype", - "type": "categorical" - }, - { - "key": "num_date", - "title": "date", - "type": "continuous" - }, - { - "key": "author", - "title": "author", - "type": "categorical" - }, - { - "key": "country", - "title": "country", - "type": "categorical" - }, - { - "key": "region", - "title": "region", - "type": "categorical" - } - ], - "geo_resolutions": [ - "country", - "region" - ], - "display_defaults": { - "map_triplicate": true - }, - "filters": [ - "country", - "region", - "author" - ] -} +{ + "title": "Real-time tracking of Zika virus evolution", + "maintainers": [ + {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} + ], + "data_provenance": [ + { + "name": "GenBank", + "url": "https://www.ncbi.nlm.nih.gov/genbank/" + }, + { + "name": "USVI", + "url": "https://github.com/blab/zika-usvi/" + } + ], + "build_url": "https://github.com/nextstrain/zika", + "colorings": [ + { + "key": "gt", + "title": "genotype", + "type": "categorical" + }, + { + "key": "num_date", + "title": "date", + "type": "continuous" + }, + { + "key": "author", + "title": "author", + "type": "categorical" + }, + { + "key": "country", + "title": "country", + "type": "categorical" + }, + { + "key": "region", + "title": "region", + "type": "categorical" + } + ], + "geo_resolutions": [ + "country", + "region" + ], + "display_defaults": { + "map_triplicate": true + }, + "filters": [ + "country", + "region", + "author" + ] +} From a0604ad06d1ca9be88bb2c7eb290aca7082b637b Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 26 Aug 2024 11:36:14 -0700 Subject: [PATCH 2/3] phylogenetic: Use `strain` as default tip label Testing Following changes made in mpox --- phylogenetic/defaults/auspice_config.json | 6 ++- phylogenetic/rules/export.smk | 21 +------- phylogenetic/scripts/set_final_strain_name.py | 51 ------------------- 3 files changed, 6 insertions(+), 72 deletions(-) delete mode 100644 phylogenetic/scripts/set_final_strain_name.py diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index 58656fc..973bf12 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -46,8 +46,12 @@ "region" ], "display_defaults": { - "map_triplicate": true + "map_triplicate": true, + "tip_label": "strain" }, + "metadata_columns": [ + "strain" + ], "filters": [ "country", "region", diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 9b9ada0..77d020f 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -38,7 +38,7 @@ rule export: auspice_config = "defaults/auspice_config.json", description = "defaults/description.md" output: - auspice_json = "results/raw_zika.json" + auspice_json = "auspice/zika.json" params: strain_id = config.get("strain_id_field", "strain"), shell: @@ -54,22 +54,3 @@ rule export: --include-root-sequence-inline \ --output {output.auspice_json} """ - -rule final_strain_name: - input: - auspice_json="results/raw_zika.json", - metadata="data/metadata_all.tsv" - output: - auspice_json="auspice/zika.json" - params: - strain_id=config["strain_id_field"], - display_strain_field=config.get("display_strain_field", "strain"), - shell: - """ - python3 scripts/set_final_strain_name.py \ - --metadata {input.metadata} \ - --metadata-id-columns {params.strain_id} \ - --input-auspice-json {input.auspice_json} \ - --display-strain-name {params.display_strain_field} \ - --output {output.auspice_json} - """ diff --git a/phylogenetic/scripts/set_final_strain_name.py b/phylogenetic/scripts/set_final_strain_name.py deleted file mode 100644 index d104ca1..0000000 --- a/phylogenetic/scripts/set_final_strain_name.py +++ /dev/null @@ -1,51 +0,0 @@ -import pandas as pd -import json, argparse -from augur.io import read_metadata - -def replace_name_recursive(node, lookup, saveoldcolumn): - if node["name"] in lookup: - if saveoldcolumn == "accession": - node["node_attrs"][saveoldcolumn] = node["name"] - elif saveoldcolumn == "genbank_accession": - node["node_attrs"][saveoldcolumn] = {} - node["node_attrs"][saveoldcolumn]["value"] = node["name"] - else: - node["node_attrs"][saveoldcolumn] = node["name"] - - node["name"] = lookup[node["name"]] - - if "children" in node: - for child in node["children"]: - replace_name_recursive(child, lookup, saveoldcolumn) - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Swaps out the strain names in the Auspice JSON with the final strain name", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") - parser.add_argument('--metadata', type=str, required=True, help="input data") - parser.add_argument('--metadata-id-columns', nargs="+", help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.") - parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - metadata = read_metadata(args.metadata, id_columns=args.metadata_id_columns) - - if args.display_strain_name in metadata.columns: - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row.name - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] - - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - replace_name_recursive(data['tree'], name_lookup, args.metadata_id_columns[0]) - else: - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - with open(args.output, 'w') as fh: - json.dump(data, fh, allow_nan=False, indent=None, separators=",:") From 21473b9ce587ce939d0cef43f26c2a9f8aa6be83 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 26 Nov 2024 11:11:20 -0800 Subject: [PATCH 3/3] phylogenetic: Remove unused `display_strain_field` param The display field is now defined through the auspice_config.json and so the `display_strain_field` config param is no longer used in the workflow. --- phylogenetic/defaults/config_zika.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/phylogenetic/defaults/config_zika.yaml b/phylogenetic/defaults/config_zika.yaml index d25c2c6..a3627fb 100644 --- a/phylogenetic/defaults/config_zika.yaml +++ b/phylogenetic/defaults/config_zika.yaml @@ -4,7 +4,6 @@ sequences_url: "https://data.nextstrain.org/files/workflows/zika/sequences.fasta metadata_url: "https://data.nextstrain.org/files/workflows/zika/metadata.tsv.zst" strain_id_field: "accession" -display_strain_field: "strain" filter: group_by: "country year month"