diff --git a/scripts/set_final_strain_name.py b/scripts/set_final_strain_name.py index 08ca935..c285372 100644 --- a/scripts/set_final_strain_name.py +++ b/scripts/set_final_strain_name.py @@ -2,13 +2,19 @@ import json, argparse from augur.io import read_metadata -def replace_name_recursive(node, lookup): +def replace_name_recursive(node, lookup, saveoldcolumn): if node["name"] in lookup: + if saveoldcolumn == "accession" or saveoldcolumn == "genbank_accession": + node["accession"] = node["name"] + node["node_attrs"]["url"] = "https://www.ncbi.nlm.nih.gov/nuccore/" + node["name"] + else: + node["node_attrs"][saveoldcolumn] = node["name"] + node["name"] = lookup[node["name"]] if "children" in node: for child in node["children"]: - replace_name_recursive(child, lookup) + replace_name_recursive(child, lookup, saveoldcolumn) if __name__=="__main__": parser = argparse.ArgumentParser( @@ -24,15 +30,20 @@ def replace_name_recursive(node, lookup): args = parser.parse_args() metadata = read_metadata(args.metadata, id_columns=args.metadata_id_columns) - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row.name - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) + if args.display_strain_name in metadata.columns: + name_lookup = {} + for ri, row in metadata.iterrows(): + strain_id = row.name + name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] + + with open(args.input_auspice_json, 'r') as fh: + data = json.load(fh) - replace_name_recursive(data['tree'], name_lookup) + replace_name_recursive(data['tree'], name_lookup, args.metadata_id_columns[0]) + else: + with open(args.input_auspice_json, 'r') as fh: + data = json.load(fh) with open(args.output, 'w') as fh: - json.dump(data, fh) + json.dump(data, fh, allow_nan=False, indent=None, separators=",:")