From f5cad6b3d9311c5fd7b77abb2a1696bead6407a3 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 2 Nov 2023 12:12:52 -0700 Subject: [PATCH] Improve Newick parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the basic Newick parser with an external one that handles single-quoted node names. Quoted names are allowed by the format¹ and occur in trees produced by NCBI Pathogens.² The parser's API is a little awkward for our use case, but it's perfectly workable. Out of several parsers I tried on NPM, this was the only one which handled quoted names, so use it despite the slightly awkward API. ¹ See [link missing] for lack of any formal spec. ² [link missing] --- auspice_client_customisation/parseNewick.js | 88 +++++++++------------ package-lock.json | 13 ++- package.json | 3 +- 3 files changed, 53 insertions(+), 51 deletions(-) diff --git a/auspice_client_customisation/parseNewick.js b/auspice_client_customisation/parseNewick.js index 768a72b..ae61864 100644 --- a/auspice_client_customisation/parseNewick.js +++ b/auspice_client_customisation/parseNewick.js @@ -1,55 +1,45 @@ -/** - * Newick format parser in JavaScript. - * - * Copyright (c) Jason Davies 2010. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - */ +import { parse as _parseNewick } from "newick-js"; -/* NOTE: parseNewick function slightly modified to produce an object better suited for Nextstrain. 
*/ - -export const parseNewick = (nwk) => { - const ancestors = []; - let tree = {}; - const tokens = nwk.split(/\s*(;|\(|\)|,|:)\s*/); - for (let i=0; i { + const {root, rootWeight, graph: [,edges]} = _parseNewick(nwk); + const edgesByParent = new Map(); + + for (const [parent, child, weight] of edges) { + if (!edgesByParent.has(parent)) + edgesByParent.set(parent, new Set()); + edgesByParent.get(parent).add({child, weight}); } - return tree; -}; + const constructTree = (parent, weight) => { + const tree = { + // Particulars of this object are tied to getTreeStruct() below. + name: parent.label ?? "", + node_attrs: { + div: Number.isFinite(weight) ? weight : 0, + } + }; + + const childEdges = edgesByParent.get(parent); + + if (childEdges?.size) { + tree.children = []; + + for (const {child, weight} of childEdges) { + /* childEdges is reversed relative to the order given by the Newick input + * due to a side-effect of the parser's internals, so we unshift() + * instead of push() to restore the input order. 
+ */ + tree.children.unshift( + constructTree(child, weight) + ); + } + } + + return tree; + }; + + return constructTree(root, rootWeight); +}; const getTreeStruct = (nwk) => { const tree = parseNewick(nwk); diff --git a/package-lock.json b/package-lock.json index 2564275..eeeb68d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,8 @@ "license": "AGPL-3.0-only", "dependencies": { "auspice": "2.49.0", - "heroku-ssl-redirect": "0.0.4" + "heroku-ssl-redirect": "0.0.4", + "newick-js": "^1.2.1" }, "engines": { "node": "16.x" @@ -4750,6 +4751,11 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, + "node_modules/newick-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/newick-js/-/newick-js-1.2.1.tgz", + "integrity": "sha512-qyZVNtlXmORBf2w9vg2S/5N5mQlU46xDFdPX7SEDZTeSElafNQUelNzR7HWSKcVOpjXiDnBqkvjd5RlEG7/SEA==" + }, "node_modules/no-case": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", @@ -10555,6 +10561,11 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, + "newick-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/newick-js/-/newick-js-1.2.1.tgz", + "integrity": "sha512-qyZVNtlXmORBf2w9vg2S/5N5mQlU46xDFdPX7SEDZTeSElafNQUelNzR7HWSKcVOpjXiDnBqkvjd5RlEG7/SEA==" + }, "no-case": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", diff --git a/package.json b/package.json index 4ee0e63..d502439 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ }, "dependencies": { "auspice": "2.49.0", - "heroku-ssl-redirect": "0.0.4" + "heroku-ssl-redirect": "0.0.4", + "newick-js": "^1.2.1" } }