From 2aaf3e81af783469abe6ec1a57812dd358d65d42 Mon Sep 17 00:00:00 2001 From: Jake Low Date: Thu, 29 Aug 2024 14:42:43 -0700 Subject: [PATCH] Use @osmcha/osm-adiff-parser instead of re-implementing it in lib/xml.js --- lib/get-changesets.js | 6 +-- lib/xml.js | 109 ++++++++---------------------------------- package.json | 1 + yarn.lock | 12 +++++ 4 files changed, 36 insertions(+), 92 deletions(-) diff --git a/lib/get-changesets.js b/lib/get-changesets.js index 0e1453a..789a2a1 100644 --- a/lib/get-changesets.js +++ b/lib/get-changesets.js @@ -2,7 +2,7 @@ const _ = require('lodash'); const moment = require('moment'); -const { parseXml, parseChangesetXml, parseAugmentedDiff } = require('../lib/xml'); +const { parseOsmChangeXml, parseChangesetXml, parseAugmentedDiff } = require('../lib/xml'); const { getStateForMinute } = require('../util/get-states'); const { request } = require('../util/request'); const { @@ -14,7 +14,7 @@ const { } = require('./constants'); const getChangesets = async (xml) => { - const jsonData = parseXml(xml); + const jsonData = parseOsmChangeXml(xml); if (!jsonData.osmChange || !jsonData.osmChange[0]) { throw new Error('OSM data missing from XML file'); @@ -54,7 +54,7 @@ const getChangesets = async (xml) => { getBboxParam(meta.bbox) ); - const parsed = parseAugmentedDiff(augmentedDiffXml); + const parsed = await parseAugmentedDiff(augmentedDiffXml); const elements = Object.keys(parsed).reduce( (result, item) => { diff --git a/lib/xml.js b/lib/xml.js index 673222b..b0cb944 100644 --- a/lib/xml.js +++ b/lib/xml.js @@ -2,9 +2,13 @@ const _ = require('lodash'); const htmlparser = require('htmlparser2'); +const osmAdiffParser = require('@osmcha/osm-adiff-parser'); - -const parseXml = (xmlString) => { +/* + * Parse osmChange XML format documented here: https://wiki.openstreetmap.org/wiki/OsmChange + * (Contains new versions of each modified element) + */ +const parseOsmChangeXml = (xmlString) => { let buffer = {}; let items = []; let tempType = ''; @@ -87,7 +91,12 @@ const parseXml = (xmlString) => { }; -const parseChangesetXml = (xml) => { +/* + * Parse OSM Changeset Metadata XML, of the form returned by + * https://www.openstreetmap.org/api/0.6/changeset/:id + * (Contains changeset's bbox, timestamp, comment, and authorship) + */ +const parseChangesetXml = (xmlString) => { const result = {}; const opts = { @@ -113,95 +122,17 @@ const parseChangesetXml = (xml) => { decodeEntities: true, xmlMode: true }); - parser.write(xml); + parser.write(xmlString); parser.end(); return result; }; -const parseAugmentedDiff = (xml, changesetsFilter) => { - let currentMember = {}; - let currentMode = ''; - let currentAction = ''; - let currentElement = {}; - let oldElement = {}; - const changesetMap = {}; - - const isElement = (symbol) => { - return (symbol === 'node' || symbol === 'way' || symbol === 'relation'); - }; - - const opts = { - onopentag: (name, attr) => { - if (name === 'action') { - currentAction = attr.type; - } - if (name === 'new' || name === 'old') { - currentMode = name; - } - if (isElement(name)) { - if (currentMode === 'new' && (currentAction === 'modify' || - currentAction === 'delete')) { - oldElement = _.cloneDeep(currentElement); - currentElement = attr; - currentElement.old = oldElement; - } else { - currentElement = attr; - } - currentElement.action = currentAction; - currentElement.type = name; - currentElement.tags = {}; - if (name === 'way') {currentElement.nodes = []; } - if (name === 'relation') {currentElement.members = []; currentMember = {};} - } - if (name === 'tag' && currentElement) { - currentElement.tags[attr.k] = attr.v; - } - - if (name === 'nd' && currentElement && currentElement.type === 'way') { - currentElement.nodes.push(attr); - } - - if (name === 'nd' && currentElement && currentElement.type === 'relation') { - currentMember.nodes.push(attr); - } - - if (name === 'member' && currentElement && currentElement.type === 'relation') { - currentMember = _.cloneDeep(attr); - currentMember.nodes = []; - currentElement.members.push(currentMember); - } - }, - onclosetag: (name) => { - if (name === 'action') { - const changeset = currentElement.changeset; - if (changesetsFilter && changesetsFilter.length) { - if (changesetsFilter.indexOf(changeset) !== -1) { - if (changesetMap[changeset]) { - changesetMap[changeset].push(currentElement); - } else { - changesetMap[changeset] = [currentElement]; - } - } - } else { - if (changesetMap[changeset]) { - changesetMap[changeset].push(currentElement); - } else { - changesetMap[changeset] = [currentElement]; - } - } - } - } - }; - - const parser = new htmlparser.Parser(opts, { - decodeEntities: true, - xmlMode: true - }); - parser.write(xml); - parser.end(); - - return changesetMap; -}; +/* + * Parse OSM Augmented Diff format, documented here: + * https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs + * (Contains both old and new versions of all modified elements) + */ +const parseAugmentedDiff = osmAdiffParser; -module.exports = { parseXml, parseChangesetXml, parseAugmentedDiff }; +module.exports = { parseOsmChangeXml, parseChangesetXml, parseAugmentedDiff }; diff --git a/package.json b/package.json index db25277..45b8a4b 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ }, "homepage": "https://github.com/OSMCha/osm-adiff-service", "dependencies": { + "@osmcha/osm-adiff-parser": "^2.0.0", "aws-sdk": "^2.625.0", "changetags": "^0.1.2", "htmlparser2": "^4.1.0", diff --git a/yarn.lock b/yarn.lock index e78c13d..353d08a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -17,6 +17,13 @@ dependencies: call-bind "^1.0.7" +"@osmcha/osm-adiff-parser@^2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@osmcha/osm-adiff-parser/-/osm-adiff-parser-2.0.0.tgz#8d199d51613f8fb9229ada87652d447bb0cfa9fc" + integrity sha512-KXpEHYaYQiDaUijYXDAIxsTv9XJ3sxzB/LbyLe8elXTcgcSdyg+RlbTT27Uh4MhBejWSk9ozVBNSpdSkuHW4NA== + dependencies: + sax "^1.4.1" + "@redis/bloom@1.2.0": version "1.2.0" resolved "https://registry.npmjs.org/@redis/bloom/-/bloom-1.2.0.tgz" @@ -1793,6 +1800,11 @@ sax@>=0.6.0: resolved "https://registry.npmjs.org/sax/-/sax-1.3.0.tgz" integrity sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA== +sax@^1.4.1: + version "1.4.1" + resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f" + integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg== + semver@5.3.0: version "5.3.0" resolved "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz"