generated from technologiestiftung/template-repo-citylab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
2_parse_ckan_data.js
105 lines (93 loc) · 2.69 KB
/
2_parse_ckan_data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"use strict";
const fs = require("fs");
/**
 * Strip the query string from a URL.
 *
 * Only the search component is cleared; scheme, host, path and any
 * fragment are returned as serialized by the WHATWG URL parser.
 *
 * @param {string} url - Absolute URL to clean.
 * @returns {string} The serialized URL without its query parameters.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
function removeParametersFromURL(url) {
  // Assigning through Object.assign goes through the `search` setter.
  const parsed = Object.assign(new URL(url), { search: "" });
  return parsed.href;
}
/**
 * Tell whether a CKAN resource is a WFS/WMS GetCapabilities endpoint.
 *
 * @param {object} resource - One entry of a CKAN package's `resources`.
 * @returns {boolean} True when `format` is "WFS" or "WMS" and the URL
 *   contains a GetCapabilities request for a wms or wfs service.
 */
function isGeoServiceResource(resource) {
  if (resource.format !== "WFS" && resource.format !== "WMS") return false;
  // Resources without a URL cannot be a service endpoint; skip them
  // instead of failing on `.toLowerCase()`.
  if (typeof resource.url !== "string") return false;
  const url = resource.url.toLowerCase();
  return (
    url.includes("request=getcapabilities&service=wms") ||
    url.includes("request=getcapabilities&service=wfs")
  );
}

/**
 * Load the dataset names recorded by a previous run.
 *
 * @param {string} filePath - Location of `datasetsAll.json`.
 * @returns {Set<string>} The recorded names. Empty (with a warning) when
 *   the file does not exist or does not contain a JSON array.
 * @throws {Error} Read errors other than a missing file are rethrown.
 */
function loadKnownDatasetNames(filePath) {
  let raw;
  try {
    raw = fs.readFileSync(filePath, "utf-8");
  } catch (err) {
    if (err.code !== "ENOENT") throw err;
    console.warn(`${filePath} not found; treating all datasets as new`);
    return new Set();
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.warn(`${filePath} is not valid JSON (${err.message})`);
  }
  if (!Array.isArray(parsed)) {
    console.warn(`${filePath} holds no JSON array; treating all datasets as new`);
    return new Set();
  }
  return new Set(parsed);
}

/**
 * Collect the metadata that is stored per dataset in `ckan.json`.
 *
 * @param {object} item - CKAN package.
 * @param {object} geoResource - The WFS/WMS resource chosen for `item`.
 * @returns {object} Metadata object; `pdf` and `techHtml` are only set
 *   when a matching resource exists.
 */
function buildGeoData(item, geoResource) {
  const geoData = {
    title: item.title,
    serviceURL: removeParametersFromURL(geoResource.url),
    type: geoResource.format,
    name: item.name,
    tags: item.tags.map((tag) => tag.name),
    notes: item.notes,
    url: item.url,
    author: item.author,
  };
  for (const resource of item.resources) {
    if (resource.format === "PDF") {
      geoData.pdf = resource.url;
    }
    if (
      resource.format === "HTML" &&
      resource.description === "Technische Beschreibung"
    ) {
      geoData.techHtml = resource.url;
    }
  }
  return geoData;
}

/**
 * Step 2 of the scraper: extract all WFS/WMS datasets from the CKAN dump.
 *
 * Reads `<dirName>/data/ckan_data.json`, keeps every package that has a
 * WFS/WMS GetCapabilities resource, creates a folder with a `ckan.json`
 * for datasets that do not have one yet, and finally writes
 * `datasetsNew.json` (names absent from the previous `datasetsAll.json`)
 * and `datasetsAll.json` (all names found in this run).
 *
 * NOTE(review): `path` and `dirName` are not defined in the visible part
 * of this file; presumably they are provided as globals by the caller —
 * confirm, otherwise this function throws a ReferenceError.
 * NOTE(review): inputs are read relative to `dirName` while outputs are
 * written relative to the working directory (`./scraper/...`) — confirm
 * both point at the same folder.
 *
 * @param {Function} mainCallback - Called without arguments once both
 *   list files have been attempted; write errors are logged, not passed.
 * @returns {void}
 */
function parseCKANData(mainCallback) {
  console.log("2. parseCKANData ...");
  const ckanData = JSON.parse(
    fs.readFileSync(path.join(dirName, "/data/ckan_data.json"), "utf-8")
  );
  // Read the previous run's list once and compare names exactly; a
  // substring search on the raw file would treat "foo" as known whenever
  // "foo_bar" is listed.
  const knownNames = loadKnownDatasetNames(
    path.join(dirName, "data/datasetsAll.json")
  );
  const newDatasets = [];
  const allDatasets = [];

  for (const item of ckanData) {
    // get only WFS and WMS; when several resources match, the last one wins
    let geoResource;
    for (const resource of item.resources) {
      if (isGeoServiceResource(resource)) {
        geoResource = resource;
      }
    }
    if (!geoResource) continue;

    // get the metadata about the geo data
    const geoData = buildGeoData(item, geoResource);

    if (!knownNames.has(geoData.name)) {
      newDatasets.push(geoData.name);
    }

    // check if the dataset has a folder
    // if not add one and write file
    const filePath = `./scraper/data/datasets/${geoData.name}`;
    if (!fs.existsSync(filePath)) {
      fs.mkdirSync(filePath, { recursive: true });
      fs.writeFileSync(filePath + "/ckan.json", JSON.stringify(geoData));
    }
    allDatasets.push(geoData.name);
  }

  console.log("Amount of new datasets: ", newDatasets.length);
  console.log("Amount of all datasets: ", allDatasets.length);

  fs.writeFile(
    "./scraper/data/datasetsNew.json",
    JSON.stringify(newDatasets),
    { encoding: "utf8" },
    (newErr) => {
      if (newErr) {
        console.error("Could not write datasetsNew.json:", newErr);
      }
      fs.writeFile(
        "./scraper/data/datasetsAll.json",
        JSON.stringify(allDatasets),
        { encoding: "utf8" },
        (allErr) => {
          if (allErr) {
            console.error("Could not write datasetsAll.json:", allErr);
          }
          mainCallback();
        }
      );
    }
  );
}
// Public API of this module: only parseCKANData is exported.
module.exports = { parseCKANData };