This repository has been archived by the owner on Sep 15, 2020. It is now read-only.
forked from nasa/cmr-stac
-
Notifications
You must be signed in to change notification settings - Fork 1
/
crawler.js
87 lines (75 loc) · 2.31 KB
/
crawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// Verifies a CMR STAC endpoint by crawling it and ensuring that the crawled endpoints return a response.
// Usage: node crawler.js [stac root]
const _ = require('lodash');
const axios = require('axios');
// Command-line arguments that follow `node crawler.js`.
const [, , ...args] = process.argv;

// Zero or one argument is accepted; anything more is a usage error.
if (args.length >= 2) {
  console.error('Expects one argument of CMR STAC root to crawl.');
  process.exit(1);
}

// Root URL to crawl. A missing (or empty) argument falls back to the local default.
const cmrStacRoot = args[0] ? args[0] : 'http://localhost:3000/cmr-stac';

// Upper bounds on how many entries are sampled at each level of the crawl.
const numProviders = 2;
const numCollections = 2;
const numGranules = 2;
/**
 * GETs `path` with axios and resolves to the `data` property of the response.
 *
 * Logs the path before the request. If anything fails, logs the path to
 * stderr and rethrows the original error unchanged.
 *
 * @param {string} path - URL to request.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const fetch = async (path) => {
  try {
    console.log('Fetching', path);
    const { data } = await axios.get(path);
    return data;
  } catch (err) {
    console.error('Error fetching path', path);
    throw err;
  }
};
/**
 * Builds a lookup from link relation (`rel`) to URL (`href`) out of
 * `obj.links`. When several links share a `rel`, the last one wins.
 *
 * @param {{links: Array<{rel: string, href: string}>}} obj - Object carrying a `links` array.
 * @returns {Object<string, string>} Map of `rel` to `href`.
 */
const linksByRel = (obj) => {
  const hrefByRel = {};
  obj.links.forEach(({ rel, href }) => {
    hrefByRel[rel] = href;
  });
  return hrefByRel;
};
/**
 * Fetches `link` and checks that the returned document is deeply equal
 * (per `_.isEqual`) to `obj`, the object the link was taken from.
 *
 * @param {string} link - URL of the `self` link to fetch.
 * @param {*} obj - The object expected to be returned from `link`.
 * @param {Function} [modifySelfFn=_.identity] - Transformation applied to the
 *   fetched document before it is compared with `obj`. Defaults to identity,
 *   i.e. the fetched document is compared as-is.
 * @returns {Promise<void>} Resolves when the documents match.
 * @throws {Error} 'Self not the same as obj' when they differ. Errors raised
 *   by `fetch` propagate unchanged.
 */
const verifySelfLink = async (link, obj, modifySelfFn = _.identity) => {
  // Run the fetched document through the caller-supplied hook so that a
  // custom modifySelfFn actually takes part in the comparison.
  const self = modifySelfFn(await fetch(link));
  if (!_.isEqual(self, obj)) {
    // Dump both sides so the mismatch can be diagnosed from the log.
    console.log(`self: ${JSON.stringify(self, null, 2)}`);
    console.log(`obj: ${JSON.stringify(obj, null, 2)}`);
    throw new Error('Self not the same as obj');
  }
};
/**
 * Fetches an items/search URL and verifies the `self` link of the first
 * `numGranules` entries in the response's `features` array, one at a time.
 *
 * @param {string} link - URL whose response carries a `features` array.
 * @returns {Promise<void>}
 */
const crawlItemsLink = async (link) => {
  const { features } = await fetch(link);
  const sampled = features.slice(0, numGranules);
  for (const feature of sampled) {
    const { self } = linksByRel(feature);
    await verifySelfLink(self, feature);
  }
};
/**
 * Verifies a collection's `self` link and then crawls its `items` link.
 *
 * @param {object} collection - Collection object carrying a `links` array.
 * @returns {Promise<void>}
 */
const crawlCollection = async (collection) => {
  const { self, items } = linksByRel(collection);
  await verifySelfLink(self, collection);
  // FUTURE: crawling the `stac` link with crawlItemsLink is disabled because
  // it is failing due to invalid handling of the collectionId param.
  await crawlItemsLink(items);
};
/**
 * Crawls one provider: verifies its `self` link, crawls the first
 * `numCollections` entries of its `collections` link response, then crawls
 * its `search` link as an items link. Every step runs sequentially.
 *
 * @param {object} provider - Provider object carrying a `links` array.
 * @returns {Promise<void>}
 */
const crawlProvider = async (provider) => {
  const {
    self,
    collections: collectionsUrl,
    search: searchUrl,
  } = linksByRel(provider);
  await verifySelfLink(self, provider);

  const { collections } = await fetch(collectionsUrl);
  for (const collection of collections.slice(0, numCollections)) {
    await crawlCollection(collection);
  }

  await crawlItemsLink(searchUrl);
};
/**
 * Fetches the STAC root (`cmrStacRoot`) and crawls the first `numProviders`
 * entries of its `links` array, one after another.
 *
 * NOTE(review): each `links` entry is handed to crawlProvider as a provider
 * object, so it is presumably expected to carry its own `links` array;
 * confirm against the root response.
 *
 * @returns {Promise<void>}
 */
const crawlProviders = async () => {
  const { links } = await fetch(cmrStacRoot);
  for (const provider of links.slice(0, numProviders)) {
    await crawlProvider(provider);
  }
};
// Entry point: run the crawl, print 'Success' when it completes, and on any
// failure print the error and exit with status 1.
(async () => {
  try {
    await crawlProviders();
    console.log('Success');
  } catch (error) {
    console.error(error);
    process.exit(1);
  }
})();