-
Notifications
You must be signed in to change notification settings - Fork 387
/
discovery-upload.js
246 lines (220 loc) · 6.58 KB
/
discovery-upload.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
// Copyright IBM Corp. 2018. All Rights Reserved.
// Node module: loopback.io-workflow-scripts
// This file is licensed under the MIT License.
// License text available at https://opensource.org/licenses/MIT
/**
 * Script to upload docs to Watson Discovery to power search
 * on LoopBack.io. The script starts by resolving the environment from Watson
 * Discovery and then cascades through the functions below to upload.
 */
// Imports
const DiscoveryV1 = require('ibm-watson/discovery/v1');
const fs = require('fs-extra');
const retry = require('retry');
const chalk = require('chalk');
// Watson Discovery client; the credentials are read from the process environment.
const discoveryOptions = {
  version: '2018-03-05',
  username: process.env.DISCOVERY_USERNAME,
  password: process.env.DISCOVERY_PASSWORD
};
const discovery = new DiscoveryV1(discoveryOptions);

// State filled in while the script runs. Each value stays `null` until resolved.
let envID = null; // assigned from the listEnvironments response
let collectionID = null; // assigned once createCollection succeeds
let intervalID = null; // handle returned by setInterval in startUpload
/**
 * Load the documents to upload and define the batching / retry settings.
 */
// Every entry of this JSON object is a candidate document, keyed by name.
const files = fs.readJSONSync('_site/posts.json');

// Batching: BATCH_SIZE documents are dispatched every BATCH_TIME milliseconds.
const BATCH_TIME = 10 * 1000;
const BATCH_SIZE = 21;
// Bounds handed to `retry.operation` for the wait between upload attempts.
const minTimeout = 10 * 1000;
const maxTimeout = 20 * 1000;
// Running totals: batches started and documents uploaded successfully.
let batch = 0;
let count = 0;

// Drop every entry whose key does not mark it as a doc page.
for (const key of Object.keys(files)) {
  if (!key.startsWith('doc-')) {
    delete files[key];
  }
}

// Work queue of the remaining doc keys; uploadDocs consumes it batch by batch.
const keys = Object.keys(files);
console.log(`Total number of documents to upload: ${keys.length}`);
/**
 * Entry point of the script: fetch the list of Discovery Environments and
 * remember the ID of the user's environment in `envID` (needed by every later
 * API call). The default `system` environment is skipped; if several other
 * environments are returned, the last one listed wins.
 *
 * On success the chain continues with `listAndDeleteCollection`, which in turn
 * calls `createCollection`. The chain stops with a log message when the request
 * fails or when no environment other than `system` is returned.
 */
discovery.listEnvironments({}, function(err, data) {
  if (err) {
    console.log(`Unable to get Discovery Environment. ${err}`);
    return;
  }
  console.log(`Got environments.`);
  data.environments.forEach(env => {
    if (env.environment_id !== 'system') {
      envID = env.environment_id;
    }
  });
  // Without a usable environment every following API call would be issued
  // with a null environment_id, so stop here with a clear message instead.
  if (envID === null) {
    console.log('No Discovery Environment other than `system` was found.');
    return;
  }
  listAndDeleteCollection(createCollection);
});
/**
 * List the collections of the current environment. When two or more exist,
 * they are sorted with `collectionSort` and the first (oldest) one is handed
 * to `deleteCollection` together with `cb`. With fewer than two collections,
 * `cb` is invoked directly. If listing fails, the error is logged and `cb` is
 * not called.
 *
 * @param {Function} [cb] optional continuation, called without arguments
 */
function listAndDeleteCollection(cb) {
  const onListed = (listErr, listing) => {
    if (listErr) {
      console.log(`Failed to fetch list of collections. ${listErr}`);
      return;
    }
    const found = listing.collections;
    console.log(`Got a list of ${found.length} collections.`);
    if (found.length < 2) {
      // Nothing to clean up -- continue straight away.
      return cb ? cb() : undefined;
    }
    // Oldest first, so index 0 is the collection to remove.
    found.sort(collectionSort);
    deleteCollection(found[0].collection_id, cb);
  };
  discovery.listCollections({ environment_id: envID }, onListed);
}
/**
 * Comparator that orders collections by their `created` value, oldest first.
 *
 * @param {{created: *}} a first collection
 * @param {{created: *}} b second collection
 * @returns {number} -1 when `a` was created before `b`, 1 when after, 0 otherwise
 */
function collectionSort(a, b) {
  return a.created < b.created ? -1 : a.created > b.created ? 1 : 0;
}
/**
 * Create a new collection named `loopback<current epoch milliseconds>` in the
 * current environment. On success its ID is stored in `collectionID` and the
 * upload is started; on failure the error is logged and nothing else happens.
 */
function createCollection() {
  const collectionName = `loopback${Date.now()}`;
  console.log(collectionName);
  const params = { environment_id: envID, name: collectionName };
  discovery.createCollection(params, (createErr, created) => {
    if (createErr) {
      console.log(`Failed to create a new collection. ${createErr}`);
      return;
    }
    collectionID = created.collection_id;
    startUpload();
  });
}
/**
 * Start uploading documents to the collection identified by `collectionID`.
 *
 * The first batch is dispatched immediately and a further batch every
 * `BATCH_TIME` milliseconds. Nothing is started (only a message is logged)
 * when no collection ID has been resolved yet.
 */
function startUpload() {
  if (collectionID === null) {
    console.log('no collectionID set for upload');
    return;
  }
  // Arm the timer BEFORE the first batch: uploadDocs() clears `intervalID`
  // when it finds nothing left to upload. If that happens on the very first
  // call, the handle must already be set. Otherwise the timer would be
  // created afterwards and the end-of-run cleanup would run a second time.
  intervalID = setInterval(uploadDocs, BATCH_TIME);
  uploadDocs();
}
// Uploads that were dispatched to Discovery and have not settled yet, i.e.
// neither succeeded nor failed without another retry being scheduled.
let pendingUploads = 0;

/**
 * Upload the next batch of documents, each JSON entry as an individual document.
 *
 * Takes up to `BATCH_SIZE` keys off the front of `keys`. For every key the
 * entry's `text` becomes the document body and all other properties become
 * its metadata. Each upload runs inside a `retry` operation, so a rejected
 * request is attempted again.
 *
 * When no keys are left, the function waits (on later timer ticks) until all
 * dispatched uploads have settled. Only then does it clear the interval and
 * call `listAndDeleteCollection()` to remove the older collection, so the old
 * collection is not deleted while the new one is still being filled.
 */
function uploadDocs() {
  const processKeys = keys.splice(0, BATCH_SIZE);
  if (processKeys.length === 0) {
    if (pendingUploads > 0) {
      // Keep the timer running; check again on the next tick.
      console.log(
        `Waiting for ${pendingUploads} pending upload(s) to finish.`
      );
      return;
    }
    // No more batches to process -- clear interval
    console.log('No more batches to process.');
    clearInterval(intervalID);
    // Delete the first collection if it exists to save Document-Hours after second collection is ready
    return listAndDeleteCollection();
  }

  batch += 1;
  console.log(`Processing batch # ${batch}`);
  processKeys.forEach(key => {
    // Set up data for upload by splitting into file and metadata.
    const data = files[key];
    const file = { text: data.text };
    const metadata = Object.assign({}, data);
    delete metadata.text;
    const fileName = chalk.blue(metadata.url);

    // Retry operation -- In case a request is rejected
    const operation = retry.operation({
      forever: true,
      minTimeout: minTimeout,
      maxTimeout: maxTimeout,
      randomize: true
    });

    pendingUploads += 1;
    operation.attempt(function(currentAttempt) {
      const doc = {
        environment_id: envID,
        collection_id: collectionID,
        file: Buffer.from(JSON.stringify(file)),
        metadata: metadata,
        file_content_type: 'application/json'
      };
      // Coloured copy for logging; the numeric parameter itself is left untouched.
      const attemptLabel = chalk.yellow(currentAttempt);
      console.log(`Attempt # ${attemptLabel} to upload ${fileName}`);

      discovery.addDocument(doc, function(dErr) {
        // A truthy result means another attempt was scheduled, so the
        // upload is still pending.
        if (operation.retry(dErr)) {
          console.log(
            chalk.red(
              `Attempt # ${attemptLabel} to upload ${fileName} failed. Will retry.`
            )
          );
          return;
        }
        // Settled, one way or the other.
        pendingUploads -= 1;
        if (dErr) {
          console.log(`Upload of document errored with: ${dErr}`);
        } else {
          count += 1;
          console.log(
            chalk.green(
              `Attempt # ${attemptLabel} to upload ${fileName} succeeded. Total upload count: ${chalk.blue(
                count
              )}`
            )
          );
        }
      });
    });
  });
}
/**
 * Delete the collection with the given ID from the current environment.
 * `cb` is invoked only after a successful deletion; a failure is logged and
 * the callback is skipped.
 *
 * @param {*} collection_id ID of the collection to remove
 * @param {*} cb optional continuation, called without arguments
 */
function deleteCollection(collection_id, cb) {
  const target = { environment_id: envID, collection_id };
  discovery.deleteCollection(target, deleteErr => {
    if (deleteErr) {
      console.log(`Delete collection failed. ${deleteErr}`);
      return;
    }
    console.log(`Collection deletion successful.`);
    return cb ? cb() : undefined;
  });
}