-
Notifications
You must be signed in to change notification settings - Fork 3
/
server.js
executable file
·217 lines (189 loc) · 7.12 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#!/usr/bin/env node
'use strict';
const express = require('express')
const fs = require('fs')
const morgan = require('morgan')
const bodyParser = require('body-parser')
const util = require('util')
const oboe = require('oboe')
const API = require('overview-api-node')
const TokenBin = require('overview-js-token-bin')
// ---- Tunables ----
const MinWordLength = 3; // tokens shorter than this are filtered out before counting
const MaxNTokens = 150; // tokens sent to client
const SendInterval = 500; // ms between sends

// Monotonic counter; each /generate request takes the next value to tag its log lines.
let nextRequestId = 1;

// ---- HTTP application ----
// Middleware order: request logging first, then JSON body parsing.
const app = express();
app.use(morgan('short'));
app.use(bodyParser.json());
// Stopwords: plain-object lookup table. Each stopword is an own key mapped to
// null, so membership is tested with an own-property check, not by value.
const Stopwords = {};
for (const stopword of "a about above after again against all am an and any are aren't as at be because been before being below between both but by can't cannot could couldn't did didn't do does doesn't doing don't down during each few for from further had hadn't has hasn't have haven't having he he'd he'll he's her here here's hers herself him himself his how how's i i'd i'll i'm i've if in into is isn't it it's its itself let's me more most mustn't my myself no nor not of off on once only or other ought our ours ourselves out over own same shan't she she'd she'll she's should shouldn't so some such than that that's the their theirs them themselves then there there's these they they'd they'll they're they've this those through to too under until up very was wasn't we we'd we'll we're we've were weren't what what's when when's where where's which while who who's whom why why's with will won't would wouldn't you you'd you'll you're you've your yours yourself yourselves".split(' ')) {
  Stopwords[stopword] = null;
}

/*
 * Version of the format used for the state saved in the store. Current: 2.
 *
 * Formats so far:
 * 1: { progress: 1, tokens: [ [ 'foo', 2 ], ... ] }
 * 2: { version: 2, tokens: [ { name: 'foo', nDocuments: 2, frequency: 5 } ] },
 *    with overview-js-tokenizer 0.0.2.
 */
const StorageFormatVersion = 2;
/**
 * GET /generate
 *
 * Query parameters read here: `server`, `apiToken`, `documentSetId`.
 *
 * Responds with a JSON array of snapshots, each `{ progress, tokens }`:
 *  - If the store already holds state in the current storage format, the
 *    array has a single element with `progress: 1` and the stored tokens.
 *  - Otherwise the documents are streamed, tokens are counted, and a
 *    snapshot is written to the response every `SendInterval` ms. The last
 *    snapshot closes the array and its tokens are saved to the store.
 *
 * If either upstream stream reports a failure, the response is ended (with
 * status 502 when nothing has been written yet) instead of hanging.
 */
app.get('/generate', function(req, res, next) {
  const api = new API(req.query.server, req.query.apiToken, oboe);
  const requestId = nextRequestId++;
  const requestStartDate = new Date();
  const tokenBin = new TokenBin([]);
  const storeStream = api.store().getState();
  let docStream = null;
  let sendTimeoutId = null;
  let nDocumentsTotal = null;
  let firstSend = true;
  // True once this handler has finished (or given up on) the response, so
  // that late 'close'/failure notifications become no-ops.
  let responseDone = false;

  res.header('Content-Type', 'application/json');

  // Builds the snapshot for the tokens counted so far.
  function getSnapshotJson() {
    const tokens = tokenBin.getTokensByFrequency().slice(0, MaxNTokens);
    const progress = nDocumentsTotal ? (tokenBin.nDocuments / nDocumentsTotal) : 0;
    return {
      progress: progress,
      tokens: tokens,
    };
  }

  // Writes one array element; the final one also closes the array and the response.
  function sendSnapshot(lastSend, json) {
    res.write((firstSend ? '[' : ',') + JSON.stringify(json));
    firstSend = false;
    if (lastSend) {
      responseDone = true;
      res.end(']');
    }
    console.log(
      '[req %d] pushed %s JSON for docset %d - %d ms elapsed',
      requestId,
      lastSend ? 'last' : 'some',
      req.query.documentSetId,
      new Date() - requestStartDate
    );
  }

  // Sends an intermediate snapshot and re-arms the timer for the next one.
  function sendSnapshotAndQueue() {
    sendSnapshot(false, getSnapshotJson());
    sendTimeoutId = setTimeout(sendSnapshotAndQueue, SendInterval);
  }

  // Stops the periodic sends and ends the response after an upstream failure.
  function fail(stage, report) {
    if (responseDone) {
      return;
    }
    responseDone = true;
    let reason = 'unknown error';
    if (report && report.thrown) {
      reason = report.thrown.message;
    } else if (report && report.statusCode) {
      reason = 'HTTP ' + report.statusCode;
    }
    console.log('[req %d] %s failed: %s', requestId, stage, reason);
    if (sendTimeoutId) {
      clearTimeout(sendTimeoutId);
    }
    if (!res.headersSent) {
      res.status(502);
    }
    res.end();
  }

  // Tears everything down when the client goes away before we are finished.
  function abort() {
    if (responseDone) {
      return; // 'close' is also emitted after a normal completion
    }
    responseDone = true;
    console.log('[req %d] abort', requestId);
    storeStream.abort();
    if (docStream && docStream.abort) {
      docStream.abort(); // docStream will fire no more callbacks
    }
    if (sendTimeoutId) {
      clearTimeout(sendTimeoutId);
    }
    res.end();
  }

  // Check the store. If we don't have a saved result, generate + save one.
  storeStream.done(json => {
    // `typeof null` is 'object', so null must be excluded explicitly.
    if (json !== null && typeof json === 'object' && json.version === StorageFormatVersion) {
      responseDone = true;
      res.json([ { progress: 1, tokens: json.tokens } ]);
      console.log(
        '[req %d] sent the JSON for docset %d from the store - %d ms elapsed',
        requestId,
        req.query.documentSetId,
        new Date() - requestStartDate
      );
      return;
    }

    docStream = api.docSet(req.query.documentSetId).getDocuments([ "tokens" ], "random");
    docStream
      .node('pagination.total', total => {
        nDocumentsTotal = total;
        sendTimeoutId = setTimeout(sendSnapshotAndQueue, SendInterval);
      })
      .node('items.*', doc => {
        // A document without a token string contributes no tokens instead of throwing.
        const text = (doc && typeof doc.tokens === 'string') ? doc.tokens : '';
        const tokenStream = text
          .toLowerCase()
          .split(' ')
          // Call through Object.prototype so the check cannot be shadowed by a key.
          .filter(t => t.length >= MinWordLength && !Object.prototype.hasOwnProperty.call(Stopwords, t));
        tokenBin.addTokens(tokenStream);
        return oboe.drop; // remove the doc from memory
      })
      .done(() => {
        const lastJson = getSnapshotJson();
        clearTimeout(sendTimeoutId);
        sendSnapshot(true, lastJson);
        api.store().setState({
          version: StorageFormatVersion,
          tokens: lastJson.tokens
        });
      })
      .fail(report => fail('document stream', report));
  });
  storeStream.fail(report => fail('store lookup', report));

  // Listen on the response, not the request: since Node 16 the request's
  // 'close' event fires once the request has been read, which would abort
  // every request immediately.
  res.on('close', abort);
});
const readFilePromise = util.promisify(fs.readFile);
const setTimeoutPromise = util.promisify(setTimeout);

/**
 * Reads `./dist/show` (resolved against the current working directory).
 *
 * While the file does not exist yet (ENOENT), waits one second and tries
 * again, indefinitely.
 *
 * @returns {Promise<Buffer>} resolves with the file's bytes.
 * Rejects with the original error for any failure other than ENOENT;
 * previously such errors were swallowed and the promise resolved to
 * `undefined`.
 */
function readShowHtml() {
  return readFilePromise('./dist/show')
    .catch((e) => {
      if (e.code !== 'ENOENT') {
        throw e;
      }
      console.log('Waiting 1s for Webpack to generate dist/show');
      return setTimeoutPromise(1000, null)
        .then(() => readShowHtml());
    });
}
/**
 * GET /show — serves the bytes returned by readShowHtml() as HTML.
 *
 * A rejection from readShowHtml() (or an error thrown while sending) is
 * passed to Express via `next` instead of becoming an unhandled rejection.
 */
app.get('/show', (req, res, next) => {
  readShowHtml()
    .then(htmlBytes => {
      res
        .status(200)
        .header('Content-Type', 'text/html; charset=utf-8')
        // Cache-Control directives are comma-separated; ';' is not a valid separator.
        .header('Cache-Control', 'public, max-age=10')
        .send(htmlBytes);
    })
    .catch(next);
});
/**
 * GET /metadata — always answers with an empty JSON object, readable from
 * any origin.
 */
app.get('/metadata', (req, res, next) => {
  res
    .status(200)
    .header('Access-Control-Allow-Origin', '*')
    .header('Content-Type', 'application/json')
    // Cache-Control directives are comma-separated; ';' is not a valid separator.
    .header('Cache-Control', 'public, max-age=10')
    .send('{}');
});
// Below, we read/write the hidden tokens to a separate StoreObject, not
// the Store's main state, because doing so means we don't have to worry
// about a race condition in which the client posts back some hidden
// tokens before main state has been completely computed and saved (such
// that saving the main state would override the hidden words).
/**
 * GET /hidden-tokens
 *
 * Query parameters read here: `server`, `apiToken`.
 *
 * Responds with the `json` of the first store object when one exists,
 * otherwise with `{"hidden-tokens": []}`. If listing the store objects
 * fails, responds with status 502 instead of leaving the request hanging.
 */
app.get('/hidden-tokens', (req, res, next) => {
  const api = new API(req.query.server, req.query.apiToken, oboe);
  const storeStream = api.store().getObjects();

  res
    .status(200)
    .header('Content-Type', 'application/json')
    // Cache-Control directives are comma-separated; ';' is not a valid separator.
    .header('Cache-Control', 'private, max-age=0');

  storeStream.done(storeObjects => {
    // Guard against a null / non-array result before indexing into it.
    const first = Array.isArray(storeObjects) ? storeObjects[0] : undefined;
    if (first) {
      res.send(JSON.stringify(first.json));
    } else {
      res.send('{"hidden-tokens": []}');
    }
  });

  storeStream.fail(() => {
    if (!res.headersSent) {
      res.status(502).send('{"error": "could not load hidden tokens"}');
    }
  });
});
/**
 * PUT /hidden-tokens
 *
 * Query parameters read here: `server`, `apiToken`.
 * Body: JSON object whose `hidden-tokens` property is an array.
 *
 * Updates the first store object when one exists, otherwise creates one,
 * then responds 204. Responds 400 when `hidden-tokens` is not an array and
 * 502 when listing the store objects fails.
 *
 * NOTE(review): the 204 is sent right after the write is issued, without
 * waiting for it to complete — confirm whether update()/createObject()
 * expose a completion callback that should be awaited.
 */
app.put('/hidden-tokens', (req, res, next) => {
  const hiddenTokens = req.body ? req.body['hidden-tokens'] : undefined;
  if (!Array.isArray(hiddenTokens)) {
    // Storing `undefined` here would save an object with no 'hidden-tokens' key.
    res.status(400).json({ error: '"hidden-tokens" must be an array' });
    return;
  }

  const api = new API(req.query.server, req.query.apiToken, oboe);
  const storeStream = api.store().getObjects();
  const storeData = {
    indexedString: 'hidden tokens',
    json: {
      'hidden-tokens': hiddenTokens,
    },
  };

  storeStream.done(storeObjects => {
    // Guard against a null / non-array result before indexing into it.
    const existing = Array.isArray(storeObjects) ? storeObjects[0] : undefined;
    if (existing) {
      api.store().object(existing.id).update(storeData);
    } else {
      api.store().createObject(storeData);
    }
    res.status(204).end();
  });

  storeStream.fail(() => {
    if (!res.headersSent) {
      res.status(502).json({ error: 'could not save hidden tokens' });
    }
  });
});
// Static files come from the dist/ directory next to this script. They are
// marked immutable, and no index file is served for directory requests.
app.use(
  express.static(`${__dirname}/dist`, {
    index: false,
    immutable: true,
  })
);

// Use $PORT when it parses to a non-zero number; otherwise fall back to 80.
const PORT = parseFloat(process.env.PORT) || 80;
app.listen(PORT, function onListening() {
  console.log(`Listening on port ${PORT}`);
});