-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
81 lines (63 loc) · 2.14 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
const chromium = require('chrome-aws-lambda');
const CoreClass = require('./src/core.class')
const LogClass = require('./src/logger.class')
// Module-level state shared by the functions in this file.

// Page used by the crawl handler; created on demand and reset to null after each run.
let page;

// Scrape results keyed by task name; cleared after a successful response.
let buffer = {};

// Browser instance assigned by getBrowserPage(); reset to null after each run.
let browser;

// Logger instance used for debug/error output throughout this file.
const Logger = LogClass();
/**
 * Launches headless Chromium through chrome-aws-lambda's bundled puppeteer
 * and opens a fresh page in it.
 *
 * Side effect: the launched browser is stored in the module-level `browser`
 * variable so the caller can close it later.
 *
 * @returns {Promise<object>} Resolves with the newly created page.
 */
async function getBrowserPage() {
  // Start from the flags chrome-aws-lambda supplies. No sandbox flags are
  // added here; whatever `chromium.args` contains is used as-is.
  const args = [
    ...chromium.args,
    '--disable-infobars',
    // Spelling matters: Chromium does not recognise misspelled switches.
    '--ignore-certificate-errors',
    // NOTE(review): this switch normally takes a list of SPKI hashes as its
    // value; with no value it probably has no effect -- confirm whether it
    // is still needed.
    '--ignore-certificate-errors-spki-list',
    // No quotes around the value: the args are handed to the browser process
    // without a shell, so quotes would become part of the user-agent string.
    '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3312.0 Safari/537.36',
  ];

  browser = await chromium.puppeteer.launch({
    args,
    defaultViewport: chromium.defaultViewport,
    executablePath: await chromium.executablePath,
    headless: chromium.headless,
    ignoreHTTPSErrors: true,
  });

  return browser.newPage();
}
exports.crawlersingle = async (req, res) => {
const model = req.body;
const Engine = CoreClass()
if ( !model || !Engine.validateModel(model) ) return res.send('404, Please provide valid model', model);
try{
Logger.debug('Initializing model '+model.name)
// starts browser
if (!page) page = await getBrowserPage();
await page.setDefaultNavigationTimeout(0);
// bind console log of evaluate
page.on('console', msg => Logger.log('eval log', msg.text()));
// start process tasks list | for tasks runtime updates, we use "for" to count new tasks
let taskName = model.name
let task = model.task
buffer[taskName] = {}
Logger.debug('initialized task ' + taskName)
buffer[taskName] = await Engine.scrapeData({ model, task, page, taskName })
Logger.debug('finished task '+taskName)
await page.close();
await browser.close()
res.send(buffer);
buffer = {}
browser = null
page = null
return true;
}catch(err){
Logger.error(err)
await page.close();
await browser.close()
browser = null
page = null
return res.status(403).send(err.message)
}
};