-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
138 lines (115 loc) · 4.02 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Load environment variables via dotenv before any process.env value is read below.
require('dotenv').config();
// Amount and unit of the pause between scraping runs; both are handed to
// parseToMiliseconds() when the interval timer is created at the bottom of the file.
const SCRAPPER_INTERVAL = process.env.SCRAPPER_INTERVAL_TIME;
const SCRAPPER_INTERVAL_UNIT = process.env.SCRAPPER_INTERVAL_UNIT;
const cheerio = require('cheerio');
const scraperCtrl = require('./presentation/scraped/controller');
const productCtrl = require('./presentation/products/controller');
const scraperNotifications = require('./presentation/notifications/controller');
const DomAnalyzer = require('./utils/domAnalyzer');
const RulesAnalyzer = require('./utils/rulesAnalyzer');
const Locale = require('./utils/locale');
const CL_TelegramBot = require('./utils/notifications/telegramBot');
const { toUpdateNotificationDate } = require('./utils/notifications/transformer');
const { parseToMiliseconds } = require('./utils/time');
const { printDate } = require('./utils/date/printDate');
const getHtml = require('./application/get-html-by-url');
// Debug flag: only the exact string 'true' enables the extra failure logging.
const DEBUG = process.env._IS_DEBUG;
const isDebug = DEBUG === 'true'
// Rows of the run in progress; replaced by execute() at the start of every run.
let toScraping = [];
// Start index of a run; execute() resets it to 0. Note that executeScraping()
// declares a parameter with the same name, which shadows this binding there.
let cont = 0;
// Number of rows in toScraping; executeScraping() stops once its index reaches it.
let arrayLength = 0;
const locale = new Locale();
// Message lookup function, called below as t(KEY, params) to build notification text.
const t = locale.getLocale();
// Shared Telegram client used to send notification messages.
const telegram = new CL_TelegramBot();
/**
 * Adapts one of the callback-style DomAnalyzer getters to a Promise.
 *
 * @param {*} selector - Selector configured on the scraper row (may be empty).
 * @param {Function} getter - Called as getter(selector, done); `done` receives the value.
 * @returns {Promise<*>} Resolves with the value handed to `done`, or with null
 *   when no selector is configured (the getter is not called in that case).
 */
const readDomValue = (selector, getter) => new Promise((resolve) => {
  if (!selector) {
    resolve(null);
    return;
  }
  getter(selector, resolve);
});

/**
 * Processes a single scraper row: loads its product and notification rules,
 * downloads the page, extracts price/stock/availability, runs the rules
 * analysis and sends the resulting notifications.
 *
 * @param {object} scraper - Row describing what to scrape (id, product_id,
 *   url_to_scrape, getting_mode and the three *_dom_selector fields are read).
 * @returns {Promise<void>} Rejects when any awaited step fails.
 */
const scrapeOne = async (scraper) => {
  const [product] = await productCtrl.get(scraper.product_id);
  const [rules] = await scraperNotifications.get(scraper.id);
  // NOTE(review): this skips the row when the first element returned by the
  // notifications controller is itself a non-empty array-like value. Kept
  // exactly as it was; confirm the intent against the controller's return shape.
  if (typeof rules === 'object' && rules?.length > 0) {
    return;
  }

  const { data, error } = await getHtml(scraper.url_to_scrape, scraper.getting_mode);
  if (error) {
    if (isDebug) {
      console.log(`fail:${scraper.url_to_scrape}`);
      console.log(`error:${error}`);
    }
    return;
  }

  const $ = cheerio.load(data);
  const dom = new DomAnalyzer($, data);
  // All three getters are started before waiting; Promise.all also guarantees
  // that a failure in any of them is observed instead of left unhandled.
  const [price, stock, availability] = await Promise.all([
    readDomValue(scraper.price_dom_selector, (selector, done) => dom.getPrice(selector, done)),
    readDomValue(scraper.stock_dom_selector, (selector, done) => dom.getStock(selector, done)),
    readDomValue(
      scraper.availability_dom_selector,
      (selector, done) => dom.getAvailability(selector, done),
    ),
  ]);

  if (dom.isDisabled) {
    // Awaited so that a failed update is reported by the caller's error
    // handling rather than becoming an unhandled rejection.
    await scraperCtrl.update(scraper.id, { enable: 0 });
    const disabledMsg = t('DISABLE_SCRAPER_PRODUCT', { id: scraper.id, name: product.name });
    telegram.send(disabledMsg);
    return;
  }

  const ruleAnalyze = new RulesAnalyzer(
    price,
    stock,
    availability,
    scraper.id,
    scraper.url_to_scrape,
    product,
    rules,
    t,
  );
  await ruleAnalyze.setSnap();
  ruleAnalyze.analyzePrice();
  ruleAnalyze.analyzeStock();
  ruleAnalyze.createSnap();

  const toSend = ruleAnalyze.getNotificationsToSend();
  const shouldNotify = toSend.length > 0 && process.env.ENABLE_NOTIFICATIONS === 'true';
  if (!shouldNotify) {
    return;
  }
  const toUpdate = toUpdateNotificationDate(toSend);
  await scraperNotifications.update(rules.id, toUpdate);
  toSend.forEach((send) => {
    telegram.send(send.message);
  });
};

/**
 * Scrapes `scraper` and then continues with the following rows of
 * `toScraping` until `arrayLength` rows have been visited.
 *
 * A failure while processing one row is logged and does NOT stop the run:
 * the next row is always attempted.
 *
 * @param {object} scraper - Row to process (ignored once cont >= arrayLength).
 * @param {number} cont - Index of `scraper` inside `toScraping`.
 * @returns {Promise<void>} Settles once the whole remainder of the run is done.
 */
const executeScraping = async (scraper, cont) => {
  if (cont >= arrayLength) {
    // End of the run. The process is left alive on purpose (no telegram stop,
    // no process exit) so that a later run can be started by the caller.
    console.log('\nFINISH SCRAPING AT');
    printDate();
    console.log('\n');
    return;
  }

  try {
    await scrapeOne(scraper);
  } catch (error) {
    console.error(error);
  }

  // Advance outside the try block so an error above can never end the run early.
  const next = cont + 1;
  return executeScraping(toScraping[next], next);
};
/**
 * Starts one scraping run: loads the enabled scraper rows, publishes them in
 * the module-level run state (`toScraping`, `arrayLength`, `cont`) and kicks
 * off processing with the first row.
 *
 * Failures (for example a rejected `getEnables()` call) are logged instead of
 * being left as unhandled rejections, so a periodic caller keeps running.
 *
 * @returns {Promise<void>} Never rejects.
 */
async function execute() {
  try {
    const rows = await scraperCtrl.getEnables();
    // Nothing to do for an empty (or missing) result set.
    const hasRows = rows?.length > 0;
    if (!hasRows) {
      return;
    }

    cont = 0;
    console.log('\nSTART SCRAPING AT');
    printDate();
    console.log('\n');
    toScraping = rows;
    arrayLength = rows.length;
    await executeScraping(toScraping[cont], cont);
  } catch (error) {
    console.error(error);
  }
}
// Run once right away, then repeat on the interval configured through the environment.
execute();
const scrapingIntervalMs = parseToMiliseconds(SCRAPPER_INTERVAL, SCRAPPER_INTERVAL_UNIT);
setInterval(execute, scrapingIntervalMs);