forked from scotch-io/node-web-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
claire.js
101 lines (83 loc) · 2.13 KB
/
claire.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var Q = require('q');
var app = express();
app.get('/list', function(req, res){
// Let's scrape.
// Gallery pages to process, visited one after another in list order. Each
// entry gives the page URL and the prefix used for files saved from that page.
// Declared with `var` so the list stays local to this request handler instead
// of leaking onto the global object as an implicit global.
var urls = [
/* Disabled galleries, kept for reference:
{
url: 'http://www.definebabe.com/gallery/1leu/claire-castel/',
name: '1leu'
},
{
url: 'http://www.definebabe.com/gallery/1let/claire-castel/',
name: '1let'
},
{
url: 'http://www.definebabe.com/gallery/1lew/claire-castel/',
name: '1lew'
},
{
url: 'http://www.definebabe.com/gallery/1les/claire-castel/',
name: '1les'
},
{
url: 'http://www.definebabe.com/gallery/1lev/claire-castel/',
name: '1lev'
}, */
{
url: 'http://www.definebabe.com/gallery/1ler/claire-castel/',
name: '1ler'
}
];
// Index of the next entry of `urls` to hand to getImgsFromPage.
var i = 0;
// Start the sequential crawl. `download` is assigned further down in this
// handler, after this call; that is only safe because it is first used from
// inside the page request's callback.
process();
/**
 * Handle the next entry of `urls`, then schedule itself again once that page's
 * promise settles (fulfilled or rejected). Logs a banner and stops when the
 * list is exhausted.
 */
function process() {
  var hasMore = i < urls.length;

  if (hasMore) {
    // Take the current entry and advance the cursor before starting the page.
    var nextPage = urls[i];
    i += 1;
    getImgsFromPage(nextPage).finally(process);
  } else {
    console.log('===========DONE==========');
  }
}
/**
 * Fetch one gallery page and download every image linked from it.
 *
 * @param {{url: string, name: string}} pageObj - `url` is the page to fetch;
 *   `name` prefixes each saved file (`<name>_<index>.jpg`).
 * @returns {Object} A Q promise. It is resolved once every matched link has
 *   been handled (downloaded, failed, or skipped because it has no href) and
 *   rejected with the request error when the page itself cannot be fetched.
 */
function getImgsFromPage(pageObj) {
  var deferred = Q.defer();

  request(pageObj.url, function(error, response, html) {
    if (error) {
      // Always settle the promise: leaving it pending would stall the
      // caller's `.finally(...)` chain and the crawl would never finish.
      console.log('Error', error);
      deferred.reject(error);
      return;
    }

    var $ = cheerio.load(html);
    var anchors = $('#ModelMenu + .lblock ul li > a');
    var total = anchors.length;
    var handled = 0;

    // Count one link as finished and resolve once all of them are.
    function markHandled() {
      handled++;
      if (handled === total) {
        deferred.resolve();
      }
    }

    if (total === 0) {
      // Nothing matched, so no download callback would ever resolve us.
      deferred.resolve();
      return;
    }

    anchors.each(function(idx) {
      var imgUrl = $(this).attr('href');
      var filename = pageObj.name + '_' + idx + '.jpg';
      console.log(imgUrl);

      if (!imgUrl) {
        // An anchor without an href has nothing to download; still count it
        // so the page can complete.
        markHandled();
        return;
      }

      download(imgUrl, filename, idx, function(err) {
        if (err) {
          console.log(filename + ' failed: ' + (err.message || err));
        } else {
          console.log(filename + " downloaded!");
        }
        // A failed download must not block the remaining pages.
        markHandled();
      });
    });
  });

  return deferred.promise;
}
/**
 * Save the resource at `uri` to the file `filename`, starting after a delay of
 * `idx * 500` ms so that downloads from one page are staggered.
 *
 * @param {string} uri - Address of the resource to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {number} idx - Position of the link on its page; scales the delay.
 * @param {Function} [callback] - Called exactly once: with no arguments after
 *   the output file has been closed, or with the error when the request or the
 *   file write fails.
 */
var download = function(uri, filename, idx, callback){
  var settled = false;

  // Report the outcome exactly once, whichever stream event arrives first.
  function done(err) {
    if (settled) {
      return;
    }
    settled = true;
    if (!callback) {
      return;
    }
    if (err) {
      callback(err);
    } else {
      callback();
    }
  }

  setTimeout(function() {
    var source;
    var target;

    try {
      // The former HEAD request was dropped: its result and error were never
      // used, so it only added a second round trip per image.
      source = request(uri);
      target = fs.createWriteStream(filename);
    } catch (err) {
      // Either call threw synchronously; stop the request if it was started.
      if (source && typeof source.abort === 'function') {
        source.abort();
      }
      done(err);
      return;
    }

    // Without 'error' listeners a failing stream raises an uncaught exception
    // and the callback would never run.
    source.on('error', function(err) {
      target.end();
      done(err);
    });
    target.on('error', function(err) {
      if (typeof source.abort === 'function') {
        source.abort();
      }
      done(err);
    });
    target.on('close', function() {
      done();
    });

    source.pipe(target);
  }, idx * 500);
};
// Reply right away: the crawl started above keeps running after this response
// is sent and reports its progress only through console output.
res.send('Check your console!')
});
// Bind the Express app to port 8081 (passed as a string, as before).
app.listen('8081');
// Announce the port immediately; this runs without waiting for the bind.
console.log('Magic happens on port 8081');
// Expose the app as the module's export and keep the local `exports` alias
// pointing at the same object.
module.exports = app;
exports = module.exports;