-
Notifications
You must be signed in to change notification settings - Fork 15
/
vplayScraper.py
58 lines (48 loc) · 2.49 KB
/
vplayScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import re
class Scrap:
    """Regex-based extractors for vplay HTML pages.

    Every method takes the page markup as a string and returns the
    ``findall`` result of its pattern: a list of strings when the pattern
    has at most one capture group, otherwise a list of tuples with one item
    per capture group. An empty list means nothing matched.
    """

    # Collection poster link used by the search, serials and favorite
    # listings. Groups: (href, link title, poster title, image url).
    _POSTER_LINK = re.compile(
        r'<a href="(/c/.+?/)" title="(.+?)">'
        r'<span class="coll_poster" title="(.+?)" '
        r'style="background-image:url\((.+?)\);"></span>'
    )

    # Same link as rendered on the favorites page (floated elements, optional
    # whitespace between the anchor and the poster span). Same four groups.
    _FAVORITE_LINK = re.compile(
        r'<a href="(/c/.+?/)" title="(.+?)" style="float:left;" >'
        r'\n*?\s*?'
        r'<span class="coll_poster" title="(.+?)" '
        r'style="background-image:url\((.+?)\);float:left;"></span>'
    )

    # Season tab link. Groups: (href, "Sezonul N" label).
    _SEASON_LINK = re.compile(r'href="(/c/.+?/\d+/)"><span>(Sezonul \d+)</span>')

    # Episode box. Groups: (href, link title, thumbnail url, title attribute,
    # title text, trailing markup before </a>, last character of that markup).
    # The nested group is kept on purpose so the tuple shape callers index
    # into stays exactly as it was.
    _EPISODE_BOX = re.compile(
        r'<a href="(.+?)" title="(.+?)" class="coll-episode-box">\s*'
        r'<span class="thumb" style="background-image:url\((.+?)\);"></span>\s*'
        r'<span class="title" title="(.+?)">(.+?)</span>'
        r'((.|\n)*?)</a>'
    )

    # Watch URL; the single group is the path segment after /watch/.
    _EPISODE_ID = re.compile(r'http://vplay.ro/watch/(.+?)/')

    # NOTE(review): literal pattern tied to one collection id and season;
    # looks like a debugging stub rather than a real "last page" detector.
    _LAST_PAGE = re.compile(r'href="/c/1g6g9v5m/2/"><span>Sezonul 2</span>')

    def scrapFavorite(self, page):
        """Return poster links found before the ``<h2>Colec`` heading.

        Returns ``[]`` when the heading is not present in ``page``.
        """
        heading = page.find("<h2>Colec")
        if heading == -1:
            return []
        return self._POSTER_LINK.findall(page[:heading])

    def scrapFavorites(self, page):
        """Return poster links between the styled ``Cole...`` heading and the footer.

        Returns ``[]`` when either the heading or the footer marker is missing.
        """
        start = page.find('<h2 style="color:#3b5998; font-size:17px; ">Cole')
        if start == -1:
            return []
        section = page[start:]
        end = section.find('<div id="footer" class="foooter-wrap">')
        if end == -1:
            return []
        return self._FAVORITE_LINK.findall(section[:end])

    def scrapSearch(self, page):
        """Return every poster link found anywhere in ``page``."""
        return self._POSTER_LINK.findall(page)

    def scrapSerials(self, page):
        """Return poster links found from the ``<h2>Colec`` heading onwards.

        Returns ``[]`` when the heading is not present in ``page``.
        """
        # Single-argument print() emits the same text on Python 2 and 3.
        print("Scrap Seriale")
        heading = page.find("<h2>Colec")
        if heading == -1:
            return []
        return self._POSTER_LINK.findall(page[heading:])

    def scrapSeasons(self, page):
        """Return ``(href, label)`` tuples for every season link in ``page``."""
        return self._SEASON_LINK.findall(page)

    def scrapEpisodes(self, page):
        """Return one tuple per episode box found in ``page``.

        The first match is echoed to stdout as a trace. A page without any
        episode box yields ``[]`` instead of raising ``IndexError``.
        """
        episodes = self._EPISODE_BOX.findall(page)
        if episodes:
            print(episodes[0])
        return episodes

    def scrapEpisodeId(self, page):
        """Return the id segment of every ``http://vplay.ro/watch/<id>/`` URL."""
        return self._EPISODE_ID.findall(page)

    def scrapLastPage(self, page):
        """Return all occurrences of the hard-coded season-2 link in ``page``.

        Also dumps a marker, the whole page and the matches to stdout.
        """
        found = self._LAST_PAGE.findall(page)
        # Debug trace kept as-is; note that it prints the entire page.
        print("TESTTTTT")
        print(page)
        print(found)
        return found