-
Notifications
You must be signed in to change notification settings - Fork 4
/
misc.py
89 lines (76 loc) · 2.63 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
import urllib2
import re
from lxml import etree
import socket
import gettext
import chardet
import httplib
# Default request headers passed to urllib2.Request by readUrl; the
# User-Agent value imitates a desktop Firefox browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB;'
        'rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4'
    ),
}
def force_unicode(string, encoding='utf-8'): #{{{
    """Coerce ``string`` to a text (unicode) object.

    Byte strings are decoded with ``encoding``, text is returned untouched,
    and any other object is converted through the text type's constructor.

    Raises UnicodeDecodeError if a byte string is not valid in ``encoding``.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 (where ``bytes`` is ``str``),
    # so no interpreter-specific name has to be spelled out.
    text_type = type(u'')
    # isinstance() instead of an exact type() comparison: subclasses of the
    # byte string type must be decoded with ``encoding`` too, not pushed
    # through the text constructor (which would assume ASCII).
    if isinstance(string, bytes):
        return string.decode(encoding)
    if isinstance(string, text_type):
        return string
    return text_type(string)
#}}}
def readUrl(url, cookies=None, bot=None): #{{{
    """Fetch ``url`` and return the raw response body, or None.

    URLs matching the ``http://`` pattern are first probed with a HEAD
    request; the body is downloaded only when the probe answers with a 2xx
    status and a ``text/*`` Content-Type.  Other URLs skip the probe.

    Args:
        url: address to fetch.
        cookies: optional value for the ``Cookie`` request header; takes
            precedence over the bot-provided one.
        bot: optional object whose ``settings["vkontakte_cookies"]`` supplies
            the ``Cookie`` header for vkontakte.ru URLs.

    Returns:
        The response body as returned by ``read()``, or None when the probe
        rejects the URL or the download fails.

    Errors raised while probing (connecting / HEAD) propagate to the caller.
    """
    parsedurl = re.search('http://([^/]+)/?(.*)', url)
    if parsedurl:
        # Same 5 second limit as the download below, so a silent host cannot
        # block the caller forever.
        probe = httplib.HTTPConnection(parsedurl.group(1), timeout=5)
        try:
            probe.request('HEAD', '/' + parsedurl.group(2))
            response = probe.getresponse()
            status = response.status
            # A missing header is treated like a non-text answer.
            content_type = response.getheader('Content-type') or ''
        finally:
            # Always release the probe socket, even when the request fails.
            probe.close()
        if str(status)[0] != '2':
            return None
        if not content_type.startswith('text/'):
            return None
    try:
        # Work on a private copy: writing the Cookie into the module-level
        # ``headers`` would leak it into every later request to other hosts.
        request_headers = dict(headers)
        if bot and url.startswith(('http://vkontakte.ru',
                                   'http://www.vkontakte.ru')):
            request_headers['Cookie'] = bot.settings["vkontakte_cookies"]
        if cookies:
            request_headers['Cookie'] = cookies
        request = urllib2.Request(url.encode('utf-8'), None, request_headers)
        link = urllib2.urlopen(request, timeout=5)
        try:
            return link.read()
        finally:
            link.close()
    except Exception:
        # Best effort: any failure to build or perform the request means
        # "no data" for the caller.
        return None
#}}}
def getImgXML(img_url, img_src): #{{{
    """Build an XHTML-IM fragment: an image ``img_src`` linking to ``img_url``.

    Quotes and angle brackets are stripped from both values and ``&`` is
    escaped as ``&amp;`` so they can be embedded in the single-quoted
    attribute values of well-formed XML.

    Returns the complete ``<html>...</html>`` markup as a string.
    """
    def sanitize(value):
        # Drop characters that could terminate the attribute or open a tag.
        value = re.sub('"|\'|<|>', '', value)
        # A raw '&' makes the XML ill-formed, so escape it as an entity.
        return value.replace('&', '&amp;')

    return ('<html xmlns=\'http://jabber.org/protocol/xhtml-im\'>'
            '<body xml:lang=\'en-US\' xmlns=\'http://www.w3.org/1999/xhtml\'>'
            '<a href=\'%s\'><img alt=\'img\' src=\'%s\' /></a>'
            '</body>'
            '</html>') % (sanitize(img_url), sanitize(img_src))
#}}}
def getTitle(link, bot): #{{{
    """Return the text of the page's ``<title>``, or '' when unavailable.

    danbooru.donmai.us and gelbooru.com links are never fetched.  The page is
    decoded as koi8-r for opennet.ru links, otherwise as utf-8 with a cp1251
    fallback.  Tabs are removed from the title and newlines become spaces.
    Any failure (download, decoding, parsing, missing title) yields ''.
    """
    if re.search(r'^http://danbooru\.donmai\.us', link) or \
            re.search(r'^http://(www\.)?gelbooru\.com', link):
        return ''
    try:
        data = readUrl(link, None, bot)
        if data is None:
            # readUrl signals "nothing usable" with None.
            return ''
        if "opennet.ru" in link:
            data_enc = data.decode('koi8-r')
        else:
            try:
                data_enc = data.decode('utf-8')
            except UnicodeDecodeError:
                data_enc = data.decode('cp1251')
        title = etree.HTML(data_enc).find('*//title')
        if title is None or title.text is None:
            return ''
        return title.text.strip().replace('\t', '').replace('\n', ' ')
    except Exception:
        # Title lookup is best effort; a bad page must not break the caller.
        return ''
#}}}
def makeTiny(link, bot): #{{{
    """Ask the TinyURL API for a short alias of ``link``.

    Returns whatever readUrl returns for the API call (the response body, or
    None when readUrl gives up), or '' if the request raises.
    """
    import urllib  # local import: only this function needs urllib.quote

    if isinstance(link, unicode):
        link = link.encode('utf-8')
    # Percent-encode the whole target so that its own '&', '#', '?' or
    # non-ASCII bytes are not interpreted as part of the API request.
    url = 'http://tinyurl.com/api-create.php?url=%s' % urllib.quote(link, safe='')
    try:
        return readUrl(url, None, bot)
    except Exception:
        return ''
#}}}
def _(text):
    """Translate ``text`` with gettext and return the result as text.

    A byte-string result is decoded as UTF-8; a result that is already text
    is returned as is, because decoding it again fails on non-ASCII
    characters.
    """
    translated = gettext.gettext(text)
    if isinstance(translated, bytes):
        translated = translated.decode('utf-8')
    return translated