forked from mvdctop/Movie_Data_Capture
ADC_function.py
from typing import Tuple

import requests
from lxml import etree

import config


def get_data_state(data: dict) -> bool:  # detect whether metadata scraping failed
    if "title" not in data or "number" not in data:
        return False
    if data["title"] is None or data["title"] == "" or data["title"] == "null":
        return False
    if data["number"] is None or data["number"] == "" or data["number"] == "null":
        return False
    return True


def getXpathSingle(htmlcode, xpath):
    # Parse the HTML and return the first XPath match, stripped of list decoration.
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result1 = str(html.xpath(xpath)).strip(" ['']")
    return result1


def get_proxy(proxy: str, proxytype: str = None) -> dict:
    '''Build the proxies dict for requests; defaults to an HTTP proxy.
    '''
    if proxy:
        # Guard against proxytype being None before checking for a SOCKS proxy.
        if proxytype and proxytype.startswith("socks"):
            proxies = {"http": "socks5://" + proxy, "https": "socks5://" + proxy}
        else:
            proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
    else:
        proxies = {}
    return proxies


# Core HTTP GET helper used by all scrapers.
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)
    if ua is None:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"}  # noqa
    else:
        headers = {"User-Agent": ua}
    for i in range(retry_count):
        try:
            if proxy:
                result = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
            else:
                result = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
            result.encoding = "utf-8"
            if return_type == "object":
                return result
            else:
                return result.text
        except requests.exceptions.ProxyError:
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
        except requests.exceptions.ConnectionError:
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
    input("Press ENTER to exit!")
    exit()


def post_html(url: str, query: dict) -> requests.Response:
    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)
    for i in range(retry_count):
        try:
            # Pass the configured timeout so POST requests cannot hang indefinitely.
            result = requests.post(url, data=query, proxies=proxies, timeout=timeout)
            return result
        except requests.exceptions.ProxyError:
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
    print("[-]Connect Failed! Please check your Proxy or Network!")
    input("Press ENTER to exit!")
    exit()


def get_javlib_cookie() -> Tuple[dict, str]:
    import cloudscraper
    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)
    raw_cookie = {}
    user_agent = ""
    # Get __cfduid/cf_clearance and user-agent
    for i in range(retry_count):
        try:
            raw_cookie, user_agent = cloudscraper.get_cookie_string(
                "http://www.m45e.com/",
                proxies=proxies
            )
            break  # stop retrying once the Cloudflare cookie has been obtained
        except requests.exceptions.ProxyError:
            print("[-] ProxyError, retry {}/{}".format(i + 1, retry_count))
        except cloudscraper.exceptions.CloudflareIUAMError:
            print("[-] IUAMError, retry {}/{}".format(i + 1, retry_count))
    return raw_cookie, user_agent
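

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: it assumes a valid
    # config.ini is present so that config.Config().proxy() returns the expected
    # (proxy, timeout, retry_count, proxytype) tuple. The URL, XPath, and metadata
    # values below are placeholders for illustration only.
    page = get_html("https://example.com/")
    title = getXpathSingle(page, "//title/text()")
    print(title)
    print(get_data_state({"title": title, "number": "EXAMPLE-001"}))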