-
Notifications
You must be signed in to change notification settings - Fork 1
/
downttvideo.py
332 lines (260 loc) · 9.98 KB
/
downttvideo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
""" 今日头条视频下载
Usage:
downttvideo [-im] [-c=<thread_count>] <video>
downttvideo (-h | --help)
downttvideo (-v | --version)
Options:
-h --help 显示帮助
-v --version 显示版本号
-i --id 启用ID,参数为头条视频ID,默认为头条视频URL
-m --multi 开启多线程
-c --threadcount=<count> 多线程数量,默认为计算机核心数
Examples:
使用头条视频URL进行下载
downttvideo http://www.365yg.com/item/6526784250472038919
使用头条视频ID进行下载
downttvideo -i 6526784250472038919
开启多线程,并设置8个线程下载
downttvideo -imc 8 6526784250472038919
"""
import os
import os.path
import sys
import math
import random
import base64
import logging
import binascii
import threading
import urllib.parse
import multiprocessing
import requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from docopt import docopt
from tqdm import tqdm
# Prefix joined with a video ID to form the video page URL.
BASE_URL = "http://www.365yg.com/item/"
# Default number of download threads: the number of CPUs reported by the OS.
# cpu_count must be *called*; storing the function object itself would break
# every arithmetic/range() use of the thread count.
DEFAULT_THREAD_COUNT = multiprocessing.cpu_count()
# Program options and the address of the video to download.
class Config:
    """Class-level settings shared by the whole script."""

    # Address of the video page to download from
    video_url = ""
    # Whether the <video> argument is a video ID
    enable_id = False
    # Whether multi-threaded download is switched on
    enable_multi = False
    # Number of threads actually used
    thread_count = 1
# Directory and file name of the running script.
APP_PATH, APP_FILE_NAME = os.path.split(os.path.abspath(sys.argv[0]))
# Default directory for downloaded videos.
DEFAULT_OUT_PATH = os.path.join(APP_PATH, "videos")
ARTICLE_URL = "http://www.toutiao.com/c/user/article/"
VIDEO_ITEM_URL = "http://www.toutiao.com/item/"

# logging.FileHandler opens its file as soon as it is constructed and does
# not create missing parent directories, so make sure "logs" exists first;
# otherwise the script dies with FileNotFoundError on a fresh checkout.
_LOG_DIR = os.path.join(APP_PATH, "logs")
os.makedirs(_LOG_DIR, exist_ok=True)

# File logger; only ERROR and above are recorded.
LOGGER = logging.getLogger('main')
LOGGER.setLevel(logging.ERROR)
FIL_HANDLER = logging.FileHandler(
    os.path.join(_LOG_DIR, "main.log")
)
FORMATTER = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
FIL_HANDLER.setFormatter(FORMATTER)
LOGGER.addHandler(FIL_HANDLER)
class VideoDownloader(object):
    """Front end that hands the download to a single- or multi-thread backend."""

    def __init__(self, url, video_info, filepath=None, multi=False,
                 thread_count=DEFAULT_THREAD_COUNT):
        """Remember the download parameters; nothing is fetched yet.

        ``video_info`` is forwarded unchanged as the backend's
        ``video_title`` argument.
        """
        self._downloader = None
        self._thread_count = thread_count
        self._multi = multi
        self._filepath = filepath
        self._video_info = video_info
        self._url = url

    def download(self):
        """Build the backend selected by ``multi`` and run its download."""
        if not self._multi:
            backend = SingleThreadDownloader(
                self._url, self._video_info, self._filepath
            )
        else:
            backend = MultiThreadDownloader(
                self._url, self._video_info, self._filepath, self._thread_count
            )
        self._downloader = backend
        backend.download()
class SingleThreadDownloader(object):
    """Download a video over one streamed HTTP request."""

    def __init__(self, url, video_title, filepath=None):
        """Store the source URL, the title used as file name, and the target directory.

        When ``filepath`` is falsy the file goes to ``DEFAULT_OUT_PATH``.
        """
        super(SingleThreadDownloader, self).__init__()
        self._url = url
        self._video_title = video_title
        self._filepath = filepath

    def download(self):
        """Stream the video to ``<directory>/<title>.mp4`` with a progress bar.

        A non-200 response is logged as an error and nothing is written.
        """
        response = requests.get(self._url, stream=True)
        try:
            if response.status_code != 200:
                LOGGER.error("获取视频失败")
                return
            out_dir = self._filepath or DEFAULT_OUT_PATH
            # The target directory is not guaranteed to exist yet.
            os.makedirs(out_dir, exist_ok=True)
            filename = os.path.join(out_dir, self._video_title + ".mp4")
            # Content-Length may be absent (e.g. chunked transfer); tqdm
            # accepts total=None and then shows a counter without a bar.
            length = response.headers.get("Content-Length")
            filesize = int(length) if length is not None else None
            with open(filename, "wb") as file, \
                    tqdm(total=filesize, desc="下载中", unit_scale=True) as pbar:
                for chunk in response.iter_content(chunk_size=4096):
                    if chunk:
                        # No per-chunk flush: the file is flushed on close,
                        # and flushing every 4 KiB only defeats buffering.
                        file.write(chunk)
                        pbar.update(len(chunk))
            LOGGER.info("下载完成")
        finally:
            # A streamed response holds its connection until closed.
            response.close()
class MultiThreadDownloader(object):
    """Download a video as parallel byte ranges and merge them into one file."""

    def __init__(self, url, video_title, filepath, thread_count):
        """Store the download parameters.

        When ``filepath`` is falsy the file goes to ``DEFAULT_OUT_PATH``.
        ``thread_count`` is coerced to an integer of at least 1.
        """
        super(MultiThreadDownloader, self).__init__()
        self._url = url
        self._video_title = video_title
        if filepath:
            self._filepath = filepath
        else:
            self._filepath = DEFAULT_OUT_PATH
        # Zero or negative counts would make the range arithmetic meaningless.
        self._thread_count = max(1, int(thread_count))
        # 1-based indexes of parts whose download did not complete.
        self._failed_parts = []

    @staticmethod
    def _part_range(index, filesize, thread_count):
        """Return the inclusive ``(start, end)`` byte offsets of part ``index``.

        ``index`` is 1-based. Returns ``None`` when the part would start at
        or beyond the end of the file, which happens when there are more
        threads than bytes to share out.
        """
        persize = math.ceil(filesize / thread_count)
        start_pos = (index - 1) * persize
        if start_pos >= filesize:
            return None
        end_pos = min(start_pos + persize, filesize) - 1
        return start_pos, end_pos

    def _part_path(self, index):
        """Return the path of the temporary file holding part ``index``."""
        return os.path.join(
            self._filepath, self._video_title + ".mp4.pack%s" % index
        )

    def _fetch_part(self, index, filesize):
        """Download part ``index`` to its pack file; return True on success."""
        byte_range = self._part_range(index, filesize, self._thread_count)
        if byte_range is None:
            # Nothing to fetch for this part.
            return True
        start_pos, end_pos = byte_range
        expected = end_pos - start_pos + 1
        headers = {"Range": "bytes=%s-%s" % (start_pos, end_pos)}
        response = requests.get(self._url, stream=True, headers=headers)
        try:
            # Only a 206 Partial Content reply carries the requested range.
            if response.status_code != 206:
                return False
            written = 0
            pbar = tqdm(
                total=expected, desc="#%s下载中" % index, unit_scale=True,
                position=(index - 1), ncols=60
            )
            try:
                with open(self._part_path(index), "wb") as part_file:
                    for chunk in response.iter_content(chunk_size=4096):
                        if chunk:
                            part_file.write(chunk)
                            written += len(chunk)
                            pbar.update(len(chunk))
            finally:
                pbar.close()
            # A short part would silently corrupt the merged video.
            return written == expected
        finally:
            response.close()

    def __action(self, index, filesize):
        """Thread body: fetch part ``index`` and record it if it fails."""
        try:
            succeeded = self._fetch_part(index, filesize)
        except (requests.RequestException, OSError):
            succeeded = False
        if succeeded:
            LOGGER.info("#%s 下载完成" % index)
        else:
            LOGGER.error("#%s 下载视频失败" % index)
            self._failed_parts.append(index)

    def download(self):
        """Download the file in parallel parts, then merge them.

        The size is taken from the ``Content-Length`` of an initial request.
        If the size cannot be determined, or any part fails, an error is
        logged, leftover part files are removed and no merged file is
        produced.
        """
        response = requests.get(self._url, stream=True)
        try:
            if response.status_code != 200:
                LOGGER.error("获取视频文件大小失败")
                return
            filesize = int(response.headers.get("Content-Length", 0))
        finally:
            # Only the headers are needed; release the connection.
            response.close()
        if filesize == 0:
            LOGGER.error("未能正确获取视频文件大小")
            return

        os.makedirs(self._filepath, exist_ok=True)
        self._failed_parts = []
        workers = []
        for index in range(1, self._thread_count + 1):
            if self._part_range(index, filesize, self._thread_count) is None:
                continue
            worker = threading.Thread(
                target=self.__action,
                kwargs={"index": index, "filesize": filesize},
                daemon=True,
            )
            worker.start()
            workers.append(worker)
        # Join only the threads started here; other live threads in the
        # process are not ours to wait for.
        for worker in workers:
            worker.join()

        if self._failed_parts:
            # Log message: "part download failed, merge cancelled".
            LOGGER.error("分片下载失败,取消合并: %s" % sorted(self._failed_parts))
            for index in range(1, self._thread_count + 1):
                leftover = self._part_path(index)
                if os.path.exists(leftover):
                    os.remove(leftover)
            return
        self.__merge_file(filesize)

    def __merge_file(self, filesize):
        """Concatenate the part files in order into ``<title>.mp4`` and delete them."""
        fullfilepath = os.path.join(self._filepath, self._video_title + ".mp4")
        pbar = tqdm(
            total=filesize, desc="合并中", unit_scale=True,
            position=self._thread_count, ncols=60
        )
        try:
            with open(fullfilepath, "wb") as fullfile:
                for index in range(1, self._thread_count + 1):
                    if self._part_range(index, filesize, self._thread_count) is None:
                        # Empty part: no pack file was ever created for it.
                        continue
                    part_path = self._part_path(index)
                    with open(part_path, "rb") as part_file:
                        while True:
                            chunk = part_file.read(4096)
                            if not chunk:
                                break
                            fullfile.write(chunk)
                            pbar.update(len(chunk))
                    os.remove(part_path)
        finally:
            pbar.close()
        LOGGER.info("文件合并完毕")
def down_video(video_url):
    """Resolve the real media URL behind a video page and download it.

    The page is loaded in PhantomJS to read the ``videoid`` attribute of the
    ``.video_detail`` element and the text of the ``.abs-title`` element.
    The video ID is then used to query the URL-listing endpoint, signed with
    a random ``r`` parameter and ``s`` = CRC32 of the request path plus
    ``?r=...``. Among the returned entries the one with the largest ``size``
    is downloaded, using the multi-thread settings held in ``Config``.

    Failures to obtain a usable media URL are logged, not raised.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get(video_url)
        video_detail_ele = WebDriverWait(driver, 60, 0.5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".video_detail"))
        )
        video_id = video_detail_ele.get_attribute("videoid")
        video_title_ele = WebDriverWait(driver, 60, 0.5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".abs-title"))
        )
        video_title = video_title_ele.text
    finally:
        # Always shut the browser process down, even when a wait times out.
        driver.quit()
    print(video_title)

    # The title becomes the file name; a path separator inside it would be
    # read as a directory boundary and make the open() fail.
    for separator in filter(None, (os.sep, os.altsep)):
        video_title = video_title.replace(separator, "_")

    url = "http://ib.365yg.com/video/urls/v/1/toutiao/mp4/%s" % video_id
    r_param = str(random.random())[2:]
    request_path = urllib.parse.urlparse(url).path + "?r=" + r_param
    crc = binascii.crc32(bytes(request_path, encoding="utf8"))
    s_param = right_shift(crc, 0)
    url = url + '?r=%s&s=%s' % (r_param, s_param)

    resp = requests.get(url)
    if resp.status_code != 200:
        LOGGER.error("获取视频真实地址失败")
        return

    video_list = resp.json()["data"]["video_list"]
    videos = [
        {
            "size": video_info["size"],
            "definition": video_info["definition"],
            # main_url is base64 text; decode the bytes so the downloaders
            # receive an ordinary str URL.
            "rel_url": base64.standard_b64decode(
                video_info["main_url"]
            ).decode("utf-8"),
        }
        for video_info in video_list.values()
    ]
    if not videos:
        # max() on an empty list would raise ValueError.
        LOGGER.error("获取视频真实地址失败")
        return

    video = max(videos, key=lambda x: x["size"])
    downloader = VideoDownloader(
        video["rel_url"], video_title, None, Config.enable_multi, Config.thread_count)
    downloader.download()
def right_shift(val, n):
    """Shift ``val`` right by ``n`` bits, adding 2**32 to negative values first."""
    if val < 0:
        val += 0x100000000
    return val >> n
def main():
    """Download the video whose address is stored in ``Config.video_url``."""
    target_url = Config.video_url
    down_video(target_url)
def parseArguments(arguments):
    """Copy parsed command-line options into ``Config``.

    ``arguments`` is the mapping produced by docopt; the keys read are
    ``--threadcount``, ``--multi``, ``--id`` and ``<video>``.

    * ``--threadcount``: converted to int, or ``DEFAULT_THREAD_COUNT`` when
      not given.
    * ``--id``: ``<video>`` is treated as a video ID and joined onto
      ``BASE_URL``; otherwise it is treated as a URL and ``http://`` is
      prepended only when it carries no ``http://`` or ``https://`` scheme.
    """
    thread_count = arguments["--threadcount"]
    if thread_count:
        Config.thread_count = int(thread_count)
    else:
        Config.thread_count = DEFAULT_THREAD_COUNT
    Config.enable_multi = arguments["--multi"]
    # Record the flag on Config too, so it does not stay at its default.
    Config.enable_id = arguments["--id"]

    video = arguments["<video>"]
    if Config.enable_id:
        Config.video_url = urllib.parse.urljoin(BASE_URL, video)
    elif video.startswith(("http://", "https://")):
        # An https:// address must not get "http://" glued in front of it.
        Config.video_url = video
    else:
        Config.video_url = "http://" + video
if __name__ == "__main__":
    # Parse the command line against the module docstring, store the
    # options in Config, then start the download.
    parseArguments(docopt(__doc__, version="tt video downloader V0.1"))
    main()