From 5de2804671a4922147d0df8e5ca2738c4f74f780 Mon Sep 17 00:00:00 2001
From: sqzw-x
Date: Tue, 22 Oct 2024 21:47:08 +0800
Subject: [PATCH] chore: format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Remove excessive line breaks; wrap multi-parameter calls to improve readability
* Use f-strings instead of printf-style string formatting
* Clean up redundant regular expressions
---
 .editorconfig | 108 +++++
 resources/c_number/__get_c_number.py | 10 +-
 src/controllers/cut_window.py | 32 +-
 src/controllers/main_window/init.py | 18 +-
 src/controllers/main_window/load_config.py | 61 ++-
 src/controllers/main_window/main_window.py | 339 ++++++--------
 src/controllers/main_window/save_config.py | 13 +-
 src/controllers/main_window/style.py | 60 +--
 src/models/base/image.py | 11 +-
 src/models/base/number.py | 39 +-
 src/models/base/path.py | 2 +-
 src/models/base/utils.py | 5 +-
 src/models/base/web.py | 153 ++----
 src/models/config/config.py | 37 +-
 src/models/config/config_manual.py | 513 +++++++--------
 src/models/config/resources.py | 28 +-
 src/models/core/crawler.py | 252 ++++------
 src/models/core/file.py | 216 ++++-----
 src/models/core/image.py | 43 +-
 src/models/core/nfo.py | 14 +-
 src/models/core/scraper.py | 153 +++---
 src/models/core/subtitle.py | 17 +-
 src/models/core/translate.py | 53 +--
 src/models/core/utils.py | 20 +-
 src/models/core/video.py | 6 +-
 src/models/core/web.py | 139 ++----
 src/models/crawlers/airav.py | 56 +--
 src/models/crawlers/airav_cc.py | 40 +-
 src/models/crawlers/avsex.py | 72 +--
 src/models/crawlers/avsox.py | 22 +-
 src/models/crawlers/cableav.py | 8 +-
 src/models/crawlers/cnmdb.py | 79 +---
 src/models/crawlers/dahlia.py | 27 +-
 src/models/crawlers/dmm.py | 59 +--
 src/models/crawlers/faleno.py | 40 +-
 src/models/crawlers/fantastica.py | 13 +-
 src/models/crawlers/fc2.py | 27 +-
 src/models/crawlers/fc2club.py | 30 +-
 src/models/crawlers/fc2hub.py | 34 +-
 src/models/crawlers/freejavbt.py | 146 ++----
 src/models/crawlers/getchu.py | 24 +-
 src/models/crawlers/getchu_dl.py | 18 +-
 src/models/crawlers/getchu_dmm.py | 20 +-
 src/models/crawlers/giga.py | 16 +-
 src/models/crawlers/guochan.py | 177 +++----
 src/models/crawlers/hdouban.py | 43 +-
 src/models/crawlers/hscangku.py | 11 +-
 src/models/crawlers/iqqtv.py | 16 +-
 src/models/crawlers/iqqtv_new.py | 48 +-
 src/models/crawlers/jav321.py | 31 +-
 src/models/crawlers/javbus.py | 44 +-
 src/models/crawlers/javday.py | 28 +-
 src/models/crawlers/javdb.py | 78 +---
 src/models/crawlers/javlibrary.py | 35 +-
 src/models/crawlers/javlibrary_new.py | 48 +-
 src/models/crawlers/kin8.py | 14 +-
 src/models/crawlers/love6.py | 8 +-
 src/models/crawlers/lulubar.py | 21 +-
 src/models/crawlers/madouqu.py | 70 +--
 src/models/crawlers/mdtv.py | 28 +-
 src/models/crawlers/mgstage.py | 14 +-
 src/models/crawlers/mmtv.py | 19 +-
 src/models/crawlers/mywife.py | 26 +-
 src/models/crawlers/official.py | 37 +-
 src/models/crawlers/prestige.py | 25 +-
 src/models/crawlers/theporndb.py | 42 +-
 src/models/crawlers/theporndb_movies.py | 8 +-
 src/models/crawlers/xcity.py | 18 +-
 src/models/tools/actress_db.py | 3 +-
 src/models/tools/emby_actor_image.py | 44 +-
 src/models/tools/emby_actor_info.py | 75 ++-
 src/models/tools/missing.py | 60 +--
 72 files changed, 1277 insertions(+), 2867 deletions(-)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..50eb487
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,108 @@
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 4
+indent_style = space
+insert_final_newline = false
+max_line_length = 170
+tab_width = 4
+ij_continuation_indent_size = 8 +ij_formatter_off_tag = @formatter:off +ij_formatter_on_tag = @formatter:on +ij_formatter_tags_enabled = true +ij_smart_tabs = false +ij_visual_guides = +ij_wrap_on_typing = false + +[.editorconfig] +ij_editorconfig_align_group_field_declarations = false +ij_editorconfig_space_after_colon = false +ij_editorconfig_space_after_comma = true +ij_editorconfig_space_before_colon = false +ij_editorconfig_space_before_comma = false +ij_editorconfig_spaces_around_assignment_operators = true + + +[{*.bash,*.sh,*.zsh}] +indent_size = 2 +tab_width = 2 +ij_shell_binary_ops_start_line = false +ij_shell_keep_column_alignment_padding = false +ij_shell_minify_program = false +ij_shell_redirect_followed_by_space = false +ij_shell_switch_cases_indented = false +ij_shell_use_unix_line_separator = true + + +[{*.py,*.pyc,*.pyw}] +ij_python_align_collections_and_comprehensions = true +ij_python_align_multiline_imports = true +ij_python_align_multiline_parameters = true +ij_python_align_multiline_parameters_in_calls = true +ij_python_blank_line_at_file_end = true +ij_python_blank_lines_after_imports = 1 +ij_python_blank_lines_after_local_imports = 0 +ij_python_blank_lines_around_class = 1 +ij_python_blank_lines_around_method = 1 +ij_python_blank_lines_around_top_level_classes_functions = 2 +ij_python_blank_lines_before_first_method = 0 +ij_python_call_parameters_new_line_after_left_paren = false +ij_python_call_parameters_right_paren_on_new_line = false +ij_python_call_parameters_wrap = on_every_item +ij_python_dict_alignment = 0 +ij_python_dict_new_line_after_left_brace = false +ij_python_dict_new_line_before_right_brace = true +ij_python_dict_wrapping = 5 +ij_python_from_import_new_line_after_left_parenthesis = false +ij_python_from_import_new_line_before_right_parenthesis = false +ij_python_from_import_parentheses_force_if_multiline = false +ij_python_from_import_trailing_comma_if_multiline = false +ij_python_from_import_wrapping = 1 +ij_python_hang_closing_brackets = false +ij_python_keep_blank_lines_in_code = 1 +ij_python_keep_blank_lines_in_declarations = 1 +ij_python_keep_indents_on_empty_lines = false +ij_python_keep_line_breaks = true +ij_python_method_parameters_new_line_after_left_paren = false +ij_python_method_parameters_right_paren_on_new_line = false +ij_python_method_parameters_wrap = on_every_item +ij_python_new_line_after_colon = false +ij_python_new_line_after_colon_multi_clause = true +ij_python_optimize_imports_always_split_from_imports = false +ij_python_optimize_imports_case_insensitive_order = false +ij_python_optimize_imports_join_from_imports_with_same_source = true +ij_python_optimize_imports_sort_by_type_first = true +ij_python_optimize_imports_sort_imports = true +ij_python_optimize_imports_sort_names_in_from_imports = true +ij_python_space_after_comma = true +ij_python_space_after_number_sign = true +ij_python_space_after_py_colon = true +ij_python_space_before_backslash = true +ij_python_space_before_comma = false +ij_python_space_before_for_semicolon = false +ij_python_space_before_lbracket = false +ij_python_space_before_method_call_parentheses = false +ij_python_space_before_method_parentheses = false +ij_python_space_before_number_sign = true +ij_python_space_before_py_colon = false +ij_python_space_within_empty_method_call_parentheses = false +ij_python_space_within_empty_method_parentheses = false +ij_python_spaces_around_additive_operators = true +ij_python_spaces_around_assignment_operators = true +ij_python_spaces_around_bitwise_operators = 
true +ij_python_spaces_around_eq_in_keyword_argument = false +ij_python_spaces_around_eq_in_named_parameter = false +ij_python_spaces_around_equality_operators = true +ij_python_spaces_around_multiplicative_operators = true +ij_python_spaces_around_power_operator = true +ij_python_spaces_around_relational_operators = true +ij_python_spaces_around_shift_operators = true +ij_python_spaces_within_braces = false +ij_python_spaces_within_brackets = false +ij_python_spaces_within_method_call_parentheses = false +ij_python_spaces_within_method_parentheses = false +ij_python_use_continuation_indent_for_arguments = false +ij_python_use_continuation_indent_for_collection_and_comprehensions = false +ij_python_use_continuation_indent_for_parameters = true +ij_python_wrap_long_lines = false + diff --git a/resources/c_number/__get_c_number.py b/resources/c_number/__get_c_number.py index 274556f..a677210 100644 --- a/resources/c_number/__get_c_number.py +++ b/resources/c_number/__get_c_number.py @@ -105,7 +105,7 @@ def get_c_number(): ) while i: - url = ('https://www.sehuatang.org/forum-103-%s.html' % i) + url = (f'https://www.sehuatang.org/forum-103-{i}.html') # 获取当前页面信息 try: res = requests.get(url, headers=headers) @@ -119,9 +119,9 @@ def get_c_number(): html = etree.HTML(res.text.replace('encoding="utf-8"', '')) if i == 1: page_total = html.xpath('//a[@class="last"]/text()')[0][-3:] - print('当前共 %s 页数据!' % page_total) + print('当前共 {} 页数据!'.format(page_total)) print('\n' + '**' * 20) - print('开始下载第 %s 页数据...\n页面地址:%s' % (i, url)) + print(f'开始下载第 {i} 页数据...\n页面地址:{url}') # 获取当前页面帖子列表 try: post_info = html.xpath('//tbody[contains(@id, "normal")]/tr/th/a[2]') @@ -131,7 +131,7 @@ def get_c_number(): save_log(error_info) else: post_number = len(post_info) - print('帖子数量:%s' % post_number) + print(f'帖子数量:{post_number}') j = 0 for each in post_info: j += 1 @@ -183,7 +183,7 @@ def get_c_number(): print(j) print(post_title) print(number + ' : ' + title) - print('\n当前第 %s 页数据...\n页面地址:%s' % (i, url)) + print(f'\n当前第 {i} 页数据...\n页面地址:{url}') print('**' * 20) with open(json_filename, 'w', encoding='utf-8') as f: json.dump( diff --git a/src/controllers/cut_window.py b/src/controllers/cut_window.py index 157910a..63b4893 100644 --- a/src/controllers/cut_window.py +++ b/src/controllers/cut_window.py @@ -64,11 +64,10 @@ def __init__(self, parent=None): self.Ui.pushButton_select_cutrange.setGeometry(QRect(420, 0, 379, 539)) self.Ui.pushButton_select_cutrange.setCursor(QCursor(Qt.OpenHandCursor)) self.Ui.pushButton_select_cutrange.setAcceptDrops(True) - self.Ui.pushButton_select_cutrange.setStyleSheet( - u"background-color: rgba(200, 200, 200, 80);\n" - "font-size:13px;\n" "font-weight:normal;" - "color: rgba(0, 0, 0, 255);\n" - "border:2px solid rgba(0, 55, 255, 255);\n") + self.Ui.pushButton_select_cutrange.setStyleSheet(u"background-color: rgba(200, 200, 200, 80);\n" + "font-size:13px;\n" "font-weight:normal;" + "color: rgba(0, 0, 0, 255);\n" + "border:2px solid rgba(0, 55, 255, 255);\n") self.set_style() self.Ui.horizontalSlider_left.valueChanged.connect(self.change_postion_left) self.Ui.horizontalSlider_right.valueChanged.connect(self.change_postion_right) @@ -80,8 +79,7 @@ def __init__(self, parent=None): def set_style(self): # 控件美化 裁剪弹窗 - self.Ui.widget.setStyleSheet( - ''' + self.Ui.widget.setStyleSheet(''' * { font-family: Consolas, 'PingFang SC', 'Microsoft YaHei UI', 'Noto Color Emoji', 'Segoe UI Emoji'; } @@ -121,8 +119,7 @@ def set_style(self): border-width:14px; font-weight:bold; } - ''' - ) + ''') def 
change_postion_left(self): # abc: 0-10000 @@ -146,8 +143,7 @@ def change_postion_right(self): # 打开图片选择框 def open_image(self): - img_path, img_type = QFileDialog. \ - getOpenFileName(None, "打开图片", "", "*.jpg *.png;;All Files(*)", options=self.parent().options) + img_path, img_type = QFileDialog.getOpenFileName(None, "打开图片", "", "*.jpg *.png;;All Files(*)", options=self.parent().options) if img_path: self.showimage(img_path) @@ -212,8 +208,8 @@ def showimage(self, img_path='', json_data={}): if '.nfo' in each: temp_path = os.path.join(img_folder, each) break - json_data, movie_number, folder_old_path, file_name, file_ex, \ - sub_list, file_show_name, file_show_path = models.core.file.get_file_info(temp_path, copy_sub=False) + json_data, movie_number, folder_old_path, file_name, file_ex, sub_list, file_show_name, file_show_path = models.core.file.get_file_info(temp_path, + copy_sub=False) self.setWindowTitle(json_data.get('number') + ' 封面图片裁剪') # 设置窗口标题 @@ -226,9 +222,7 @@ def showimage(self, img_path='', json_data={}): poster_path = os.path.join(img_folder, 'poster.jpg') if pic_name == 0: # 文件名-poster.jpg if '-' in img_name: - poster_path = img_path.replace('-fanart', '').replace('-thumb', '').replace('-poster', - '').replace( - img_ex, '') + '-poster.jpg' + poster_path = img_path.replace('-fanart', '').replace('-thumb', '').replace('-poster', '').replace(img_ex, '') + '-poster.jpg' thumb_path = poster_path.replace('poster.', 'thumb.') fanart_path = poster_path.replace('poster.', 'fanart.') self.cut_thumb_path = thumb_path # 裁剪后的thumb路径 @@ -273,8 +267,7 @@ def showimage(self, img_path='', json_data={}): self.rect_h = int(self.rect_w * self.rect_h_w_ratio) # 计算裁剪框的高度 self.rect_x = 0 # 裁剪框左上角的x值 self.rect_y = int((self.pic_new_h - self.rect_h) / 2) # 裁剪框左上角的y值(默认垂直居中) - self.Ui.pushButton_select_cutrange.setGeometry( - QRect(self.rect_x, self.rect_y, self.rect_w, self.rect_h)) # 显示裁剪框 + self.Ui.pushButton_select_cutrange.setGeometry(QRect(self.rect_x, self.rect_y, self.rect_w, self.rect_h)) # 显示裁剪框 self.getRealPos() # 显示裁剪框实际位置 # 计算在原图的裁剪位置 @@ -333,8 +326,7 @@ def getRealPos(self): self.c_y = int(self.c_y) # 显示实际裁剪位置 - self.Ui.label_cut_postion.setText( - '%s, %s, %s, %s' % (str(self.c_x), str(self.c_y), str(self.c_x2), str(self.c_y2))) + self.Ui.label_cut_postion.setText('%s, %s, %s, %s' % (str(self.c_x), str(self.c_y), str(self.c_x2), str(self.c_y2))) # self.show_traceback_log('选择位置: %s, %s, %s, %s' % (str(self.c_x), str(self.c_y), str(self.c_x2), str(self.c_y2))) # 显示实际裁剪尺寸 diff --git a/src/controllers/main_window/init.py b/src/controllers/main_window/init.py index a7aab27..0bfe94f 100644 --- a/src/controllers/main_window/init.py +++ b/src/controllers/main_window/init.py @@ -227,15 +227,11 @@ def Init_Singal(self): def n(a): ... 
# mousePressEvent 的返回值必须是 None, 用这个包装一下 - self.Ui.label_download_actor_zip.mousePressEvent = lambda e: n(webbrowser.open( - 'https://github.com/moyy996/AVDC/releases/tag/%E5%A4%B4%E5%83%8F%E5%8C%85-2')) - self.Ui.label_download_sub_zip.mousePressEvent = lambda e: n(webbrowser.open( - 'https://www.dropbox.com/sh/vkbxawm6mwmwswr/AADqZiF8aUHmK6qIc7JSlURIa')) - self.Ui.label_download_mark_zip.mousePressEvent = lambda e: n(webbrowser.open( - 'https://www.dropbox.com/sh/vkbxawm6mwmwswr/AADqZiF8aUHmK6qIc7JSlURIa')) + self.Ui.label_download_actor_zip.mousePressEvent = lambda e: n(webbrowser.open('https://github.com/moyy996/AVDC/releases/tag/%E5%A4%B4%E5%83%8F%E5%8C%85-2')) + self.Ui.label_download_sub_zip.mousePressEvent = lambda e: n(webbrowser.open('https://www.dropbox.com/sh/vkbxawm6mwmwswr/AADqZiF8aUHmK6qIc7JSlURIa')) + self.Ui.label_download_mark_zip.mousePressEvent = lambda e: n(webbrowser.open('https://www.dropbox.com/sh/vkbxawm6mwmwswr/AADqZiF8aUHmK6qIc7JSlURIa')) self.Ui.label_get_cookie_url.mousePressEvent = lambda e: n(webbrowser.open('https://tieba.baidu.com/p/5492736764')) - self.Ui.label_download_actor_db.mousePressEvent = lambda e: n(webbrowser.open( - 'https://github.com/sqzw-x/mdcx/releases/tag/actor_info_database')) + self.Ui.label_download_actor_db.mousePressEvent = lambda e: n(webbrowser.open('https://github.com/sqzw-x/mdcx/releases/tag/actor_info_database')) # endregion # region 控件更新 @@ -267,8 +263,7 @@ def n(a): ... # mousePressEvent 的返回值必须是 None, 用这个包装一 self.pushButton_move_mp4.connect(self.Ui.pushButton_move_mp4.setText) self.pushButton_find_missing_number.connect(self.Ui.pushButton_find_missing_number.setText) self.label_result.connect(self.Ui.label_result.setText) - self.label_show_version.connect(self.Ui.label_show_version.setText) - # endregion + self.label_show_version.connect(self.Ui.label_show_version.setText) # endregion def Init_QSystemTrayIcon(self): @@ -289,7 +284,8 @@ def Init_QSystemTrayIcon(self): tray_menu.addAction(quit_action) self.tray_icon.setContextMenu(tray_menu) self.tray_icon.show() - # self.tray_icon.showMessage(f"MDCx {self.localversion}", u'已启动!欢迎使用!', QIcon(self.icon_ico), 3000) # icon的值 0没有图标 1是提示 2是警告 3是错误 + # self.tray_icon.showMessage(f"MDCx {self.localversion}", u'已启动!欢迎使用!', QIcon(self.icon_ico), 3000) + # icon的值 0没有图标 1是提示 2是警告 3是错误 def init_QTreeWidget(self): diff --git a/src/controllers/main_window/load_config.py b/src/controllers/main_window/load_config.py index bae3062..9b8f7ce 100644 --- a/src/controllers/main_window/load_config.py +++ b/src/controllers/main_window/load_config.py @@ -1057,10 +1057,29 @@ def load_config(self): self.timer_scrape.stop() self.statement = int(config.statement) # 间歇刮削间隔时间 + self.Ui.checkBox_show_web_log.setChecked(config.show_web_log == 'on') # 显示字段刮削过程 + self.Ui.checkBox_show_from_log.setChecked(config.show_from_log == 'on') # 显示字段来源信息 + self.Ui.checkBox_show_data_log.setChecked(config.show_data_log == 'on') # 显示字段内容信息 + if config.save_log == 'off': # 保存日志 + self.Ui.radioButton_log_off.setChecked(True) + else: + self.Ui.radioButton_log_on.setChecked(True) + if config.update_check == 'off': # 检查更新 + self.Ui.radioButton_update_off.setChecked(True) + else: + self.Ui.radioButton_update_on.setChecked(True) + + self.Ui.lineEdit_local_library_path.setText(convert_path(config.local_library)) # 本地资源库 + self.Ui.lineEdit_actors_name.setText(str(config.actors_name)) # 演员名 + self.Ui.lineEdit_netdisk_path.setText(convert_path(config.netdisk_path)) # 网盘目录 + 
self.Ui.lineEdit_localdisk_path.setText(convert_path(config.localdisk_path)) # 本地磁盘目录 + self.Ui.checkBox_hide_window_title.setChecked(config.window_title == 'hide') # 窗口标题栏 + # endregion + + # region switch_on switch_on = config.switch_on if read_version < 20230404: switch_on += 'ipv4_only,' - # region switch_on self.Ui.checkBox_auto_start.setChecked('auto_start' in switch_on) self.Ui.checkBox_auto_exit.setChecked('auto_exit' in switch_on) self.Ui.checkBox_rest_scrape.setChecked('rest_scrape' in switch_on) @@ -1100,8 +1119,7 @@ def load_config(self): except: self.Init_QSystemTrayIcon() if not mdcx_config: - self.tray_icon.showMessage(f"MDCx {self.localversion}", u'配置写入失败!所在目录没有读写权限!', - QIcon(resources.icon_ico), 3000) + self.tray_icon.showMessage(f"MDCx {self.localversion}", u'配置写入失败!所在目录没有读写权限!', QIcon(resources.icon_ico), 3000) if 'passthrough' in switch_on: self.Ui.checkBox_highdpi_passthrough.setChecked(True) if not os.path.isfile('highdpi_passthrough'): @@ -1126,9 +1144,7 @@ def load_config(self): except: self.Init_QSystemTrayIcon() if not mdcx_config: - self.tray_icon.showMessage(f"MDCx {self.localversion}", - u'配置写入失败!所在目录没有读写权限!', - QIcon(resources.icon_ico), 3000) + self.tray_icon.showMessage(f"MDCx {self.localversion}", u'配置写入失败!所在目录没有读写权限!', QIcon(resources.icon_ico), 3000) # TODO macOS上运行pyinstaller打包的程序,这个处理方式有问题 try: @@ -1152,26 +1168,6 @@ def load_config(self): except Exception as e: signal.show_traceback_log(f'hide_dock_flag_file: {os.path.realpath(hide_dock_flag_file)}') signal.show_traceback_log(traceback.format_exc()) - # endregion - - self.Ui.checkBox_show_web_log.setChecked(config.show_web_log == 'on') # 显示字段刮削过程 - self.Ui.checkBox_show_from_log.setChecked(config.show_from_log == 'on') # 显示字段来源信息 - self.Ui.checkBox_show_data_log.setChecked(config.show_data_log == 'on') # 显示字段内容信息 - if config.save_log == 'off': # 保存日志 - self.Ui.radioButton_log_off.setChecked(True) - else: - self.Ui.radioButton_log_on.setChecked(True) - if config.update_check == 'off': # 检查更新 - self.Ui.radioButton_update_off.setChecked(True) - else: - self.Ui.radioButton_update_on.setChecked(True) - - self.Ui.lineEdit_local_library_path.setText(convert_path(config.local_library)) # 本地资源库 - self.Ui.lineEdit_actors_name.setText(str(config.actors_name)) # 演员名 - self.Ui.lineEdit_netdisk_path.setText(convert_path(config.netdisk_path)) # 网盘目录 - self.Ui.lineEdit_localdisk_path.setText(convert_path(config.localdisk_path)) # 本地磁盘目录 - self.Ui.checkBox_hide_window_title.setChecked(config.window_title == 'hide') # 窗口标题栏 - # endregion # endregion self.Ui.checkBox_create_link.setChecked(config.auto_link) @@ -1186,13 +1182,12 @@ def load_config(self): scrape_like_text += " · 软连接开" elif config.soft_link == 2: scrape_like_text += " · 硬连接开" - signal.show_log_text( - f' 🛠 当前配置:{config.path} 加载完成!\n ' - f'📂 程序目录:{get_main_path()} \n ' - f'📂 刮削目录:{get_movie_path_setting()[0]} \n ' - f'💠 刮削模式:{Flags.main_mode_text} · {scrape_like_text} \n ' - f'🖥️ 系统信息:{platform.platform()} \n ' - f'🐰 软件版本:{self.localversion} \n') + signal.show_log_text(f' 🛠 当前配置:{config.path} 加载完成!\n ' + f'📂 程序目录:{get_main_path()} \n ' + f'📂 刮削目录:{get_movie_path_setting()[0]} \n ' + f'💠 刮削模式:{Flags.main_mode_text} · {scrape_like_text} \n ' + f'🖥️ 系统信息:{platform.platform()} \n ' + f'🐰 软件版本:{self.localversion} \n') except: signal.show_traceback_log(traceback.format_exc()) try: diff --git a/src/controllers/main_window/main_window.py b/src/controllers/main_window/main_window.py index f967db7..312b1f4 100644 --- a/src/controllers/main_window/main_window.py +++ 
b/src/controllers/main_window/main_window.py @@ -8,25 +8,22 @@ from PyQt5.QtCore import QEvent, QPoint, QTimer, Qt, pyqtSignal from PyQt5.QtGui import QCursor, QHoverEvent, QIcon, QKeySequence -from PyQt5.QtWidgets import QAction, QApplication, QFileDialog, QInputDialog, QMainWindow, QMenu, QMessageBox, \ - QShortcut, QTreeWidgetItem +from PyQt5.QtWidgets import QAction, QApplication, QFileDialog, QInputDialog, QMainWindow, QMenu, QMessageBox, QShortcut, QTreeWidgetItem from controllers.cut_window import CutWindow from controllers.main_window.init import Init_QSystemTrayIcon, Init_Singal, Init_Ui, init_QTreeWidget from controllers.main_window.load_config import load_config from controllers.main_window.save_config import save_config from controllers.main_window.style import set_dark_style, set_style -from models.base.file import _open_file_thread, delete_file, move_file, split_path +from models.base.file import _open_file_thread, delete_file, split_path from models.base.image import get_pixmap from models.base.number import get_info from models.base.path import get_main_path, get_path from models.base.utils import _async_raise, add_html, convert_path, get_current_time, get_used_time, kill_a_thread -from models.base.web import check_theporndb_api_token, check_version, get_avsox_domain, get_html, ping_host, \ - scraper_html +from models.base.web import check_theporndb_api_token, check_version, get_avsox_domain, get_html, ping_host, scraper_html from models.config.config import config from models.config.resources import resources -from models.core.file import check_and_clean_files, get_success_list, movie_lists, \ - newtdisk_creat_symlink, save_remain_list, save_success_list +from models.core.file import check_and_clean_files, get_success_list, movie_lists, newtdisk_creat_symlink, save_remain_list, save_success_list from models.core.flags import Flags from models.core.image import add_del_extrafanart_copy from models.core.nfo import write_nfo @@ -148,20 +145,19 @@ def __init__(self, parent=None): self.show_scrape_info() # 主界面左下角显示一些配置信息 self.show_net_info('\n🏠 代理设置在:【设置】 - 【网络】 - 【代理设置】。\n') # 检查网络界面显示提示信息 show_netstatus() # 检查网络界面显示当前网络代理信息 - self.show_net_info( - '\n💡 说明:\n ' - '任意代理:javbus、jav321、javlibrary、mywife、giga、freejavbt、' - 'mdtv、madouqu、7mmtv、faleno、dahlia、prestige、theporndb、cnmdb、fantastica、kin8\n ' - '非日本代理:javdb、airav-cc、avsex(日本代理会报错)\n ' - '日本代理:seesaawiki、mgstage\n ' - '无需代理:avsex、hdouban、iqqtv、airav-wiki、love6、lulubar、fc2、fc2club、fc2hub\n\n' - '▶️ 点击右上角 【开始检测】按钮以测试网络连通性。') # 检查网络界面显示提示信息 + self.show_net_info('\n💡 说明:\n ' + '任意代理:javbus、jav321、javlibrary、mywife、giga、freejavbt、' + 'mdtv、madouqu、7mmtv、faleno、dahlia、prestige、theporndb、cnmdb、fantastica、kin8\n ' + '非日本代理:javdb、airav-cc、avsex(日本代理会报错)\n ' + '日本代理:seesaawiki、mgstage\n ' + '无需代理:avsex、hdouban、iqqtv、airav-wiki、love6、lulubar、fc2、fc2club、fc2hub\n\n' + '▶️ 点击右上角 【开始检测】按钮以测试网络连通性。') # 检查网络界面显示提示信息 signal.add_log("🍯 你可以点击左下角的图标来 显示 / 隐藏 请求信息面板!") self.show_version() # 日志页面显示版本信息 self.creat_right_menu() # 加载右键菜单 self.pushButton_main_clicked() # 切换到主界面 self.auto_start() # 自动开始刮削 - # self.load_langid()# 后台加载langid,第一次加载需要时间,预加载避免卡住 + # self.load_langid() # 后台加载langid,第一次加载需要时间,预加载避免卡住 # endregion # region Init @@ -473,7 +469,7 @@ def pushButton_min_clicked(self): def pushButton_min_clicked2(self): if not config.is_windows: self.setWindowFlag(Qt.FramelessWindowHint, False) # 不隐藏边框 - # self.show() # 加上后可以显示缩小动画 + # self.show() # 加上后可以显示缩小动画 self.showMinimized() # 重置左侧按钮样式 @@ -481,33 +477,20 @@ def set_left_button_style(self): 
try: if self.dark_mode: self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #1F272F;border-right: 1px solid #20303F;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) - self.Ui.pushButton_main.setStyleSheet( - 'QPushButton:hover#pushButton_main{color: white;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_log.setStyleSheet( - 'QPushButton:hover#pushButton_log{color: white;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_net.setStyleSheet( - 'QPushButton:hover#pushButton_net{color: white;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_tool.setStyleSheet( - 'QPushButton:hover#pushButton_tool{color: white;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_setting.setStyleSheet( - 'QPushButton:hover#pushButton_setting{color: white;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_about.setStyleSheet( - 'QPushButton:hover#pushButton_about{color: white;background-color: rgba(160,160,165,40);}') + f'background: #1F272F;border-right: 1px solid #20303F;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') + self.Ui.pushButton_main.setStyleSheet('QPushButton:hover#pushButton_main{color: white;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_log.setStyleSheet('QPushButton:hover#pushButton_log{color: white;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_net.setStyleSheet('QPushButton:hover#pushButton_net{color: white;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_tool.setStyleSheet('QPushButton:hover#pushButton_tool{color: white;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_setting.setStyleSheet('QPushButton:hover#pushButton_setting{color: white;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_about.setStyleSheet('QPushButton:hover#pushButton_about{color: white;background-color: rgba(160,160,165,40);}') else: - self.Ui.pushButton_main.setStyleSheet( - 'QPushButton:hover#pushButton_main{color: black;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_log.setStyleSheet( - 'QPushButton:hover#pushButton_log{color: black;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_net.setStyleSheet( - 'QPushButton:hover#pushButton_net{color: black;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_tool.setStyleSheet( - 'QPushButton:hover#pushButton_tool{color: black;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_setting.setStyleSheet( - 'QPushButton:hover#pushButton_setting{color: black;background-color: rgba(160,160,165,40);}') - self.Ui.pushButton_about.setStyleSheet( - 'QPushButton:hover#pushButton_about{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_main.setStyleSheet('QPushButton:hover#pushButton_main{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_log.setStyleSheet('QPushButton:hover#pushButton_log{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_net.setStyleSheet('QPushButton:hover#pushButton_net{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_tool.setStyleSheet('QPushButton:hover#pushButton_tool{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_setting.setStyleSheet('QPushButton:hover#pushButton_setting{color: black;background-color: rgba(160,160,165,40);}') + self.Ui.pushButton_about.setStyleSheet('QPushButton:hover#pushButton_about{color: 
black;background-color: rgba(160,160,165,40);}') except: signal.show_traceback_log(traceback.format_exc()) @@ -595,40 +578,28 @@ def label_version_clicked(self, test): # region 左侧切换页面 # 点左侧的主界面按钮 def pushButton_main_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #F5F5F6;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) + self.Ui.left_backgroud_widget.setStyleSheet(f'background: #F5F5F6;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(0) self.set_left_button_style() self.Ui.pushButton_main.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') # 点左侧的日志按钮 def pushButton_show_log_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #EFFFFC;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) + self.Ui.left_backgroud_widget.setStyleSheet(f'background: #EFFFFC;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(1) self.set_left_button_style() - self.Ui.pushButton_log.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') - # self.Ui.textBrowser_log_main.verticalScrollBar().setValue( - # self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) - # self.Ui.textBrowser_log_main_2.verticalScrollBar().setValue( - # self.Ui.textBrowser_log_main_2.verticalScrollBar().maximum()) + self.Ui.pushButton_log.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') # self.Ui.textBrowser_log_main.verticalScrollBar().setValue( # self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) # self.Ui.textBrowser_log_main_2.verticalScrollBar().setValue( # self.Ui.textBrowser_log_main_2.verticalScrollBar().maximum()) # 点左侧的工具按钮 def pushButton_tool_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #FFEFF6;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) + self.Ui.left_backgroud_widget.setStyleSheet(f'background: #FFEFF6;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(3) self.set_left_button_style() self.Ui.pushButton_tool.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') # 点左侧的设置按钮 def pushButton_setting_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #84CE9A;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) + self.Ui.left_backgroud_widget.setStyleSheet(f'background: #84CE9A;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(4) self.set_left_button_style() try: @@ -642,18 +613,14 @@ def pushButton_setting_clicked(self): # 点击左侧【检测网络】按钮,切换到检测网络页面 def pushButton_show_net_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #E1F2FF;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) 
+ self.Ui.left_backgroud_widget.setStyleSheet(f'background: #E1F2FF;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(2) self.set_left_button_style() self.Ui.pushButton_net.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') # 点左侧的关于按钮 def pushButton_about_clicked(self): - self.Ui.left_backgroud_widget.setStyleSheet( - 'background: #FFEFEF;border-right: 1px solid #EDEDED;border-top-left-radius: %spx;border-bottom-left-radius: %spx;' % ( - self.window_radius, self.window_radius)) + self.Ui.left_backgroud_widget.setStyleSheet(f'background: #FFEFEF;border-right: 1px solid #EDEDED;border-top-left-radius: {self.window_radius}px;border-bottom-left-radius: {self.window_radius}px;') self.Ui.stackedWidget.setCurrentIndex(5) self.set_left_button_style() self.Ui.pushButton_about.setStyleSheet('font-weight: bold; background-color: rgba(160,160,165,60);') @@ -703,8 +670,7 @@ def _show_stop_info(self): Flags.rest_time_convert = Flags.rest_time_convert_ if Flags.stop_other: signal.show_scrape_info('⛔️ 已手动停止!') - signal.show_log_text( - "⛔️ 已手动停止!\n================================================================================") + signal.show_log_text("⛔️ 已手动停止!\n================================================================================") self.set_label_file_path.emit('⛔️ 已手动停止!') return signal.exec_set_processbar.emit(0) @@ -715,18 +681,13 @@ def _show_stop_info(self): else: average_time = used_time signal.show_scrape_info('⛔️ 刮削已手动停止!') - self.set_label_file_path.emit( - '⛔️ 刮削已手动停止!\n 已刮削 %s 个视频,还剩余 %s 个!刮削用时 %s 秒' % ( - Flags.scrape_done, (Flags.total_count - Flags.scrape_done), used_time)) - signal.show_log_text( - '\n ⛔️ 刮削已手动停止!\n 😊 已刮削 %s 个视频,还剩余 %s 个!刮削用时 %s 秒,停止用时 %s 秒' % ( - Flags.scrape_done, (Flags.total_count - Flags.scrape_done), used_time, self.stop_used_time)) + self.set_label_file_path.emit('⛔️ 刮削已手动停止!\n 已刮削 %s 个视频,还剩余 %s 个!刮削用时 %s 秒' % ( + Flags.scrape_done, (Flags.total_count - Flags.scrape_done), used_time)) + signal.show_log_text('\n ⛔️ 刮削已手动停止!\n 😊 已刮削 %s 个视频,还剩余 %s 个!刮削用时 %s 秒,停止用时 %s 秒' % ( + Flags.scrape_done, (Flags.total_count - Flags.scrape_done), used_time, self.stop_used_time)) signal.show_log_text("================================================================================") - signal.show_log_text( - ' ⏰ Start time'.ljust(13) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", - time.localtime(Flags.start_time))) - signal.show_log_text( - ' 🏁 End time'.ljust(13) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))) + signal.show_log_text(' ⏰ Start time'.ljust(13) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(Flags.start_time))) + signal.show_log_text(' 🏁 End time'.ljust(13) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))) signal.show_log_text(' ⏱ Used time'.ljust(13) + ': %sS' % used_time) signal.show_log_text(' 🍕 Per time'.ljust(13) + ': %sS' % average_time) signal.show_log_text("================================================================================") @@ -750,17 +711,14 @@ def _kill_threads(self, ): Flags.total_kills = len(new_thread_list) Flags.now_kill = 0 start_time = time.time() - self.set_label_file_path.emit( - f'⛔️ 正在停止刮削...\n 正在停止已在运行的任务线程(1/{Flags.total_kills})...') - signal.show_log_text( - f'\n ⛔️ {get_current_time()} 已停止添加新的刮削任务,正在停止已在运行的任务线程({Flags.total_kills})...') + self.set_label_file_path.emit(f'⛔️ 正在停止刮削...\n 
正在停止已在运行的任务线程(1/{Flags.total_kills})...') + signal.show_log_text(f'\n ⛔️ {get_current_time()} 已停止添加新的刮削任务,正在停止已在运行的任务线程({Flags.total_kills})...') signal.show_traceback_log(f"⛔️ 正在停止正在运行的任务线程 ({Flags.total_kills}) ...") i = 0 for each in new_thread_list: i += 1 signal.show_traceback_log(f'正在停止线程: {i}/{Flags.total_kills} {each.getName()} ...') - signal.show_traceback_log( - '线程正在停止中,请稍后...\n 🍯 停止时间与线程数量及线程正在执行的任务有关,比如正在执行网络请求、文件下载等IO操作时,需要等待其释放资源。。。\n') + signal.show_traceback_log('线程正在停止中,请稍后...\n 🍯 停止时间与线程数量及线程正在执行的任务有关,比如正在执行网络请求、文件下载等IO操作时,需要等待其释放资源。。。\n') signal.stop = True for each in new_thread_list: # 线程池的线程 if 'MDCx-Pool' not in each.getName(): @@ -770,8 +728,7 @@ def _kill_threads(self, ): signal.stop = False self.stop_used_time = get_used_time(start_time) - signal.show_log_text( - ' 🕷 %s 已停止线程:%s/%s %s' % (get_current_time(), Flags.total_kills, Flags.total_kills, other_name)) + signal.show_log_text(' 🕷 %s 已停止线程:%s/%s %s' % (get_current_time(), Flags.total_kills, Flags.total_kills, other_name)) signal.show_traceback_log(f'所有线程已停止!!!({self.stop_used_time}s)\n ⛔️ 刮削已手动停止!\n') signal.show_log_text(f' ⛔️ {get_current_time()} 所有线程已停止!({self.stop_used_time}s)') thread_remain_list = [] @@ -967,12 +924,7 @@ def add_label_info_Thread(self, json_data): signal.show_traceback_log(traceback.format_exc()) def set_pixmap_thread(self, poster_path='', thumb_path='', poster_from='', cover_from=''): - t = threading.Thread(target=self._set_pixmap, args=( - poster_path, - thumb_path, - poster_from, - cover_from, - )) + t = threading.Thread(target=self._set_pixmap, args=(poster_path, thumb_path, poster_from, cover_from,)) t.start() def _set_pixmap(self, poster_path='', thumb_path='', poster_from='', cover_from=''): @@ -1088,8 +1040,7 @@ def search_by_number_clicked(self): file_path = self.file_main_open_path main_file_name = split_path(file_path)[1] default_text = os.path.splitext(main_file_name)[0].upper() - text, ok = QInputDialog.getText(self, '输入番号重新刮削', f'文件名: {main_file_name}\n请输入番号:', - text=default_text) + text, ok = QInputDialog.getText(self, '输入番号重新刮削', f'文件名: {main_file_name}\n请输入番号:', text=default_text) if ok and text: Flags.again_dic[file_path] = [text, '', ''] signal.show_scrape_info('💡 已添加刮削!%s' % get_current_time()) @@ -1103,11 +1054,10 @@ def search_by_url_clicked(self): if self._check_main_file_path(): file_path = self.file_main_open_path main_file_name = split_path(file_path)[1] - text, ok = QInputDialog.getText(self, '输入网址重新刮削', - f'文件名: {main_file_name}\n支持网站:airav_cc、airav、avsex、avsox、dmm、getchu、fc2' - f'、fc2club、fc2hub、iqqtv、jav321、javbus、javdb、freejavbt、javlibrary、mdtv' - f'、madouqu、mgstage、7mmtv、xcity、mywife、giga、faleno、dahlia、fantastica' - f'、prestige、hdouban、lulubar、love6、cnmdb、theporndb、kin8\n请输入番号对应的网址(不是网站首页地址!!!是番号页面地址!!!):') + text, ok = QInputDialog.getText(self, '输入网址重新刮削', f'文件名: {main_file_name}\n支持网站:airav_cc、airav、avsex、avsox、dmm、getchu、fc2' + f'、fc2club、fc2hub、iqqtv、jav321、javbus、javdb、freejavbt、javlibrary、mdtv' + f'、madouqu、mgstage、7mmtv、xcity、mywife、giga、faleno、dahlia、fantastica' + f'、prestige、hdouban、lulubar、love6、cnmdb、theporndb、kin8\n请输入番号对应的网址(不是网站首页地址!!!是番号页面地址!!!):') if ok and text: website, url = deal_url(text) if website: @@ -1328,10 +1278,8 @@ def show_hide_logs(self, show): self.Ui.pushButton_show_hide_logs.setIcon(QIcon(resources.hide_logs_icon)) self.Ui.textBrowser_log_main_2.show() self.Ui.textBrowser_log_main.resize(790, 418) - self.Ui.textBrowser_log_main.verticalScrollBar().setValue( - self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) - 
self.Ui.textBrowser_log_main_2.verticalScrollBar().setValue( - self.Ui.textBrowser_log_main_2.verticalScrollBar().maximum()) + self.Ui.textBrowser_log_main.verticalScrollBar().setValue(self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) + self.Ui.textBrowser_log_main_2.verticalScrollBar().setValue(self.Ui.textBrowser_log_main_2.verticalScrollBar().maximum()) # self.Ui.textBrowser_log_main_2.moveCursor(self.Ui.textBrowser_log_main_2.textCursor().End) @@ -1339,8 +1287,7 @@ def show_hide_logs(self, show): self.Ui.pushButton_show_hide_logs.setIcon(QIcon(resources.show_logs_icon)) self.Ui.textBrowser_log_main_2.hide() self.Ui.textBrowser_log_main.resize(790, 689) - self.Ui.textBrowser_log_main.verticalScrollBar().setValue( - self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) + self.Ui.textBrowser_log_main.verticalScrollBar().setValue(self.Ui.textBrowser_log_main.verticalScrollBar().maximum()) # 日志页点展开折叠失败列表 def pushButton_show_hide_failed_list_clicked(self): @@ -1355,8 +1302,7 @@ def show_hide_failed_list(self, show): self.Ui.textBrowser_log_main_3.show() self.Ui.pushButton_scraper_failed_list.show() self.Ui.pushButton_save_failed_list.show() - self.Ui.textBrowser_log_main_3.verticalScrollBar().setValue( - self.Ui.textBrowser_log_main_3.verticalScrollBar().maximum()) + self.Ui.textBrowser_log_main_3.verticalScrollBar().setValue(self.Ui.textBrowser_log_main_3.verticalScrollBar().maximum()) else: self.Ui.pushButton_save_failed_list.hide() @@ -1374,8 +1320,7 @@ def pushButton_save_failed_list_clicked(self): if len(Flags.failed_file_list) or True: log_name = 'failed_' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.txt' log_name = convert_path(os.path.join(get_movie_path_setting()[0], log_name)) - filename, filetype = QFileDialog.getSaveFileName(None, "保存失败文件列表", log_name, "Text Files (*.txt)", - options=self.options) + filename, filetype = QFileDialog.getSaveFileName(None, "保存失败文件列表", log_name, "Text Files (*.txt)", options=self.options) if filename: with open(filename, 'w', encoding='utf-8') as f: f.write(self.Ui.textBrowser_log_main_3.toPlainText().strip()) @@ -1419,7 +1364,7 @@ def show_log_text(self, text): self.logs_counts = 0 self.main_logs_clear.emit('') self.main_logs_show.emit(add_html(' 🗑️ 日志过多,已清屏!')) - # self.show_traceback_log(self.Ui.textBrowser_log_main.document().lineCount()) + # self.show_traceback_log(self.Ui.textBrowser_log_main.document().lineCount()) except: signal.show_traceback_log(traceback.format_exc()) @@ -1476,13 +1421,11 @@ def pushButton_creat_symlink_clicked(self): """ self.pushButton_show_log_clicked() # 点击按钮后跳转到日志页面 - if bool('copy_netdisk_nfo' in config.switch_on) != bool( - self.Ui.checkBox_copy_netdisk_nfo.isChecked()): + if bool('copy_netdisk_nfo' in config.switch_on) != bool(self.Ui.checkBox_copy_netdisk_nfo.isChecked()): self.pushButton_save_config_clicked() try: - t = threading.Thread(target=newtdisk_creat_symlink, - args=(bool(self.Ui.checkBox_copy_netdisk_nfo.isChecked()),)) + t = threading.Thread(target=newtdisk_creat_symlink, args=(bool(self.Ui.checkBox_copy_netdisk_nfo.isChecked()),)) Flags.threads_list.append(t) t.start() # 启动线程,即让线程开始执行 except: @@ -1497,8 +1440,7 @@ def pushButton_find_missing_number_clicked(self): self.pushButton_show_log_clicked() # 点击按钮后跳转到日志页面 # 如果本地资源库或演员与配置内容不同,则自动保存 - if self.Ui.lineEdit_actors_name.text() != config.actors_name \ - or self.Ui.lineEdit_local_library_path.text() != config.local_library: + if self.Ui.lineEdit_actors_name.text() != config.actors_name or 
self.Ui.lineEdit_local_library_path.text() != config.local_library: self.pushButton_save_config_clicked() try: t = threading.Thread(target=check_missing_number, args=(True,)) @@ -1513,7 +1455,9 @@ def pushButton_select_file_clicked(self): media_path = self.Ui.lineEdit_movie_path.text() # 获取待刮削目录作为打开目录 if not media_path: media_path = get_main_path() - file_path, filetype = QFileDialog.getOpenFileName(None, "选取视频文件", media_path, + file_path, filetype = QFileDialog.getOpenFileName(None, + "选取视频文件", + media_path, "Movie Files(*.mp4 " "*.avi *.rmvb *.wmv " "*.mov *.mkv *.flv *.ts " "*.webm *.MP4 *.AVI " "*.RMVB *.WMV *.MOV " "*.MKV *.FLV *.TS " "*.WEBM);;All Files(*)", options=self.options) if file_path: @@ -1555,9 +1499,7 @@ def pushButton_select_thumb_clicked(self): path = self.Ui.lineEdit_movie_path.text() if not path: path = get_main_path() - file_path, fileType = QFileDialog.getOpenFileName(None, "选取缩略图", path, - "Picture Files(*.jpg *.png);;All Files(*)", - options=self.options) + file_path, fileType = QFileDialog.getOpenFileName(None, "选取缩略图", path, "Picture Files(*.jpg *.png);;All Files(*)", options=self.options) if file_path != '': self.cutwindow.showimage(file_path) self.cutwindow.show() @@ -1956,8 +1898,7 @@ def switch_custom_website_change(self, new_website_name): def config_file_change(self, new_config_file): if new_config_file != config.file: new_config_path = os.path.join(config.folder, new_config_file) - signal.show_log_text( - '\n================================================================================\n切换配置:%s' % new_config_path) + signal.show_log_text('\n================================================================================\n切换配置:%s' % new_config_path) with open(config.get_mark_file_path(), 'w', encoding='UTF-8') as f: f.write(new_config_path) temp_dark = self.dark_mode @@ -2001,7 +1942,8 @@ def checkBox_i_agree_clean_clicked(self): def _check_mac_config_folder(self): if self.check_mac and not config.is_windows and '.app/Contents/Resources' in config.folder: self.check_mac = False - box = QMessageBox(QMessageBox.Warning, '选择配置文件目录', + box = QMessageBox(QMessageBox.Warning, + '选择配置文件目录', f'检测到当前配置文件目录为:\n {config.folder}\n\n由于 MacOS 平台在每次更新 APP 版本时会覆盖该目录的配置,因此请选择其他的配置目录!\n这样下次更新 APP 时,选择相同的配置目录即可读取你之前的配置!!!') box.setStandardButtons(QMessageBox.Yes | QMessageBox.No) box.button(QMessageBox.Yes).setText('选择目录') @@ -2044,73 +1986,75 @@ def network_check(self): # 检测网络连通性 signal.show_net_info(' 开始检测网络连通性...') - net_info = {'github': ['https://raw.githubusercontent.com', ''], - 'airav_cc': ['https://airav.io', ''], - 'iqqtv': ['https://iqq5.xyz', ''], - 'avsex': ['https://paycalling.com', ''], - 'freejavbt': ['https://freejavbt.com', ''], - 'javbus': ['https://www.javbus.com', ''], - 'javdb': ['https://javdb.com', ''], - 'jav321': ['https://www.jav321.com', ''], - 'javlibrary': ['https://www.javlibrary.com', ''], - 'dmm': ['https://www.dmm.co.jp', ''], - 'mgstage': ['https://www.mgstage.com', ''], - 'getchu': ['http://www.getchu.com', ''], - 'theporndb': ['https://api.theporndb.net', ''], - 'avsox': [get_avsox_domain(), ''], - 'xcity': ['https://xcity.jp', ''], - '7mmtv': ['https://7mmtv.sx', ''], - 'mdtv': ['https://www.mdpjzip.xyz', ''], - 'madouqu': ['https://madouqu.com', ''], - 'cnmdb': ['https://cnmdb.net', ''], - 'hscangku': ['https://hscangku.net', ''], - 'cableav': ['https://cableav.tv', ''], - 'lulubar': ['https://lulubar.co', ''], - 'love6': ['https://love6.tv', ''], - 'yesjav': ['http://www.yesjav.info', ''], - 'fc2': ['https://adult.contents.fc2.com', 
''], - 'fc2club': ['https://fc2club.top', ''], - 'fc2hub': ['https://javten.com', ''], - 'airav': ['https://www.airav.wiki', ''], - 'av-wiki': ['https://av-wiki.net', ''], - 'seesaawiki': ['https://seesaawiki.jp', ''], - 'mywife': ['https://mywife.cc', ''], - 'giga': ['https://www.giga-web.jp', ''], - 'kin8': ['https://www.kin8tengoku.com', ''], - 'fantastica': ['http://fantastica-vr.com', ''], - 'faleno': ['https://faleno.jp', ''], - 'dahlia': ['https://dahlia-av.jp', ''], - 'prestige': ['https://www.prestige-av.com', ''], - 's1s1s1': ['https://s1s1s1.com', ''], - 'moodyz': ['https://moodyz.com', ''], - 'madonna': ['https://www.madonna-av.com', ''], - 'wanz-factory': ['https://www.wanz-factory.com', ''], - 'ideapocket': ['https://ideapocket.com', ''], - 'kirakira': ['https://kirakira-av.com', ''], - 'ebody': ['https://www.av-e-body.com', ''], - 'bi-av': ['https://bi-av.com', ''], - 'premium': ['https://premium-beauty.com', ''], - 'miman': ['https://miman.jp', ''], - 'tameikegoro': ['https://tameikegoro.jp', ''], - 'fitch': ['https://fitch-av.com', ''], - 'kawaiikawaii': ['https://kawaiikawaii.jp', ''], - 'befreebe': ['https://befreebe.com', ''], - 'muku': ['https://muku.tv', ''], - 'attackers': ['https://attackers.net', ''], - 'mko-labo': ['https://mko-labo.net', ''], - 'dasdas': ['https://dasdas.jp', ''], - 'mvg': ['https://mvg.jp', ''], - 'opera': ['https://av-opera.jp', ''], - 'oppai': ['https://oppai-av.com', ''], - 'v-av': ['https://v-av.com', ''], - 'to-satsu': ['https://to-satsu.com', ''], - 'bibian': ['https://bibian-av.com', ''], - 'honnaka': ['https://honnaka.jp', ''], - 'rookie': ['https://rookie-av.jp', ''], - 'nanpa': ['https://nanpa-japan.jp', ''], - 'hajimekikaku': ['https://hajimekikaku.com', ''], - 'hhh-av': ['https://hhh-av.com', '']} - + net_info = { + 'github': ['https://raw.githubusercontent.com', ''], + 'airav_cc': ['https://airav.io', ''], + 'iqqtv': ['https://iqq5.xyz', ''], + 'avsex': ['https://paycalling.com', ''], + 'freejavbt': ['https://freejavbt.com', ''], + 'javbus': ['https://www.javbus.com', ''], + 'javdb': ['https://javdb.com', ''], + 'jav321': ['https://www.jav321.com', ''], + 'javlibrary': ['https://www.javlibrary.com', ''], + 'dmm': ['https://www.dmm.co.jp', ''], + 'mgstage': ['https://www.mgstage.com', ''], + 'getchu': ['http://www.getchu.com', ''], + 'theporndb': ['https://api.theporndb.net', ''], + 'avsox': [get_avsox_domain(), ''], + 'xcity': ['https://xcity.jp', ''], + '7mmtv': ['https://7mmtv.sx', ''], + 'mdtv': ['https://www.mdpjzip.xyz', ''], + 'madouqu': ['https://madouqu.com', ''], + 'cnmdb': ['https://cnmdb.net', ''], + 'hscangku': ['https://hscangku.net', ''], + 'cableav': ['https://cableav.tv', ''], + 'lulubar': ['https://lulubar.co', ''], + 'love6': ['https://love6.tv', ''], + 'yesjav': ['http://www.yesjav.info', ''], + 'fc2': ['https://adult.contents.fc2.com', ''], + 'fc2club': ['https://fc2club.top', ''], + 'fc2hub': ['https://javten.com', ''], + 'airav': ['https://www.airav.wiki', ''], + 'av-wiki': ['https://av-wiki.net', ''], + 'seesaawiki': ['https://seesaawiki.jp', ''], + 'mywife': ['https://mywife.cc', ''], + 'giga': ['https://www.giga-web.jp', ''], + 'kin8': ['https://www.kin8tengoku.com', ''], + 'fantastica': ['http://fantastica-vr.com', ''], + 'faleno': ['https://faleno.jp', ''], + 'dahlia': ['https://dahlia-av.jp', ''], + 'prestige': ['https://www.prestige-av.com', ''], + 's1s1s1': ['https://s1s1s1.com', ''], + 'moodyz': ['https://moodyz.com', ''], + 'madonna': ['https://www.madonna-av.com', ''], + 'wanz-factory': 
['https://www.wanz-factory.com', ''], + 'ideapocket': ['https://ideapocket.com', ''], + 'kirakira': ['https://kirakira-av.com', ''], + 'ebody': ['https://www.av-e-body.com', ''], + 'bi-av': ['https://bi-av.com', ''], + 'premium': ['https://premium-beauty.com', ''], + 'miman': ['https://miman.jp', ''], + 'tameikegoro': ['https://tameikegoro.jp', ''], + 'fitch': ['https://fitch-av.com', ''], + 'kawaiikawaii': ['https://kawaiikawaii.jp', ''], + 'befreebe': ['https://befreebe.com', ''], + 'muku': ['https://muku.tv', ''], + 'attackers': ['https://attackers.net', ''], + 'mko-labo': ['https://mko-labo.net', ''], + 'dasdas': ['https://dasdas.jp', ''], + 'mvg': ['https://mvg.jp', ''], + 'opera': ['https://av-opera.jp', ''], + 'oppai': ['https://oppai-av.com', ''], + 'v-av': ['https://v-av.com', ''], + 'to-satsu': ['https://to-satsu.com', ''], + 'bibian': ['https://bibian-av.com', ''], + 'honnaka': ['https://honnaka.jp', ''], + 'rookie': ['https://rookie-av.jp', ''], + 'nanpa': ['https://nanpa-japan.jp', ''], + 'hajimekikaku': ['https://hajimekikaku.com', ''], + 'hhh-av': ['https://hhh-av.com', ''] + } + for website in config.SUPPORTED_WEBSITES: if hasattr(config, f"{website}_website"): signal.show_net_info(f" ⚠️{website} 使用自定义网址:{getattr(config, f'{website}_website')}") @@ -2180,8 +2124,7 @@ def network_check(self): except: if signal.stop: signal.show_net_info('\n⛔️ 当前有刮削任务正在停止中,请等待刮削停止后再进行检测!') - signal.show_net_info( - "================================================================================\n") + signal.show_net_info("================================================================================\n") self.Ui.pushButton_check_net.setEnabled(True) self.Ui.pushButton_check_net.setText('开始检测') self.Ui.pushButton_check_net.setStyleSheet( @@ -2317,9 +2260,7 @@ def _check_javbus_cookie(self): new_cookie = {'cookie': input_cookie} cookies = config.javbus headers_o = config.headers - headers = { - 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6', - } + headers = {'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6', } headers.update(headers_o) javbus_url = getattr(config, 'javbus_website', 'https://javbus.com') + '/FSDSS-660' @@ -2473,11 +2414,9 @@ def auto_scrape(self): time.sleep(0.1) timed_interval = config.timed_interval self.atuo_scrape_count += 1 - signal.show_log_text( - f'\n\n 🍔 已启用「循环刮削」!间隔时间:{timed_interval}!即将开始第 {self.atuo_scrape_count} 次循环刮削!') + signal.show_log_text(f'\n\n 🍔 已启用「循环刮削」!间隔时间:{timed_interval}!即将开始第 {self.atuo_scrape_count} 次循环刮削!') if Flags.scrape_start_time: - signal.show_log_text( - ' ⏰ 上次刮削时间: ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(Flags.scrape_start_time))) + signal.show_log_text(' ⏰ 上次刮削时间: ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(Flags.scrape_start_time))) start_new_scrape(FileMode.Default) def auto_start(self): diff --git a/src/controllers/main_window/save_config.py b/src/controllers/main_window/save_config.py index fcf3f28..a005841 100644 --- a/src/controllers/main_window/save_config.py +++ b/src/controllers/main_window/save_config.py @@ -924,13 +924,12 @@ def save_config(self): scrape_like_text += " · 软连接开" elif config.soft_link == 2: scrape_like_text += " · 硬连接开" - signal.show_log_text( - f' 🛠 当前配置:{config.path} 保存完成!\n ' - f'📂 程序目录:{get_main_path()} \n ' - f'📂 刮削目录:{get_movie_path_setting()[0]} \n ' - f'💠 刮削模式:{Flags.main_mode_text} · {scrape_like_text} \n ' - f'🖥️ 系统信息:{platform.platform()} \n ' - f'🐰 软件版本:{self.localversion} \n') + signal.show_log_text(f' 🛠 当前配置:{config.path} 保存完成!\n ' + f'📂 
程序目录:{get_main_path()} \n ' + f'📂 刮削目录:{get_movie_path_setting()[0]} \n ' + f'💠 刮削模式:{Flags.main_mode_text} · {scrape_like_text} \n ' + f'🖥️ 系统信息:{platform.platform()} \n ' + f'🐰 软件版本:{self.localversion} \n') except: signal.show_traceback_log(traceback.format_exc()) try: diff --git a/src/controllers/main_window/style.py b/src/controllers/main_window/style.py index e4fae6e..5cc5a4f 100644 --- a/src/controllers/main_window/style.py +++ b/src/controllers/main_window/style.py @@ -4,8 +4,7 @@ def set_style(self): return # 控件美化 左侧栏样式 - self.Ui.widget_setting.setStyleSheet( - f''' + self.Ui.widget_setting.setStyleSheet(f''' QWidget#widget_setting{{ background: #F5F5F6; border-top-left-radius: {self.window_radius}px; @@ -26,11 +25,9 @@ def set_style(self): color: rgba(20, 20, 20, 250); border: 0px solid rgba(255, 255, 255, 80); }} - ''' - ) + ''') # 主界面 - self.Ui.page_main.setStyleSheet( - ''' + self.Ui.page_main.setStyleSheet(''' QLabel#label_number1,#label_actor1,#label_title1,#label_poster1,#label_number,#label_actor,#label_title,#label_poster1{ font-size: 16px; font-weight: bold; @@ -55,8 +52,7 @@ def set_style(self): } ''') # 工具页 - self.Ui.page_tool.setStyleSheet( - ''' + self.Ui.page_tool.setStyleSheet(''' * { font-size: 13px; } @@ -86,11 +82,9 @@ def set_style(self): background-color: rgba(245,245,246,220); border-radius: 10px; } - ''' - ) + ''') # 使用帮助页 - self.Ui.page_about.setStyleSheet( - ''' + self.Ui.page_about.setStyleSheet(''' * { font-size: 13px; } @@ -103,8 +97,7 @@ def set_style(self): } ''') # 设置页 - self.Ui.page_setting.setStyleSheet( - ''' + self.Ui.page_setting.setStyleSheet(''' * { font-size:13px; } @@ -170,11 +163,9 @@ def set_style(self): background-color: rgba(245,245,246,220); border-radius: 10px; } - ''' - ) + ''') # 整个页面 - self.Ui.centralwidget.setStyleSheet( - f''' + self.Ui.centralwidget.setStyleSheet(f''' * {{ font-family: Consolas, 'PingFang SC', 'Microsoft YaHei UI', 'Noto Color Emoji', 'Segoe UI Emoji'; font-size:13px; @@ -312,14 +303,12 @@ def set_style(self): width: 3px; /*区块宽度*/ margin: 0px; }} - ''' - ) + ''') def set_dark_style(self): # 控件美化 左侧栏样式 暗黑模式 - self.Ui.widget_setting.setStyleSheet( - f''' + self.Ui.widget_setting.setStyleSheet(f''' QWidget#widget_setting{{ background: #1F272F; border-top-left-radius: {self.window_radius}px; @@ -340,11 +329,9 @@ def set_dark_style(self): color: rgba(210, 210, 210, 250); border: 0px solid rgba(255, 255, 255, 80); }} - ''' - ) + ''') # 主界面 - self.Ui.page_main.setStyleSheet( - ''' + self.Ui.page_main.setStyleSheet(''' QLabel#label_number1,#label_actor1,#label_title1,#label_poster1,#label_number,#label_actor,#label_title,#label_poster1{ font-size: 16px; font-weight: bold; @@ -369,8 +356,7 @@ def set_dark_style(self): } ''') # 工具页 - self.Ui.page_tool.setStyleSheet( - ''' + self.Ui.page_tool.setStyleSheet(''' * { font-size: 13px; } @@ -392,11 +378,9 @@ def set_dark_style(self): background-color: rgba(180, 180, 180, 20); border-radius: 10px; } - ''' - ) + ''') # 使用帮助页 - self.Ui.page_about.setStyleSheet( - ''' + self.Ui.page_about.setStyleSheet(''' * { font-size: 13px; } @@ -409,8 +393,7 @@ def set_dark_style(self): } ''') # 设置页 - self.Ui.page_setting.setStyleSheet( - ''' + self.Ui.page_setting.setStyleSheet(''' * { font-size:13px; } @@ -475,11 +458,9 @@ def set_dark_style(self): QPushButton#pushButton_scrape_note,#pushButton_field_tips_website,#pushButton_field_tips_nfo,#pushButton_check_javdb_cookie{ color: black; } - ''' - ) + ''') # 整个页面 - self.Ui.centralwidget.setStyleSheet( - f''' + 
self.Ui.centralwidget.setStyleSheet(f''' * {{ font-family: Consolas, 'PingFang SC', 'Microsoft YaHei UI', 'Noto Color Emoji', 'Segoe UI Emoji'; font-size:13px; @@ -658,5 +639,4 @@ def set_dark_style(self): width: 3px; /*区块宽度*/ margin: 0px; }} - ''' - ) + ''') diff --git a/src/models/base/image.py b/src/models/base/image.py index 2c39b04..c593e15 100644 --- a/src/models/base/image.py +++ b/src/models/base/image.py @@ -36,7 +36,7 @@ def get_pixmap(pic_path, poster=True, pic_from=''): else: w = int(220 * pic_width / pic_height) h = 220 - msg = '%s: %s*%s/%sKB' % (pic_from.title(), pic_width, pic_height, pic_file_size) + msg = f'{pic_from.title()}: {pic_width}*{pic_height}/{pic_file_size}KB' return [True, pix, msg, w, h] delete_file(pic_path) if poster: @@ -126,14 +126,11 @@ def cut_thumb_to_poster(json_data, thumb_path, poster_path, image_cut=''): img_new_png.save(poster_path, quality=95, subsampling=0) img.close() if check_pic(poster_path): - json_data['logs'] += "\n 🍀 Poster done! (%s)(%ss)" % ( - json_data['poster_from'], get_used_time(start_time)) + json_data['logs'] += f"\n 🍀 Poster done! ({json_data['poster_from']})({get_used_time(start_time)}s)" return True - json_data['logs'] += '\n 🥺 Poster cut failed! (%s)(%ss)' % ( - json_data['poster_from'], get_used_time(start_time)) + json_data['logs'] += f'\n 🥺 Poster cut failed! ({json_data["poster_from"]})({get_used_time(start_time)}s)' except Exception as e: - json_data['logs'] += '\n 🥺 Poster failed! (%s)(%ss)\n %s' % ( - json_data['poster_from'], get_used_time(start_time), str(e)) + json_data['logs'] += f'\n 🥺 Poster failed! ({json_data["poster_from"]})({get_used_time(start_time)}s)\n {str(e)}' signal.show_traceback_log(traceback.format_exc()) signal.show_log_text(traceback.format_exc()) return False diff --git a/src/models/base/number.py b/src/models/base/number.py index 7fa69fb..cb3d910 100644 --- a/src/models/base/number.py +++ b/src/models/base/number.py @@ -11,10 +11,9 @@ def is_uncensored(number): return True # 无码车牌BT,CT,EMP,CCDV,CWP,CWPBD,DSAM,DRC,DRG,GACHI,heydouga,JAV,LAF,LAFBD,HEYZO,KTG,KP,KG,LLDV,MCDV,MKD,MKBD,MMDV,NIP,PB,PT,QE,RED,RHJ,S2M,SKY,SKYHD,SMD,SSDV,SSKP,TRG,TS,xxx-av,YKB - key_start_word = ['BT-', 'CT-', 'EMP-', 'CCDV-', 'CWP-', 'CWPBD-', 'DSAM-', 'DRC-', 'DRG-', 'GACHI-', 'heydouga', - 'JAV-', 'LAF-', 'LAFBD-', 'HEYZO-', 'KTG-', 'KP-', 'KG-', 'LLDV-', 'MCDV-', 'MKD-', 'MKBD-', - 'MMDV-', 'NIP-', 'PB-', 'PT-', 'QE-', 'RED-', 'RHJ-', 'S2M-', 'SKY-', 'SKYHD-', 'SMD-', 'SSDV-', - 'SSKP-', 'TRG-', 'TS-', 'xxx-av-', 'YKB-', 'bird', 'bouga'] + key_start_word = ['BT-', 'CT-', 'EMP-', 'CCDV-', 'CWP-', 'CWPBD-', 'DSAM-', 'DRC-', 'DRG-', 'GACHI-', 'heydouga', 'JAV-', 'LAF-', 'LAFBD-', 'HEYZO-', 'KTG-', 'KP-', + 'KG-', 'LLDV-', 'MCDV-', 'MKD-', 'MKBD-', 'MMDV-', 'NIP-', 'PB-', 'PT-', 'QE-', 'RED-', 'RHJ-', 'S2M-', 'SKY-', 'SKYHD-', 'SMD-', 'SSDV-', 'SSKP-', + 'TRG-', 'TS-', 'xxx-av-', 'YKB-', 'bird', 'bouga'] for each in key_start_word: if number.upper().startswith(each.upper()): return True @@ -33,8 +32,8 @@ def is_suren(number): def get_number_letters(number): number_upper = number.upper() - if re.search(r'([A-Za-z0-9-\.]{3,})[-_\. ]{1}\d{2}\.\d{2}\.\d{2}', number): - return re.search(r'([A-Za-z0-9-\.]{3,})[-_\. ]{1}\d{2}\.\d{2}\.\d{2}', number)[1] + if re.search(r'([A-Za-z0-9-.]{3,})[-_. ]\d{2}\.\d{2}\.\d{2}', number): + return re.search(r'([A-Za-z0-9-.]{3,})[-_. 
]\d{2}\.\d{2}\.\d{2}', number)[1] if number_upper.startswith('FC2'): return 'FC2' if number_upper.startswith('MYWIFE'): @@ -106,10 +105,9 @@ def remove_escape_string(filename, replace_char=''): for string in config.escape_string_list: if string: filename = filename.replace(string.upper(), replace_char) - short_strings = ['4K', '4KS', '8K', 'HD', 'LR', 'VR', 'DVD', 'FULL', 'HEVC', 'H264', 'H265', 'X264', 'X265', 'AAC', - 'XXX', 'PRT'] + short_strings = ['4K', '4KS', '8K', 'HD', 'LR', 'VR', 'DVD', 'FULL', 'HEVC', 'H264', 'H265', 'X264', 'X265', 'AAC', 'XXX', 'PRT'] for each in short_strings: - filename = re.sub(r'[-_ \.\[]%s[-_ \.\]]' % each.upper(), '-', filename) + filename = re.sub(r'[-_ .\[]%s[-_ .\]]' % each.upper(), '-', filename) return filename.replace('--', '-').strip('-_ .') @@ -120,12 +118,7 @@ def get_file_number(filepath): file_name = remove_escape_string(real_name) + '.' # 替换cd_part、EP、-C - filename = (file_name. - replace('-C.', '.'). - replace('.PART', '-CD'). - replace('-PART', '-CD'). - replace(' EP.', '.EP'). - replace('-CD-', '')) + filename = (file_name.replace('-C.', '.').replace('.PART', '-CD').replace('-PART', '-CD').replace(' EP.', '.EP').replace('-CD-', '')) # 去除分集 filename = re.sub(r'[-_ .]CD\d{1,2}', '', filename) # xxx-CD1.mp4 @@ -138,10 +131,7 @@ def get_file_number(filepath): filename = re.sub(r"[-\[]\d{2}[-_.]\d{2}[-_.]\d{2}]?", "", filename) # 去除文件名中时间 # 转换番号 - filename = (filename.replace('FC2-PPV', 'FC2-'). - replace('FC2PPV', 'FC2-'). - replace('--', '-'). - replace('GACHIPPV', 'GACHI')) + filename = (filename.replace('FC2-PPV', 'FC2-').replace('FC2PPV', 'FC2-').replace('--', '-').replace('GACHIPPV', 'GACHI')) # 提取番号 if 'MYWIFE' in filename and re.search(r'NO\.\d*', filename): # 提取 mywife No.1111 @@ -156,13 +146,12 @@ def get_file_number(filepath): file_number = re.search(r'MMR-?[A-Z]{2,}-?\d+[A-Z]*', filename).group() return file_number.replace('MMR-', 'MMR') - elif re.search(r'([^A-Z]|^)(MD[A-Z-]*\d{4,}(-\d{1})?)', file_name) and 'MDVR' not in file_name: # 提取番号 md-0165-1 - file_number = re.search(r'([^A-Z]|^)(MD[A-Z-]*\d{4,}(-\d{1})?)', file_name).group(2) + elif re.search(r'([^A-Z]|^)(MD[A-Z-]*\d{4,}(-\d)?)', file_name) and 'MDVR' not in file_name: # 提取番号 md-0165-1 + file_number = re.search(r'([^A-Z]|^)(MD[A-Z-]*\d{4,}(-\d)?)', file_name).group(2) return file_number - elif re.findall(r'([A-Z0-9_]{2,})[-\.]{1}2?0?(\d{2}[-\.]\d{2}[-\.]\d{2})', - oumei_filename): # 提取欧美番号 sexart.11.11.11 - result = re.findall(r'([A-Z0-9-]{2,})[-_\.]{1}2?0?(\d{2}[-\.]\d{2}[-\.]\d{2})', oumei_filename) + elif re.findall(r'([A-Z0-9_]{2,})[-.]2?0?(\d{2}[-.]\d{2}[-.]\d{2})', oumei_filename): # 提取欧美番号 sexart.11.11.11 + result = re.findall(r'([A-Z0-9-]{2,})[-_.]2?0?(\d{2}[-.]\d{2}[-.]\d{2})', oumei_filename) return (long_name(result[0][0].strip('-')) + '.' + result[0][1].replace('-', '.')).capitalize() elif re.search(r'XXX-AV-\d{4,}', filename): # 提取xxx-av-11111 @@ -246,7 +235,7 @@ def get_file_number(filepath): file_number = temp[0] + '-' + temp[1] else: - temp_name = re.sub(r'[【((\[].+?[\]))】]', '', file_name).strip('@. ') # 去除[] + temp_name = re.sub(r'[【((\[].+?[]))】]', '', file_name).strip('@. 
') # 去除[] temp_name = unicodedata.normalize('NFC', temp_name) # Mac 把会拆成两个字符,即 NFD,而网页请求使用的是 NFC try: temp_name = temp_name.encode('cp932').decode('shift_jis') # 转换为常见日文,比如~ 转换成 〜 diff --git a/src/models/base/path.py b/src/models/base/path.py index b846e0f..79ce90e 100644 --- a/src/models/base/path.py +++ b/src/models/base/path.py @@ -6,8 +6,8 @@ import traceback from os.path import abspath, dirname, realpath -from models.signals import signal from models.config.config import config +from models.signals import signal def get_main_path(): diff --git a/src/models/base/utils.py b/src/models/base/utils.py index c6ecc26..bc70967 100644 --- a/src/models/base/utils.py +++ b/src/models/base/utils.py @@ -79,11 +79,10 @@ def kill_a_thread(t): def get_user_agent(): - temp_l = random.randint(109, 111) + temp_l = random.randint(109, 129) temp_m = random.randint(1, 5563) temp_n = random.randint(1, 180) - return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s.0.%s.%s Safari/537.36' % ( - temp_l, temp_m, temp_n) + return f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{temp_l}.0.{temp_m}.{temp_n} Safari/537.36' def convert_path(path): diff --git a/src/models/base/web.py b/src/models/base/web.py index dec9883..d9b3e07 100644 --- a/src/models/base/web.py +++ b/src/models/base/web.py @@ -15,9 +15,8 @@ import urllib3.util.connection as urllib3_cn from PIL import Image from ping3 import ping -from requests.exceptions import ChunkedEncodingError, ConnectTimeout, ConnectionError, ContentDecodingError, HTTPError, \ - InvalidHeader, InvalidProxyURL, InvalidURL, ProxyError, ReadTimeout, SSLError, StreamConsumedError, Timeout, \ - TooManyRedirects, URLRequired +from requests.exceptions import ChunkedEncodingError, ConnectTimeout, ConnectionError, ContentDecodingError, HTTPError, InvalidHeader, InvalidProxyURL, InvalidURL, \ + ProxyError, ReadTimeout, SSLError, StreamConsumedError, Timeout, TooManyRedirects, URLRequired from models.base.utils import get_user_agent, singleton from models.config.config import config @@ -51,57 +50,53 @@ def __init__(self): self.pool = ThreadPoolExecutor(32) self.curl_session = curl_cffi.requests.Session(max_redirects=10) - def get_html(self, url: str, headers=None, cookies=None, proxies=True, allow_redirects=True, json_data=False, + def get_html(self, + url: str, + headers=None, + cookies=None, + proxies=True, + allow_redirects=True, + json_data=False, content=False, - res=False, keep=True, timeout=False, encoding='utf-8', back_cookie=False): + res=False, + keep=True, + timeout=False, + encoding='utf-8', + back_cookie=False): # 获取代理信息 retry_times = config.retry if proxies: proxies = config.proxies else: - proxies = { - "http": None, - "https": None, - } + proxies = {"http": None, "https": None, } if not headers: headers = config.headers if not timeout: timeout = config.timeout if 'getchu' in url: - headers_o = { - 'Referer': 'http://www.getchu.com/top.html', - } + headers_o = {'Referer': 'http://www.getchu.com/top.html', } headers.update(headers_o) elif 'xcity' in url: - headers_o = { - 'referer': 'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=2&sg=main&num=60', - } + headers_o = {'referer': 'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q=2&sg=main&num=60', } headers.update(headers_o) # javbus封面图需携带refer,refer似乎没有做强校验,但须符合格式要求,否则403 elif 'javbus' in url: - headers_o = { - 'Referer': 'https://www.javbus.com/', - } + headers_o = {'Referer': 
'https://www.javbus.com/', } headers.update(headers_o) elif 'giga' in url: # 搜索时需要携带refer,获取cookies时不能携带 giga_refer = '' if 'cookie_set.php' in url else 'https://www.giga-web.jp/top.html' - headers_o = { - 'Referer': giga_refer, - } + headers_o = {'Referer': giga_refer, } headers.update(headers_o) signal.add_log(f'🔎 请求 {url}') for i in range(int(retry_times)): try: if keep: - response = self.session_g.get(url, headers=headers, cookies=cookies, proxies=proxies, - timeout=timeout, - verify=False, allow_redirects=allow_redirects) + response = self.session_g.get(url, headers=headers, cookies=cookies, proxies=proxies, timeout=timeout, verify=False, allow_redirects=allow_redirects) else: - response = requests.get(url, headers=headers, cookies=cookies, proxies=proxies, timeout=timeout, - verify=False, allow_redirects=allow_redirects) + response = requests.get(url, headers=headers, cookies=cookies, proxies=proxies, timeout=timeout, verify=False, allow_redirects=allow_redirects) # print(response.headers.items()) # print(response.status_code, url) _header = response.headers @@ -130,8 +125,7 @@ def get_html(self, url: str, headers=None, cookies=None, proxies=True, allow_red signal.add_log(f"🔴 请求失败!{error_info}") return False, error_info - def post_html(self, url: str, data=None, json=None, headers=None, cookies=None, proxies=True, json_data=False, - keep=True): + def post_html(self, url: str, data=None, json=None, headers=None, cookies=None, proxies=True, json_data=False, keep=True): # 获取代理信息 timeout = config.timeout retry_times = config.retry @@ -140,20 +134,15 @@ def post_html(self, url: str, data=None, json=None, headers=None, cookies=None, if proxies: proxies = config.proxies else: - proxies = { - "http": None, - "https": None, - } + proxies = {"http": None, "https": None, } signal.add_log(f'🔎 POST请求 {url}') for i in range(int(retry_times)): try: if keep: - response = self.session_g.post(url=url, data=data, json=json, headers=headers, cookies=cookies, - proxies=proxies, timeout=timeout, verify=False) + response = self.session_g.post(url=url, data=data, json=json, headers=headers, cookies=cookies, proxies=proxies, timeout=timeout, verify=False) else: - response = requests.post(url=url, data=data, json=json, headers=headers, cookies=cookies, - proxies=proxies, timeout=timeout, verify=False) + response = requests.post(url=url, data=data, json=json, headers=headers, cookies=cookies, proxies=proxies, timeout=timeout, verify=False) if response.status_code > 299: error_info = f"{response.status_code} {url}" signal.add_log('🔴 重试 [%s/%s] %s' % (i + 1, retry_times, error_info)) @@ -287,20 +276,17 @@ def _start_download(self, task) -> bool: _headers['Range'] = f'bytes={start}-{end}' for _ in range(int(retry_times)): try: - response = self.session_g.get(url, headers=_headers, proxies=proxies, timeout=timeout, verify=False, - stream=True) + response = self.session_g.get(url, headers=_headers, proxies=proxies, timeout=timeout, verify=False, stream=True) chunk_size = 128 chunks = [] for chunk in response.iter_content(chunk_size=chunk_size): - chunks.append(chunk) - # bar.update(chunk_size) + chunks.append(chunk) # bar.update(chunk_size) self.lock.acquire() with open(file_path, "rb+") as fp: fp.seek(start) for chunk in chunks: fp.write(chunk) self.lock.release() - # 释放锁 del chunks return True except: @@ -316,16 +302,12 @@ def curl_html(self, url, headers=None, proxies=True, cookies=None): if proxies: proxies = config.proxies else: - proxies = { - "http": None, - "https": None, - } + proxies = {"http": None, 
"https": None, } signal.add_log(f'🔎 请求 {url}') for i in range(int(retry_times)): try: - response = self.curl_session.get(url_encode(url), headers=headers, cookies=cookies, proxies=proxies, - impersonate="chrome120") + response = self.curl_session.get(url_encode(url), headers=headers, cookies=cookies, proxies=proxies, impersonate="chrome120") if 'amazon' in url: response.encoding = 'Shift_JIS' else: @@ -377,22 +359,17 @@ def check_url(url, length=False, real_url=False): return 0 if 'getchu' in url: - headers_o = { - 'Referer': 'http://www.getchu.com/top.html', - } + headers_o = {'Referer': 'http://www.getchu.com/top.html', } headers.update(headers_o) # javbus封面图需携带refer,refer似乎没有做强校验,但须符合格式要求,否则403 elif 'javbus' in url: - headers_o = { - 'Referer': 'https://www.javbus.com/', - } + headers_o = {'Referer': 'https://www.javbus.com/', } headers.update(headers_o) for j in range(retry_times): try: - r = requests.head(url, headers=headers, proxies=proxies, timeout=timeout, verify=False, - allow_redirects=True) - + r = requests.head(url, headers=headers, proxies=proxies, timeout=timeout, verify=False, allow_redirects=True) + # 不输出获取 dmm预览视频(trailer) 最高分辨率的测试结果到日志中 # get_dmm_trailer() 函数在多条错误的链接中找最高分辨率的链接,错误没有必要输出,避免误解为网络或软件问题 if r.status_code == 404 and '_w.mp4' in url: @@ -435,8 +412,7 @@ def check_url(url, length=False, real_url=False): # 获取文件大小。如果没有获取到文件大小,尝试下载15k数据,如果失败,视为不可用 content_length = r.headers.get('Content-Length') if not content_length: - response = requests.get(true_url, headers=headers, proxies=proxies, timeout=timeout, verify=False, - stream=True) + response = requests.get(true_url, headers=headers, proxies=proxies, timeout=timeout, verify=False, stream=True) i = 0 chunk_size = 5120 for _ in response.iter_content(chunk_size): @@ -507,11 +483,7 @@ def get_amazon_data(req_url): """ 获取 Amazon 数据,修改地区为540-0002 """ - headers = { - "accept-encoding": "gzip, deflate, br", - 'Host': 'www.amazon.co.jp', - 'User-Agent': get_user_agent(), - } + headers = {"accept-encoding": "gzip, deflate, br", 'Host': 'www.amazon.co.jp', 'User-Agent': get_user_agent(), } try: result, html_info = curl_html(req_url) except: @@ -522,18 +494,13 @@ def get_amazon_data(req_url): session_id = x[0] if x := re.findall(r'ubid-acbjp=([^ ]+)', html_info): ubid_acbjp = x[0] - headers_o = { - 'cookie': f'session-id={session_id}; ubid_acbjp={ubid_acbjp}', - } + headers_o = {'cookie': f'session-id={session_id}; ubid_acbjp={ubid_acbjp}', } headers.update(headers_o) result, html_info = curl_html(req_url, headers=headers) if not result: if '503 http' in html_info: - headers = { - 'Host': 'www.amazon.co.jp', - 'User-Agent': get_user_agent(), - } + headers = {'Host': 'www.amazon.co.jp', 'User-Agent': get_user_agent(), } result, html_info = get_html(req_url, headers=headers, keep=False, back_cookie=True) if not result: @@ -553,10 +520,7 @@ def get_amazon_data(req_url): ubid_acbjp = re.findall(r'ubid-acbjp=([^ ]+)', str(result))[0] except: pass - headers_o = { - 'Anti-csrftoken-a2z': anti_csrftoken_a2z, - 'cookie': f'session-id={session_id}; ubid_acbjp={ubid_acbjp}', - } + headers_o = {'Anti-csrftoken-a2z': anti_csrftoken_a2z, 'cookie': f'session-id={session_id}; ubid_acbjp={ubid_acbjp}', } headers.update(headers_o) mid_url = 'https://www.amazon.co.jp/portal-migration/hz/glow/get-rendered-toaster' \ '?pageType=Search&aisTransitionState=in&rancorLocationSource=REALM_DEFAULT&_=' @@ -576,15 +540,11 @@ def get_amazon_data(req_url): } headers.update(headers_o) post_url = 
'https://www.amazon.co.jp/portal-migration/hz/glow/address-change?actionSource=glow' - data = {"locationType": "LOCATION_INPUT", "zipCode": "540-0002", "storeContext": "generic", - "deviceType": "web", "pageType": "Search", "actionSource": "glow"} + data = {"locationType": "LOCATION_INPUT", "zipCode": "540-0002", "storeContext": "generic", "deviceType": "web", "pageType": "Search", "actionSource": "glow"} result, html = post_html(post_url, json=data, headers=headers) if result: if '540-0002' in str(html): - headers = { - 'Host': 'www.amazon.co.jp', - 'User-Agent': get_user_agent(), - } + headers = {'Host': 'www.amazon.co.jp', 'User-Agent': get_user_agent(), } result, html_info = curl_html(req_url, headers=headers) else: print('Amazon 修改地区失败: ', req_url, str(result), str(html)) @@ -600,16 +560,13 @@ def get_amazon_data(req_url): if "__main__" == __name__: # 测试下载文件 - list1 = [ - 'https://issuecdn.baidupcs.com/issue/netdisk/yunguanjia/BaiduNetdisk_7.2.8.9.exe', - 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw015/118abw015_mhb_w.mp4', - 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00016/118abw00016_mhb_w.mp4', - 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00017/118abw00017_mhb_w.mp4', - 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00018/118abw00018_mhb_w.mp4', - 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00019/118abw00019_mhb_w.mp4', - 'https://www.prestige-av.com/images/corner/goods/prestige/tktabw/018/pb_tktabw-018.jpg', - 'https://iqq1.one/preview/80/b/3SBqI8OjheI-800.jpg?v=1636404497', - ] + list1 = ['https://issuecdn.baidupcs.com/issue/netdisk/yunguanjia/BaiduNetdisk_7.2.8.9.exe', + 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw015/118abw015_mhb_w.mp4', + 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00016/118abw00016_mhb_w.mp4', + 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00017/118abw00017_mhb_w.mp4', + 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00018/118abw00018_mhb_w.mp4', + 'https://cc3001.dmm.co.jp/litevideo/freepv/1/118/118abw00019/118abw00019_mhb_w.mp4', + 'https://www.prestige-av.com/images/corner/goods/prestige/tktabw/018/pb_tktabw-018.jpg', 'https://iqq1.one/preview/80/b/3SBqI8OjheI-800.jpg?v=1636404497', ] for each in list1: url = each file_path = each.split('/')[-1] @@ -705,8 +662,7 @@ def ping_host(host_address): for i in range(count): thread_list[i].join() new_list = [each for each in result_list if each] - return f' ⏱ Ping {int(sum(new_list) / len(new_list))} ms ({len(new_list)}/{count})' \ - if new_list else f' 🔴 Ping - ms (0/{count})' + return f' ⏱ Ping {int(sum(new_list) / len(new_list))} ms ({len(new_list)}/{count})' if new_list else f' 🔴 Ping - ms (0/{count})' def check_version(): @@ -729,12 +685,7 @@ def check_theporndb_api_token(): timeout = config.timeout api_token = config.theporndb_api_token url = 'https://api.theporndb.net/scenes/hash/8679fcbdd29fa735' - headers = { - 'Authorization': f'Bearer {api_token}', - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'User-Agent': get_user_agent(), - } + headers = {'Authorization': f'Bearer {api_token}', 'Content-Type': 'application/json', 'Accept': 'application/json', 'User-Agent': get_user_agent(), } if not api_token: tips = '❌ 未填写 API Token,影响欧美刮削!可在「设置」-「网络」添加!' 
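The token check above reduces to a single authenticated GET against a fixed scene hash. A self-contained sketch of the same probe (URL and headers copied from the hunk, minus the User-Agent line; the config plumbing is replaced by plain arguments, and the return strings are illustrative, not the app's real log text):

    import requests

    def check_theporndb_token(api_token: str, timeout: int = 10) -> str:
        # Probe a known scene hash with a Bearer token; an error status
        # presumably means the token is invalid or expired.
        if not api_token:
            return 'no API token configured'
        url = 'https://api.theporndb.net/scenes/hash/8679fcbdd29fa735'
        headers = {
            'Authorization': f'Bearer {api_token}',
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        }
        r = requests.get(url, headers=headers, timeout=timeout)
        return 'token ok' if r.ok else f'check failed: HTTP {r.status_code}'
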
else: @@ -816,14 +767,12 @@ def _get_pic_by_google(pic_url): def get_big_pic_by_google(pic_url, poster=False): url, pic_size, big_pic = _get_pic_by_google(pic_url) if not poster: - if big_pic or ( - pic_size and int(pic_size[0]) > 800 and int(pic_size[1]) > 539): # cover 有大图时或者图片高度 > 800 时使用该图片 + if big_pic or (pic_size and int(pic_size[0]) > 800 and int(pic_size[1]) > 539): # cover 有大图时或者图片高度 > 800 时使用该图片 return url, pic_size return '', '' if url and int(pic_size[1]) < 1000: # poster,图片高度小于 1500,重新搜索一次 url, pic_size, big_pic = _get_pic_by_google(url) - if pic_size and (big_pic or 'blogger.googleusercontent.com' in url or int( - pic_size[1]) > 560): # poster,大图或高度 > 560 时,使用该图片 + if pic_size and (big_pic or 'blogger.googleusercontent.com' in url or int(pic_size[1]) > 560): # poster,大图或高度 > 560 时,使用该图片 return url, pic_size else: return '', '' diff --git a/src/models/config/config.py b/src/models/config/config.py index 5bde580..085c4db 100644 --- a/src/models/config/config.py +++ b/src/models/config/config.py @@ -1,12 +1,11 @@ import os import os.path import platform -import random import re import time from configparser import RawConfigParser -from models.base.utils import singleton +from models.base.utils import get_user_agent, singleton from models.config.config_generated import GeneratedConfig from models.config.config_manual import ManualConfig @@ -323,15 +322,9 @@ def init_config(self): def update_config(self): # 获取proxies if self.type == 'http': - self.proxies = { - "http": "http://" + self.proxy, - "https": "http://" + self.proxy, - } + self.proxies = {"http": "http://" + self.proxy, "https": "http://" + self.proxy, } elif self.type == 'socks5': - self.proxies = { - "http": "socks5h://" + self.proxy, - "https": "socks5h://" + self.proxy, - } + self.proxies = {"http": "socks5h://" + self.proxy, "https": "socks5h://" + self.proxy, } else: self.proxies = None @@ -339,13 +332,7 @@ def update_config(self): self.theporndb_no_hash = 'theporndb_no_hash' in self.switch_on # 获取User-Agent - temp_l = random.randint(110, 117) - temp_m = random.randint(1, 5563) - temp_n = random.randint(1, 180) - self.headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s.0.%s.%s Safari/537.36' % ( - temp_l, temp_m, temp_n), - } + self.headers = {'User-Agent': get_user_agent(), } # 去掉^符号!!! 
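The proxy setup in update_config above maps the configured proxy type onto the scheme prefixes that requests expects, using socks5h:// so that DNS resolution also happens on the proxy side. A standalone sketch of the same mapping (the function name and arguments are illustrative, not the real method signature):

    def build_proxies(proxy_type: str, proxy: str):
        # An http proxy fronts both http and https traffic;
        # socks5h:// (unlike socks5://) resolves hostnames via the proxy.
        if proxy_type == 'http':
            return {'http': f'http://{proxy}', 'https': f'http://{proxy}'}
        if proxy_type == 'socks5':
            return {'http': f'socks5h://{proxy}', 'https': f'socks5h://{proxy}'}
        return None  # no proxy configured
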
self.cnword_style = self.cnword_style.strip('^') @@ -363,17 +350,13 @@ def update_config(self): # 是否清理文件以及清理列表 can_clean = True if 'i_know' in self.clean_enable and 'i_agree' in self.clean_enable else False can_clean_auto = True if can_clean and 'clean_auto' in self.clean_enable else False - clean_ext_list = re.split(r'[||,,]', self.clean_ext) \ - if can_clean and self.clean_ext and 'clean_ext' in self.clean_enable else [] - clean_name_list = re.split(r'[||,,]', self.clean_name) \ - if can_clean and self.clean_name and 'clean_name' in self.clean_enable else [] - clean_contains_list = re.split(r'[||,,]', self.clean_contains) \ - if can_clean and self.clean_contains and 'clean_contains' in self.clean_enable else [] + clean_ext_list = re.split(r'[||,,]', self.clean_ext) if can_clean and self.clean_ext and 'clean_ext' in self.clean_enable else [] + clean_name_list = re.split(r'[||,,]', self.clean_name) if can_clean and self.clean_name and 'clean_name' in self.clean_enable else [] + clean_contains_list = re.split(r'[||,,]', self.clean_contains) if can_clean and self.clean_contains and 'clean_contains' in self.clean_enable else [] clean_size_list = self.clean_size if can_clean and 'clean_size' in self.clean_enable else '' - clean_ignore_ext_list = re.split(r'[||,,]', self.clean_ignore_ext) \ - if can_clean and self.clean_ignore_ext and 'clean_ignore_ext' in self.clean_enable else [] - clean_ignore_contains_list = re.split(r'[||,,]', self.clean_ignore_contains) \ - if can_clean and self.clean_ignore_contains and 'clean_ignore_contains' in self.clean_enable else [] + clean_ignore_ext_list = re.split(r'[||,,]', self.clean_ignore_ext) if can_clean and self.clean_ignore_ext and 'clean_ignore_ext' in self.clean_enable else [] + clean_ignore_contains_list = re.split(r'[||,,]', + self.clean_ignore_contains) if can_clean and self.clean_ignore_contains and 'clean_ignore_contains' in self.clean_enable else [] self.can_clean = can_clean self.can_clean_auto = can_clean_auto self.clean_ext_list = clean_ext_list diff --git a/src/models/config/config_manual.py b/src/models/config/config_manual.py index d033308..9919926 100644 --- a/src/models/config/config_manual.py +++ b/src/models/config/config_manual.py @@ -3,78 +3,17 @@ class ManualConfig: local_version = 120240924 # 定义配置值类型 - INT_KEY = [ - 'version', - 'thread_number', - 'thread_time', - 'javdb_time', - 'main_mode', - 'soft_link', - 'success_file_move', - 'failed_file_move', - 'success_file_rename', - 'del_empty_folder', - 'show_poster', - 'folder_name_max', - 'file_name_max', - 'actor_name_max', - 'cd_name', - 'pic_name', - 'trailer_name', - 'use_database', - 'poster_mark', - 'thumb_mark', - 'fanart_mark', - 'mark_size', - 'timeout', - 'retry', - 'rest_count', - 'statement', - 'actor_photo_kodi_auto', - 'auto_link', - ] - FLOAT_KEY = [ - 'file_size', - 'clean_size', + INT_KEY = ['version', 'thread_number', 'thread_time', 'javdb_time', 'main_mode', 'soft_link', 'success_file_move', 'failed_file_move', 'success_file_rename', + 'del_empty_folder', 'show_poster', 'folder_name_max', 'file_name_max', 'actor_name_max', 'cd_name', 'pic_name', 'trailer_name', 'use_database', + 'poster_mark', 'thumb_mark', 'fanart_mark', 'mark_size', 'timeout', 'retry', 'rest_count', 'statement', 'actor_photo_kodi_auto', 'auto_link', ] + FLOAT_KEY = ['file_size', 'clean_size', - ] + ] # 支持的网站, 用于支持自定义域名 - SUPPORTED_WEBSITES = [ - '7mmtv', - 'airav', - 'airav_cc', - 'avsex', - 'avsox', - 'cnmdb', - 'dahlia', - 'dmm', - 'faleno', - 'fantastica', - 'fc2', - 'fc2club', - 'fc2hub', - 
'freejavbt', - 'getchu', - 'giga', - 'iqqtv', - 'jav321', - 'javday', - 'javbus', - 'javdb', - 'javlibrary', - 'kin8', - 'love6', - 'lulubar', - 'madouqu', - 'mdtv', - 'hscangku', - 'cableav', - 'mgstage', - 'mywife', - 'prestige', - 'theporndb', - 'xcity'] + SUPPORTED_WEBSITES = ['7mmtv', 'airav', 'airav_cc', 'avsex', 'avsox', 'cnmdb', 'dahlia', 'dmm', 'faleno', 'fantastica', 'fc2', 'fc2club', 'fc2hub', 'freejavbt', + 'getchu', 'giga', 'iqqtv', 'jav321', 'javday', 'javbus', 'javdb', 'javlibrary', 'kin8', 'love6', 'lulubar', 'madouqu', 'mdtv', 'hscangku', + 'cableav', 'mgstage', 'mywife', 'prestige', 'theporndb', 'xcity'] # 刮削过程所需预设值 oumei_name = { @@ -371,8 +310,7 @@ class ManualConfig: 'zb': 'ZoliBoy', } official = { - 'https://s1s1s1.com': 'sivr|ssis|ssni|snis|soe|oned|one|onsd|ofje|sps|tksoe', - # https://s1s1s1.com/search/list?keyword=soe + 'https://s1s1s1.com': 'sivr|ssis|ssni|snis|soe|oned|one|onsd|ofje|sps|tksoe', # https://s1s1s1.com/search/list?keyword=soe 'https://moodyz.com': 'mdvr|midv|mide|midd|mibd|mimk|miid|migd|mifd|miae|miad|miaa|mdl|mdj|mdi|mdg|mdf|mde|mdld|mded|mizd|mird|mdjd|rmid|mdid|mdmd|mimu|mdpd|mivd|mdud|mdgd|mdvd|mias|miqd|mint|rmpd|mdrd|tkmide|tkmidd|kmide|tkmigd|mdfd|rmwd|miab', 'https://www.madonna-av.com': 'juvr|jusd|juq|juy|jux|jul|juk|juc|jukd|jusd|oba|jufd|roeb|roe|ure|mdon|jfb|obe|jums', 'https://www.wanz-factory.com': 'wavr|waaa|bmw|wanz', @@ -403,101 +341,43 @@ class ManualConfig: 'https://hhh-av.com': 'huntb|hunta|hunt|hunbl|royd|tysf', 'https://www.prestige-av.com': 'abp|mbm|ezd|docp|onez|yrh|abw|abs|chn|mgt|tre|edd|ult|cmi|mbd|dnw|sga|rdd|dcx|evo|rdt|ppt|gets|sim|kil|tus|dtt|gnab|man|mas|tbl|rtp|ctd|fiv|dic|esk|kbi|tem|ama|kfne|trd|har|yrz|srs|mzq|zzr|gzap|tgav|rix|aka|bgn|lxv|afs|goal|giro|cpde|nmp|mct|abc|inu|shl|mbms|pxh|nrs|ftn|prdvr|fst|blo|shs|kum|gsx|ndx|atd|dld|kbh|bcv|raw|soud|job|chs|yok|bsd|fsb|nnn|hyk|sor|hsp|jbs|xnd|mei|day|mmy|kzd|jan|gyan|tdt|tok|dms|fnd|cdc|jcn|pvrbst|sdvr|docvr|fcp|abf', } - suren_dic = { - 'SHN-': '116', # 116SHN-045 - 'GANA': '200', # 200GANA-2556 - 'CUTE-': '229', # 229SCUTE-953 - 'LUXU': '259', # 200LUXU-2556 - 'ARA-': '261', # 261ARA-094 - 'DCV-': '277', # 277DCV-102 - 'EWDX': '299', # 299EWDX-400 - 'MAAN': '300', # 300MAAN-673 - 'MIUM': '300', # 300MIUM-745 - 'NTK-': '300', # 300NTK-635 - 'KIRAY-': '314', # 314KIRAY-128 - 'KJO-': '326', # 326KJO-002 - 'NAMA-': '332', # 332NAMA-077 - 'KNB-': '336', # 336KNB-172 - 'SIMM-': '345', # 345SIMM-662 - 'NTR-': '348', # 348NTR-001 - 'JAC-': '390', # 390JAC-034 - 'KIWVR': '408', # 408KIWVR-254 - 'INST': '413', # 413INST-202 - 'SRYA': '417', # 417SRYA-015 - 'SUKE-': '428', # 428SUKE-086 - 'MFC-': '435', # 435MFC-142 - 'HHH-': '451', # 451HHH-027 - 'TEN-': '459', # 459TEN-024 - 'MLA-': '476', # 476MLA-043 - 'SGK-': '483', # 483SGK-054 - 'GCB-': '485', # 485GCB-015 - 'SEI-': '502', # 502SEI-001 - 'STCV': '529', # 529STCV-009 - 'MY-': '292', # 292MY-425 - 'DANDY': '104', # 104DANDY-852A - 'ICHK': '368', # 368ICHK-018 - } - repl_list = [ - 'HEYDOUGA', - 'CARIBBEANCOM', - 'CARIB', - '1PONDO', - '1PON', - 'PACOMA', - 'PACO', - '10MUSUME', - '-10MU', - 'Tokyo Hot', - 'Tokyo_Hot', - 'TOKYO-HOT', - 'TOKYOHOT', - '(S1)', - '[THZU.CC]', - '「麻豆」', - '(', - ')', - '.PRT', - 'MP4-KTR', - 'rarbg', - 'WEBDL', - 'x2160x', - 'x1080x', - 'x2160p', - 'x1080p', - 'x264 aac', - 'x264_aac', - 'x264-aac', - 'x265 aac', - 'x265_aac', - 'x265-aac', - 'H.264', - 'H.265', - 'DVDRIP', - 'DVD ', - '2160P', - '1440P', - '1080P', - '960P', - '720P', - '540P', - '480P', - '360P', - '4096x2160', - 
'1920x1080', - '1280x720', - '960x720', - '640x480', - '4096×2160', - '1920×1080', - '1280×720', - '960×720', - '640×480', - '90fps', - '60fps', - '30fps', - '.cht', - '.chs', - ] + suren_dic = {'SHN-': '116', # 116SHN-045 + 'GANA': '200', # 200GANA-2556 + 'CUTE-': '229', # 229SCUTE-953 + 'LUXU': '259', # 200LUXU-2556 + 'ARA-': '261', # 261ARA-094 + 'DCV-': '277', # 277DCV-102 + 'EWDX': '299', # 299EWDX-400 + 'MAAN': '300', # 300MAAN-673 + 'MIUM': '300', # 300MIUM-745 + 'NTK-': '300', # 300NTK-635 + 'KIRAY-': '314', # 314KIRAY-128 + 'KJO-': '326', # 326KJO-002 + 'NAMA-': '332', # 332NAMA-077 + 'KNB-': '336', # 336KNB-172 + 'SIMM-': '345', # 345SIMM-662 + 'NTR-': '348', # 348NTR-001 + 'JAC-': '390', # 390JAC-034 + 'KIWVR': '408', # 408KIWVR-254 + 'INST': '413', # 413INST-202 + 'SRYA': '417', # 417SRYA-015 + 'SUKE-': '428', # 428SUKE-086 + 'MFC-': '435', # 435MFC-142 + 'HHH-': '451', # 451HHH-027 + 'TEN-': '459', # 459TEN-024 + 'MLA-': '476', # 476MLA-043 + 'SGK-': '483', # 483SGK-054 + 'GCB-': '485', # 485GCB-015 + 'SEI-': '502', # 502SEI-001 + 'STCV': '529', # 529STCV-009 + 'MY-': '292', # 292MY-425 + 'DANDY': '104', # 104DANDY-852A + 'ICHK': '368', # 368ICHK-018 + } + repl_list = ['HEYDOUGA', 'CARIBBEANCOM', 'CARIB', '1PONDO', '1PON', 'PACOMA', 'PACO', '10MUSUME', '-10MU', 'Tokyo Hot', 'Tokyo_Hot', 'TOKYO-HOT', 'TOKYOHOT', '(S1)', + '[THZU.CC]', '「麻豆」', '(', ')', '.PRT', 'MP4-KTR', 'rarbg', 'WEBDL', 'x2160x', 'x1080x', 'x2160p', 'x1080p', 'x264 aac', 'x264_aac', 'x264-aac', + 'x265 aac', 'x265_aac', 'x265-aac', 'H.264', 'H.265', 'DVDRIP', 'DVD ', '2160P', '1440P', '1080P', '960P', '720P', '540P', '480P', '360P', '4096x2160', + '1920x1080', '1280x720', '960x720', '640x480', '4096×2160', '1920×1080', '1280×720', '960×720', '640×480', '90fps', '60fps', '30fps', '.cht', '.chs', ] web_dic = { 'airav.io': 'airav_cc', 'airav.wiki': 'airav', @@ -539,181 +419,127 @@ class ManualConfig: 'theporndb': 'theporndb', 'prestige': 'prestige', } - char_list = [ - '[高清] (中文字幕)', - '[高清 (中文字幕)', - ' (中文字幕)', - ' (中文字幕)', - '[高清中文字幕]', - '[高清中文字幕', - '高清中文字幕]', - '【高清中文字幕】', - '[高清]', - '无码流出版', - '无码流出', - '无码破解版', - '无码破解', - 'TOKYO-HOT-', - '韩文转译版', - '独家听译版', - '完整版', - '特别版', - '完全版', - '时间轴修复版', - '导演剪辑最终版', - '堂友', - ] - all_key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'series', 'director', 'studio', - 'publisher'] - all_rep_word = { - '&': '&', # 将网页中存在二次抓取的&(实际意义为&)的字符全局替换成&(大写的&,不会被emby误判,显示更美观) - '<': '<', # 将网页中存在二次抓取的<(实际意义为<)的字符全局替换成< - '>': '>', # 将网页中存在二次抓取的>(实际意义为>)的字符替全局换成> - ''': "'", # 将网页中存在二次抓取的'(实际意义为')的字符全局替换成' - '"': '"', # 将网页中存在二次抓取的"(实际意义为")的字符替全局换成" - '‘': '「', # 将网页中存在二次抓取的‘(实际意义为「)的字符全局替换成「 - '’': '」', # 将网页中存在二次抓取的’(实际意义为」)的字符全局替换成」 - '…': '…', # 将网页中存在二次抓取的…(实际意义为…)的字符全局替换成… - '→': '→', # 将网页中存在二次抓取的→(实际意义为→)的字符全局替换成→ - '
': '', # 将网页中存在的隐藏换行符全局替换成空白 - '&': '&', # 将网页本身抓取到的&全局替换成&(大写的&,不会被emby误判,避免Emby的nfo读取错误,显示更美观) - '—': '—', # 将网页中存在二次抓取的—(实际意义为—破折号)的字符全局替换成—,破折号”—“不等于数字“一” - '<': '<', # 将网页中存在二次抓取的<字符全局替换成<(大写的<,不会被emby误判,避免Emby的nfo读取错误,显示更美观) - '>': '>', # 将网页中存在二次抓取的>字符全局替换成>(大写的>,不会被emby误判,避免Emby的nfo读取错误,显示更美观) - '・': '·', # 将网页本身的・(人名间隔号)全局替换成· - '“': '「', # 将前双引号“全局替换为「,更美观 - '”': '」', # 将后双引号”全局替换为」,更美观 - '...': '…', # 将非标准省略号...全局替换成标准省略号… - '……': '…', # 将并列的两个省略号……全局替换成单个省略号… 解决......替换成……后出现两个省略号 - '’s': "'s", # 将非标准英文单引号的’替换全局为标准英文单引号',避免’s被以下规则替换成」s,例如:love’s替换成love's - '‘': '「', # 将前单引号‘全局替换为「,更美观 - '’': '」', # 将后单引号’全局替换为」,更美观 - ',': ',', # 将英文逗号,全局替换成中文逗号, - '?': '?', # 将英文问号?全局替换成中文问号? - '! ': '!', # 去除感叹号后面不必要的空格 - '!': '!', # 将英文感叹号!全局替换成中文感叹号! - 'A': 'A', # 将全角大写英文替换成半角大写英文 - 'B': 'B', # 将全角大写英文替换成半角大写英文 - 'C': 'C', # 将全角大写英文替换成半角大写英文 - 'D': 'D', # 将全角大写英文替换成半角大写英文 - 'E': 'E', # 将全角大写英文替换成半角大写英文 - 'F': 'F', # 将全角大写英文替换成半角大写英文 - 'G': 'G', # 将全角大写英文替换成半角大写英文 - 'H': 'H', # 将全角大写英文替换成半角大写英文 - 'I': 'I', # 将全角大写英文替换成半角大写英文 - 'J': 'J', # 将全角大写英文替换成半角大写英文 - 'K': 'K', # 将全角大写英文替换成半角大写英文 - 'L': 'L', # 将全角大写英文替换成半角大写英文 - 'M': 'M', # 将全角大写英文替换成半角大写英文 - 'N': 'N', # 将全角大写英文替换成半角大写英文 - 'O': 'O', # 将全角大写英文替换成半角大写英文 - 'P': 'P', # 将全角大写英文替换成半角大写英文 - 'Q': 'Q', # 将全角大写英文替换成半角大写英文 - 'R': 'R', # 将全角大写英文替换成半角大写英文 - 'S': 'S', # 将全角大写英文替换成半角大写英文 - 'T': 'T', # 将全角大写英文替换成半角大写英文 - 'U': 'U', # 将全角大写英文替换成半角大写英文 - 'V': 'V', # 将全角大写英文替换成半角大写英文 - 'W': 'W', # 将全角大写英文替换成半角大写英文 - 'X': 'X', # 将全角大写英文替换成半角大写英文 - 'Y': 'Y', # 将全角大写英文替换成半角大写英文 - 'Z': 'Z', # 将全角大写英文替换成半角大写英文 - 'a': 'a', # 将全角小写英文替换成半角小写英文 - 'b': 'b', # 将全角小写英文替换成半角小写英文 - 'c': 'c', # 将全角小写英文替换成半角小写英文 - 'd': 'd', # 将全角小写英文替换成半角小写英文 - 'e': 'e', # 将全角小写英文替换成半角小写英文 - 'f': 'f', # 将全角小写英文替换成半角小写英文 - 'g': 'g', # 将全角小写英文替换成半角小写英文 - 'h': 'h', # 将全角小写英文替换成半角小写英文 - 'i': 'i', # 将全角小写英文替换成半角小写英文 - 'j': 'j', # 将全角小写英文替换成半角小写英文 - 'k': 'k', # 将全角小写英文替换成半角小写英文 - 'l': 'l', # 将全角小写英文替换成半角小写英文 - 'm': 'm', # 将全角小写英文替换成半角小写英文 - 'n': 'n', # 将全角小写英文替换成半角小写英文 - 'o': 'o', # 将全角小写英文替换成半角小写英文 - 'p': 'p', # 将全角小写英文替换成半角小写英文 - 'q': 'q', # 将全角小写英文替换成半角小写英文 - 'r': 'r', # 将全角小写英文替换成半角小写英文 - 's': 's', # 将全角小写英文替换成半角小写英文 - 't': 't', # 将全角小写英文替换成半角小写英文 - 'u': 'u', # 将全角小写英文替换成半角小写英文 - 'v': 'v', # 将全角小写英文替换成半角小写英文 - 'w': 'w', # 将全角小写英文替换成半角小写英文 - 'x': 'x', # 将全角小写英文替换成半角小写英文 - 'y': 'y', # 将全角小写英文替换成半角小写英文 - 'z': 'z', # 将全角小写英文替换成半角小写英文 - '1': '1', # 将全角数字替换成半角数字 - '2': '2', # 将全角数字替换成半角数字 - '3': '3', # 将全角数字替换成半角数字 - '4': '4', # 将全角数字替换成半角数字 - '5': '5', # 将全角数字替换成半角数字 - '6': '6', # 将全角数字替换成半角数字 - '7': '7', # 将全角数字替换成半角数字 - '8': '8', # 将全角数字替换成半角数字 - '9': '9', # 将全角数字替换成半角数字 - '0': '0', # 将全角数字替换成半角数字 - '\t': ' ', # 将制表符替换为空格 - } - chinese_rep_word = { - '姊': '姐', # 中文简体常见错字全局替换 - '著': '着', # 中文简体常见错字全局替换 - '慾': '欲', # 中文简体常见错字全局替换 - '肏': '操', # 中文简体常见错字全局替换 - '裡': '里', # 中文简体常见错字全局替换 - '係': '系', # 中文简体常见错字全局替换 - '繫': '联', # 中文简体常见错字全局替换 - '豔': '艳', # 中文简体常见错字全局替换 - '妳': '你', # 中文简体常见错字全局替换 - '歳': '岁', # 中文简体常见错字全局替换 - '廿': '二十', # 中文简体常见错字全局替换 - '卅': '三十', # 中文简体常见错字全局替换 - '卌': '四十', # 中文简体常见错字全局替换 - } - title_rep = ['第一集', '第二集', ' - 上', ' - 下', ' 上集', ' 下集', ' -上', ' -下', 'Part.1 (HD)', - '(蓝光碟版)', '(蓝光版)', '(ブルーレイ版)'] - show_key = [ - 'number', - 'letters', - 'has_sub', - 'cd_part', - 'mosaic', - 'title', - 'originaltitle', - 'actor', - 'outline', - 'originalplot', - 'tag', - 'release', - 'year', - 'runtime', - 'score', - 'wanted', - 'series', - 'director', - 'studio', - 'publisher', - 'trailer', - 'website', - 'javdbid' - ] - full_half_char = [ - (u"・", u"·"), (u".", u"."), (u",", u","), 
(u"!", u"!"), (u"?", u"?"), (u"”", u'"'), (u"’", u"'"), - (u"‘", u"`"), (u"@", u"@"), (u"_", u"_"), (u":", u":"), (u";", u";"), (u"#", u"#"), (u"$", u"$"), - (u"%", u"%"), (u"&", u"&"), (u"(", u"("), (u")", u")"), (u"‐", u"-"), (u"=", u"="), (u"*", u"*"), - (u"+", u"+"), (u"-", u"-"), (u"/", u"/"), (u"<", u"<"), (u">", u">"), (u"[", u"["), (u"¥", u"\\"), - (u"]", u"]"), (u"^", u"^"), (u"{", u"{"), (u"|", u"|"), (u"}", u"}"), (u"~", u"~"), (u"a", u"a"), - (u"b", u"b"), (u"c", u"c"), (u"d", u"d"), (u"e", u"e"), (u"f", u"f"), (u"g", u"g"), (u"h", u"h"), - (u"i", u"i"), (u"j", u"j"), (u"k", u"k"), (u"l", u"l"), (u"m", u"m"), (u"n", u"n"), (u"o", u"o"), - (u"p", u"p"), (u"q", u"q"), (u"r", u"r"), (u"s", u"s"), (u"t", u"t"), (u"u", u"u"), (u"v", u"v"), - (u"w", u"w"), (u"x", u"x"), (u"y", u"y"), (u"z", u"z"), (u"A", u"A"), (u"B", u"B"), (u"C", u"C"), - (u"D", u"D"), (u"E", u"E"), (u"F", u"F"), (u"G", u"G"), (u"H", u"H"), (u"I", u"I"), (u"J", u"J"), - (u"K", u"K"), (u"L", u"L"), (u"M", u"M"), (u"N", u"N"), (u"O", u"O"), (u"P", u"P"), (u"Q", u"Q"), - (u"R", u"R"), (u"S", u"S"), (u"T", u"T"), (u"U", u"U"), (u"V", u"V"), (u"W", u"W"), (u"X", u"X"), - (u"Y", u"Y"), (u"Z", u"Z"), (u"0", u"0"), (u"1", u"1"), (u"2", u"2"), (u"3", u"3"), (u"4", u"4"), - (u"5", u"5"), (u"6", u"6"), (u"7", u"7"), (u"8", u"8"), (u"9", u"9"), (u" ", u" ") - ] + char_list = ['[高清] (中文字幕)', '[高清 (中文字幕)', ' (中文字幕)', ' (中文字幕)', '[高清中文字幕]', '[高清中文字幕', '高清中文字幕]', '【高清中文字幕】', '[高清]', + '无码流出版', '无码流出', '无码破解版', '无码破解', 'TOKYO-HOT-', '韩文转译版', '独家听译版', '完整版', '特别版', '完全版', '时间轴修复版', + '导演剪辑最终版', '堂友', ] + all_key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'series', 'director', 'studio', 'publisher'] + all_rep_word = {'&': '&', # 将网页中存在二次抓取的&(实际意义为&)的字符全局替换成&(大写的&,不会被emby误判,显示更美观) + '<': '<', # 将网页中存在二次抓取的<(实际意义为<)的字符全局替换成< + '>': '>', # 将网页中存在二次抓取的>(实际意义为>)的字符替全局换成> + ''': "'", # 将网页中存在二次抓取的'(实际意义为')的字符全局替换成' + '"': '"', # 将网页中存在二次抓取的"(实际意义为")的字符替全局换成" + '‘': '「', # 将网页中存在二次抓取的‘(实际意义为「)的字符全局替换成「 + '’': '」', # 将网页中存在二次抓取的’(实际意义为」)的字符全局替换成」 + '…': '…', # 将网页中存在二次抓取的…(实际意义为…)的字符全局替换成… + '→': '→', # 将网页中存在二次抓取的→(实际意义为→)的字符全局替换成→ + '
': '', # 将网页中存在的隐藏换行符全局替换成空白 + '&': '&', # 将网页本身抓取到的&全局替换成&(大写的&,不会被emby误判,避免Emby的nfo读取错误,显示更美观) + '—': '—', # 将网页中存在二次抓取的—(实际意义为—破折号)的字符全局替换成—,破折号”—“不等于数字“一” + '<': '<', # 将网页中存在二次抓取的<字符全局替换成<(大写的<,不会被emby误判,避免Emby的nfo读取错误,显示更美观) + '>': '>', # 将网页中存在二次抓取的>字符全局替换成>(大写的>,不会被emby误判,避免Emby的nfo读取错误,显示更美观) + '・': '·', # 将网页本身的・(人名间隔号)全局替换成· + '“': '「', # 将前双引号“全局替换为「,更美观 + '”': '」', # 将后双引号”全局替换为」,更美观 + '...': '…', # 将非标准省略号...全局替换成标准省略号… + '……': '…', # 将并列的两个省略号……全局替换成单个省略号… 解决......替换成……后出现两个省略号 + '’s': "'s", # 将非标准英文单引号的’替换全局为标准英文单引号',避免’s被以下规则替换成」s,例如:love’s替换成love's + '‘': '「', # 将前单引号‘全局替换为「,更美观 + '’': '」', # 将后单引号’全局替换为」,更美观 + ',': ',', # 将英文逗号,全局替换成中文逗号, + '?': '?', # 将英文问号?全局替换成中文问号? + '! ': '!', # 去除感叹号后面不必要的空格 + '!': '!', # 将英文感叹号!全局替换成中文感叹号! + 'A': 'A', # 将全角大写英文替换成半角大写英文 + 'B': 'B', # 将全角大写英文替换成半角大写英文 + 'C': 'C', # 将全角大写英文替换成半角大写英文 + 'D': 'D', # 将全角大写英文替换成半角大写英文 + 'E': 'E', # 将全角大写英文替换成半角大写英文 + 'F': 'F', # 将全角大写英文替换成半角大写英文 + 'G': 'G', # 将全角大写英文替换成半角大写英文 + 'H': 'H', # 将全角大写英文替换成半角大写英文 + 'I': 'I', # 将全角大写英文替换成半角大写英文 + 'J': 'J', # 将全角大写英文替换成半角大写英文 + 'K': 'K', # 将全角大写英文替换成半角大写英文 + 'L': 'L', # 将全角大写英文替换成半角大写英文 + 'M': 'M', # 将全角大写英文替换成半角大写英文 + 'N': 'N', # 将全角大写英文替换成半角大写英文 + 'O': 'O', # 将全角大写英文替换成半角大写英文 + 'P': 'P', # 将全角大写英文替换成半角大写英文 + 'Q': 'Q', # 将全角大写英文替换成半角大写英文 + 'R': 'R', # 将全角大写英文替换成半角大写英文 + 'S': 'S', # 将全角大写英文替换成半角大写英文 + 'T': 'T', # 将全角大写英文替换成半角大写英文 + 'U': 'U', # 将全角大写英文替换成半角大写英文 + 'V': 'V', # 将全角大写英文替换成半角大写英文 + 'W': 'W', # 将全角大写英文替换成半角大写英文 + 'X': 'X', # 将全角大写英文替换成半角大写英文 + 'Y': 'Y', # 将全角大写英文替换成半角大写英文 + 'Z': 'Z', # 将全角大写英文替换成半角大写英文 + 'a': 'a', # 将全角小写英文替换成半角小写英文 + 'b': 'b', # 将全角小写英文替换成半角小写英文 + 'c': 'c', # 将全角小写英文替换成半角小写英文 + 'd': 'd', # 将全角小写英文替换成半角小写英文 + 'e': 'e', # 将全角小写英文替换成半角小写英文 + 'f': 'f', # 将全角小写英文替换成半角小写英文 + 'g': 'g', # 将全角小写英文替换成半角小写英文 + 'h': 'h', # 将全角小写英文替换成半角小写英文 + 'i': 'i', # 将全角小写英文替换成半角小写英文 + 'j': 'j', # 将全角小写英文替换成半角小写英文 + 'k': 'k', # 将全角小写英文替换成半角小写英文 + 'l': 'l', # 将全角小写英文替换成半角小写英文 + 'm': 'm', # 将全角小写英文替换成半角小写英文 + 'n': 'n', # 将全角小写英文替换成半角小写英文 + 'o': 'o', # 将全角小写英文替换成半角小写英文 + 'p': 'p', # 将全角小写英文替换成半角小写英文 + 'q': 'q', # 将全角小写英文替换成半角小写英文 + 'r': 'r', # 将全角小写英文替换成半角小写英文 + 's': 's', # 将全角小写英文替换成半角小写英文 + 't': 't', # 将全角小写英文替换成半角小写英文 + 'u': 'u', # 将全角小写英文替换成半角小写英文 + 'v': 'v', # 将全角小写英文替换成半角小写英文 + 'w': 'w', # 将全角小写英文替换成半角小写英文 + 'x': 'x', # 将全角小写英文替换成半角小写英文 + 'y': 'y', # 将全角小写英文替换成半角小写英文 + 'z': 'z', # 将全角小写英文替换成半角小写英文 + '1': '1', # 将全角数字替换成半角数字 + '2': '2', # 将全角数字替换成半角数字 + '3': '3', # 将全角数字替换成半角数字 + '4': '4', # 将全角数字替换成半角数字 + '5': '5', # 将全角数字替换成半角数字 + '6': '6', # 将全角数字替换成半角数字 + '7': '7', # 将全角数字替换成半角数字 + '8': '8', # 将全角数字替换成半角数字 + '9': '9', # 将全角数字替换成半角数字 + '0': '0', # 将全角数字替换成半角数字 + '\t': ' ', # 将制表符替换为空格 + } + chinese_rep_word = {'姊': '姐', # 中文简体常见错字全局替换 + '著': '着', # 中文简体常见错字全局替换 + '慾': '欲', # 中文简体常见错字全局替换 + '肏': '操', # 中文简体常见错字全局替换 + '裡': '里', # 中文简体常见错字全局替换 + '係': '系', # 中文简体常见错字全局替换 + '繫': '联', # 中文简体常见错字全局替换 + '豔': '艳', # 中文简体常见错字全局替换 + '妳': '你', # 中文简体常见错字全局替换 + '歳': '岁', # 中文简体常见错字全局替换 + '廿': '二十', # 中文简体常见错字全局替换 + '卅': '三十', # 中文简体常见错字全局替换 + '卌': '四十', # 中文简体常见错字全局替换 + } + title_rep = ['第一集', '第二集', ' - 上', ' - 下', ' 上集', ' 下集', ' -上', ' -下', 'Part.1 (HD)', '(蓝光碟版)', '(蓝光版)', '(ブルーレイ版)'] + show_key = ['number', 'letters', 'has_sub', 'cd_part', 'mosaic', 'title', 'originaltitle', 'actor', 'outline', 'originalplot', 'tag', 'release', 'year', 'runtime', + 'score', 'wanted', 'series', 'director', 'studio', 'publisher', 'trailer', 'website', 'javdbid'] + full_half_char = [(u"・", u"·"), (u".", u"."), (u",", u","), (u"!", u"!"), (u"?", u"?"), (u"”", u'"'), (u"’", u"'"), 
(u"‘", u"`"), (u"@", u"@"), (u"_", u"_"), + (u":", u":"), (u";", u";"), (u"#", u"#"), (u"$", u"$"), (u"%", u"%"), (u"&", u"&"), (u"(", u"("), (u")", u")"), (u"‐", u"-"), (u"=", u"="), + (u"*", u"*"), (u"+", u"+"), (u"-", u"-"), (u"/", u"/"), (u"<", u"<"), (u">", u">"), (u"[", u"["), (u"¥", u"\\"), (u"]", u"]"), (u"^", u"^"), + (u"{", u"{"), (u"|", u"|"), (u"}", u"}"), (u"~", u"~"), (u"a", u"a"), (u"b", u"b"), (u"c", u"c"), (u"d", u"d"), (u"e", u"e"), (u"f", u"f"), + (u"g", u"g"), (u"h", u"h"), (u"i", u"i"), (u"j", u"j"), (u"k", u"k"), (u"l", u"l"), (u"m", u"m"), (u"n", u"n"), (u"o", u"o"), (u"p", u"p"), + (u"q", u"q"), (u"r", u"r"), (u"s", u"s"), (u"t", u"t"), (u"u", u"u"), (u"v", u"v"), (u"w", u"w"), (u"x", u"x"), (u"y", u"y"), (u"z", u"z"), + (u"A", u"A"), (u"B", u"B"), (u"C", u"C"), (u"D", u"D"), (u"E", u"E"), (u"F", u"F"), (u"G", u"G"), (u"H", u"H"), (u"I", u"I"), (u"J", u"J"), + (u"K", u"K"), (u"L", u"L"), (u"M", u"M"), (u"N", u"N"), (u"O", u"O"), (u"P", u"P"), (u"Q", u"Q"), (u"R", u"R"), (u"S", u"S"), (u"T", u"T"), + (u"U", u"U"), (u"V", u"V"), (u"W", u"W"), (u"X", u"X"), (u"Y", u"Y"), (u"Z", u"Z"), (u"0", u"0"), (u"1", u"1"), (u"2", u"2"), (u"3", u"3"), + (u"4", u"4"), (u"5", u"5"), (u"6", u"6"), (u"7", u"7"), (u"8", u"8"), (u"9", u"9"), (u" ", u" ")] special_word = { '強●': '強制', '犯●': '犯さ', @@ -740,6 +566,5 @@ class ManualConfig: 'メイド': 'メイド', 'ホールド': 'ホールド', } - actress_wiki_keywords = ['AV idol', 'pornographic', 'pornoactrice', 'Japanese idol', 'Japanese actress', - 'AV actress', 'porn star', 'gravure', 'director', 'voice actor', 'gravure idol', 'model', - 'Porn actresses'] + actress_wiki_keywords = ['AV idol', 'pornographic', 'pornoactrice', 'Japanese idol', 'Japanese actress', 'AV actress', 'porn star', 'gravure', 'director', + 'voice actor', 'gravure idol', 'model', 'Porn actresses'] diff --git a/src/models/config/resources.py b/src/models/config/resources.py index a7b03b6..689807b 100644 --- a/src/models/config/resources.py +++ b/src/models/config/resources.py @@ -70,14 +70,7 @@ def __init__(self): def get_actor_data(self, actor): # 初始化数据 - actor_data = { - 'zh_cn': actor, - 'zh_tw': actor, - 'jp': actor, - 'keyword': [actor], - 'href': '', - 'has_name': False, - } + actor_data = {'zh_cn': actor, 'zh_tw': actor, 'jp': actor, 'keyword': [actor], 'href': '', 'has_name': False, } # 查询映射表 xml_actor = self.actor_mapping_data @@ -100,13 +93,7 @@ def get_actor_data(self, actor): def get_info_data(self, info): # 初始化数据 - info_data = { - 'zh_cn': info, - 'zh_tw': info, - 'jp': info, - 'keyword': [info], - 'has_name': False, - } + info_data = {'zh_cn': info, 'zh_tw': info, 'jp': info, 'keyword': [info], 'has_name': False, } # 查询映射表 xml_info = self.info_mapping_data @@ -114,11 +101,9 @@ def get_info_data(self, info): info_name = ',%s,' % info.upper() for each in config.full_half_char: info_name = info_name.replace(each[0], each[1]) - info_ob = xml_info.xpath( - '//a[contains(translate(@keyword, ' - '"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ・", ' - '"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ·"), $name)]', - name=info_name) + info_ob = xml_info.xpath('//a[contains(translate(@keyword, ' + '"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ・", ' + '"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ·"), $name)]', name=info_name) if info_ob: info_ob = info_ob[0] info_data['zh_cn'] = info_ob.get('zh_cn').replace('删除', '') @@ -186,8 +171,7 @@ def 
_get_or_generate_local_data(self): content = f.read() self.info_mapping_data = etree.HTML(content.encode('utf-8'), parser=parser) except Exception as e: - signal.show_log_text(' %s 读取失败!请检查该文件是否存在问题!如需重置请删除该文件!错误信息:\n%s' % ( - actor_map_local_path, str(e))) + signal.show_log_text(f' {actor_map_local_path} 读取失败!请检查该文件是否存在问题!如需重置请删除该文件!错误信息:\n{str(e)}') signal.show_traceback_log(traceback.format_exc()) signal.show_log_text(traceback.format_exc()) self.actor_mapping_data = {} diff --git a/src/models/core/crawler.py b/src/models/core/crawler.py index 29a9a9b..7c04330 100644 --- a/src/models/core/crawler.py +++ b/src/models/core/crawler.py @@ -9,9 +9,9 @@ from models.base.number import get_number_letters, is_uncensored from models.config.config import config from models.core.flags import Flags -from models.crawlers import airav, airav_cc, avsex, avsox, cableav, cnmdb, dahlia, dmm, faleno, fantastica, fc2, \ - fc2club, fc2hub, freejavbt, getchu, getchu_dmm, giga, hdouban, hscangku, iqqtv_new, jav321, javbus, javdb, \ - javlibrary_new, kin8, love6, lulubar, madouqu, mdtv, mgstage, mmtv, mywife, official, prestige, theporndb, xcity , javday +from models.crawlers import airav, airav_cc, avsex, avsox, cableav, cnmdb, dahlia, dmm, faleno, fantastica, fc2, fc2club, fc2hub, freejavbt, getchu, getchu_dmm, giga, \ + hdouban, hscangku, iqqtv_new, jav321, javbus, javday, javdb, javlibrary_new, kin8, love6, lulubar, madouqu, mdtv, mgstage, mmtv, mywife, official, prestige, \ + theporndb, xcity from models.entity.enums import FileMode @@ -62,9 +62,9 @@ def _get_new_website_list(field_website_list, number_website_list, file_number, same_list = _deal_some_list(field, 'dahlia', same_list) # fantastica 番号检查 FAVI、FAAP、FAPL、FAKG、FAHO、FAVA、FAKY、FAMI、FAIT、FAKA、FAMO、FASO、FAIH、FASH、FAKS、FAAN - elif re.search(r'FA[A-Z]{2}-?\d+', file_number.upper()) or file_number.upper().startswith( - 'CLASS') or file_number.upper().startswith('FADRV') or file_number.upper().startswith( - 'FAPRO') or file_number.upper().startswith('FAKWM') or file_number.upper().startswith('PDS'): + elif re.search(r'FA[A-Z]{2}-?\d+', + file_number.upper()) or file_number.upper().startswith('CLASS') or file_number.upper().startswith('FADRV') or file_number.upper().startswith('FAPRO') or file_number.upper().startswith( + 'FAKWM') or file_number.upper().startswith('PDS'): same_list = _deal_some_list(field, 'fantastica', same_list) return same_list @@ -132,17 +132,13 @@ def _call_crawler(json_data, website, language, file_number, short_number, mosai elif website == 'fc2club': json_data = json.loads(fc2club.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'mdtv': - json_data = json.loads( - mdtv.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) + json_data = json.loads(mdtv.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) elif website == 'madouqu': - json_data = json.loads( - madouqu.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) + json_data = json.loads(madouqu.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) elif website == 'hscangku': - json_data = json.loads( - hscangku.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) + json_data = json.loads(hscangku.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) elif website == 'cableav': - json_data = json.loads( - cableav.main(file_number, appoint_url, 
log_info, req_web, language, file_path, appoint_number)) + json_data = json.loads(cableav.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) elif website == 'getchu': json_data = json.loads(getchu.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'getchu_dmm': @@ -152,15 +148,13 @@ def _call_crawler(json_data, website, language, file_number, short_number, mosai elif website == 'giga': json_data = json.loads(giga.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'hdouban': - json_data = json.loads( - hdouban.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number, mosaic)) + json_data = json.loads(hdouban.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number, mosaic)) elif website == 'lulubar': json_data = json.loads(lulubar.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'love6': json_data = json.loads(love6.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'cnmdb': - json_data = json.loads( - cnmdb.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) + json_data = json.loads(cnmdb.main(file_number, appoint_url, log_info, req_web, language, file_path, appoint_number)) elif website == 'faleno': json_data = json.loads(faleno.main(file_number, appoint_url, log_info, req_web, language)) elif website == 'fantastica': @@ -209,42 +203,24 @@ def _decide_websites(json_data, number_website_list): studio_website_list = config.studio_website.split(',') publisher_website_list = config.publisher_website.split(',') wanted_website_list = config.wanted_website.split(',') - title_jp_website_new_list = _get_new_website_list(title_jp_website_list, number_website_list, file_number, - short_number, 'title') - title_zh_website_new_list = _get_new_website_list(title_zh_website_list, number_website_list, file_number, - short_number, 'title_zh') - outline_jp_website_new_list = _get_new_website_list(outline_jp_website_list, number_website_list, file_number, - short_number, 'outline') - outline_zh_website_new_list = _get_new_website_list(outline_zh_website_list, number_website_list, file_number, - short_number, 'outline_zh') - actor_website_new_list = _get_new_website_list(actor_website_list, number_website_list, file_number, short_number, - 'actor') - thumb_website_new_list = _get_new_website_list(thumb_website_list, number_website_list, file_number, short_number, - 'thumb') - poster_website_new_list = _get_new_website_list(poster_website_list, number_website_list, file_number, short_number, - 'poster') - extrafanart_website_new_list = _get_new_website_list(extrafanart_website_list, number_website_list, file_number, - short_number, 'extrafanart') - trailer_website_new_list = _get_new_website_list(trailer_website_list, number_website_list, file_number, - short_number, 'trailer') - tag_website_new_list = _get_new_website_list(tag_website_list, number_website_list, file_number, short_number, - 'tag') - release_website_new_list = _get_new_website_list(release_website_list, number_website_list, file_number, - short_number, 'release') - runtime_website_new_list = _get_new_website_list(runtime_website_list, number_website_list, file_number, - short_number, 'runtime') - score_website_new_list = _get_new_website_list(score_website_list, number_website_list, file_number, short_number, - 'score') - director_website_new_list = _get_new_website_list(director_website_list, 
number_website_list, file_number, - short_number, 'director') - series_website_new_list = _get_new_website_list(series_website_list, number_website_list, file_number, short_number, - 'series') - studio_website_new_list = _get_new_website_list(studio_website_list, number_website_list, file_number, short_number, - 'studio') - publisher_website_new_list = _get_new_website_list(publisher_website_list, number_website_list, file_number, - short_number, 'publisher') - wanted_website_new_list = _get_new_website_list(wanted_website_list, number_website_list, file_number, short_number, - 'wanted') + title_jp_website_new_list = _get_new_website_list(title_jp_website_list, number_website_list, file_number, short_number, 'title') + title_zh_website_new_list = _get_new_website_list(title_zh_website_list, number_website_list, file_number, short_number, 'title_zh') + outline_jp_website_new_list = _get_new_website_list(outline_jp_website_list, number_website_list, file_number, short_number, 'outline') + outline_zh_website_new_list = _get_new_website_list(outline_zh_website_list, number_website_list, file_number, short_number, 'outline_zh') + actor_website_new_list = _get_new_website_list(actor_website_list, number_website_list, file_number, short_number, 'actor') + thumb_website_new_list = _get_new_website_list(thumb_website_list, number_website_list, file_number, short_number, 'thumb') + poster_website_new_list = _get_new_website_list(poster_website_list, number_website_list, file_number, short_number, 'poster') + extrafanart_website_new_list = _get_new_website_list(extrafanart_website_list, number_website_list, file_number, short_number, 'extrafanart') + trailer_website_new_list = _get_new_website_list(trailer_website_list, number_website_list, file_number, short_number, 'trailer') + tag_website_new_list = _get_new_website_list(tag_website_list, number_website_list, file_number, short_number, 'tag') + release_website_new_list = _get_new_website_list(release_website_list, number_website_list, file_number, short_number, 'release') + runtime_website_new_list = _get_new_website_list(runtime_website_list, number_website_list, file_number, short_number, 'runtime') + score_website_new_list = _get_new_website_list(score_website_list, number_website_list, file_number, short_number, 'score') + director_website_new_list = _get_new_website_list(director_website_list, number_website_list, file_number, short_number, 'director') + series_website_new_list = _get_new_website_list(series_website_list, number_website_list, file_number, short_number, 'series') + studio_website_new_list = _get_new_website_list(studio_website_list, number_website_list, file_number, short_number, 'studio') + publisher_website_new_list = _get_new_website_list(publisher_website_list, number_website_list, file_number, short_number, 'publisher') + wanted_website_new_list = _get_new_website_list(wanted_website_list, number_website_list, file_number, short_number, 'wanted') # 初始化变量 all_json_data = {} @@ -255,26 +231,16 @@ def _decide_websites(json_data, number_website_list): else: if 'official' in config.website_set: title_jp_website_new_list.insert(0, 'official') - request_field_list = [ - ['title', '标题', 'title_language', title_jp_website_new_list], - ['title_zh', '中文标题', 'title_language', title_zh_website_new_list], - ['outline', '简介', 'outline_language', outline_jp_website_new_list], - ['outline_zh', '中文简介', 'outline_language', outline_zh_website_new_list], - ['actor', '演员', 'actor_language', actor_website_new_list], - ['cover', '背景图', 
'title_language', thumb_website_new_list], - ['poster', '封面图', 'title_language', poster_website_new_list], - ['extrafanart', '剧照', 'title_language', extrafanart_website_new_list], - ['tag', '标签', 'tag_language', tag_website_new_list], - ['release', '发行日期', 'title_language', release_website_new_list], - ['runtime', '时长', 'title_language', runtime_website_new_list], - ['score', '评分', 'title_language', score_website_new_list], - ['director', '导演', 'director_language', director_website_new_list], - ['series', '系列', 'series_language', series_website_new_list], - ['studio', '片商', 'studio_language', studio_website_new_list], - ['publisher', '发行商', 'publisher_language', publisher_website_new_list], - ['trailer', '预告片', 'title_language', trailer_website_new_list], - ['wanted', '想看人数', 'title_language', wanted_website_new_list], - ] + request_field_list = [['title', '标题', 'title_language', title_jp_website_new_list], ['title_zh', '中文标题', 'title_language', title_zh_website_new_list], + ['outline', '简介', 'outline_language', outline_jp_website_new_list], + ['outline_zh', '中文简介', 'outline_language', outline_zh_website_new_list], ['actor', '演员', 'actor_language', actor_website_new_list], + ['cover', '背景图', 'title_language', thumb_website_new_list], ['poster', '封面图', 'title_language', poster_website_new_list], + ['extrafanart', '剧照', 'title_language', extrafanart_website_new_list], ['tag', '标签', 'tag_language', tag_website_new_list], + ['release', '发行日期', 'title_language', release_website_new_list], ['runtime', '时长', 'title_language', runtime_website_new_list], + ['score', '评分', 'title_language', score_website_new_list], ['director', '导演', 'director_language', director_website_new_list], + ['series', '系列', 'series_language', series_website_new_list], ['studio', '片商', 'studio_language', studio_website_new_list], + ['publisher', '发行商', 'publisher_language', publisher_website_new_list], + ['trailer', '预告片', 'title_language', trailer_website_new_list], ['wanted', '想看人数', 'title_language', wanted_website_new_list], ] if config.outline_language == 'jp': request_field_list.pop(3) if config.title_language == 'jp': @@ -286,8 +252,7 @@ def _decide_websites(json_data, number_website_list): field_name, field_cnname, field_language, website_list = each_f if field_name in none_fields: continue - _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnname, field_language, config, - file_number, short_number, json_data['mosaic']) + _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnname, field_language, config, file_number, short_number, json_data['mosaic']) if field_name == 'title' and not json_data['title']: return json_data @@ -305,62 +270,33 @@ def _decide_websites(json_data, number_website_list): title_website_list = title_zh_website_list + title_jp_website_list if config.outline_language != 'jp': outline_website_list = outline_zh_website_list + outline_jp_website_list - title_website_new_list = _get_new_website_list(title_website_list, new_number_website_list, file_number, - short_number, 'title', all=True) - title_jp_website_new_list = _get_new_website_list(title_jp_website_list, new_number_website_list, file_number, - short_number, 'title', all=True) - outline_website_new_list = _get_new_website_list(outline_website_list, new_number_website_list, file_number, - short_number, 'outline', all=True) - outline_jp_website_new_list = _get_new_website_list(outline_jp_website_list, new_number_website_list, file_number, - short_number, 'outline', all=True) - 
actor_website_new_list = _get_new_website_list(actor_website_list, number_website_list, file_number, short_number,
-                                                   'actor', all=True)
-    thumb_website_new_list = _get_new_website_list(thumb_website_list, number_website_list, file_number, short_number,
-                                                   'thumb', all=True)
-    poster_website_new_list = _get_new_website_list(poster_website_list, number_website_list, file_number, short_number,
-                                                    'poster', all=True)
-    extrafanart_website_new_list = _get_new_website_list(extrafanart_website_list, number_website_list, file_number,
-                                                         short_number, 'extrafanart', all=True)
-    tag_website_new_list = _get_new_website_list(tag_website_list, number_website_list, file_number, short_number,
-                                                 'tag', all=True)
-    release_website_new_list = _get_new_website_list(release_website_list, number_website_list, file_number,
-                                                     short_number, 'release', all=True)
-    runtime_website_new_list = _get_new_website_list(runtime_website_list, number_website_list, file_number,
-                                                     short_number, 'runtime', all=True)
-    score_website_new_list = _get_new_website_list(score_website_list, number_website_list, file_number, short_number,
-                                                   'score', all=True)
-    director_website_new_list = _get_new_website_list(director_website_list, number_website_list, file_number,
-                                                      short_number, 'director', all=True)
-    series_website_new_list = _get_new_website_list(series_website_list, number_website_list, file_number, short_number,
-                                                    'series', all=True)
-    studio_website_new_list = _get_new_website_list(studio_website_list, number_website_list, file_number, short_number,
-                                                    'studio', all=True)
-    publisher_website_new_list = _get_new_website_list(publisher_website_list, number_website_list, file_number,
-                                                       short_number, 'publisher', all=True)
-    trailer_website_new_list = _get_new_website_list(trailer_website_list, number_website_list, file_number,
-                                                     short_number, 'trailer', all=True)
-    wanted_website_new_list = _get_new_website_list(wanted_website_list, number_website_list, file_number, short_number,
-                                                    'wanted')
-    deal_field_list = [
-        ['title', '标题', 'title_language', title_website_new_list],
-        ['originaltitle', '原标题', 'outline_language', title_jp_website_new_list],
-        ['outline', '简介', 'outline_language', outline_website_new_list],
-        ['originalplot', '原简介', 'outline_language', outline_jp_website_new_list],
-        ['actor', '演员', 'actor_language', actor_website_new_list],
-        ['cover', '背景图', 'title_language', thumb_website_new_list],
-        ['poster', '封面图', 'title_language', poster_website_new_list],
-        ['extrafanart', '剧照', 'title_language', extrafanart_website_new_list],
-        ['tag', '标签', 'tag_language', tag_website_new_list],
-        ['release', '发行日期', 'title_language', release_website_new_list],
-        ['runtime', '时长', 'title_language', runtime_website_new_list],
-        ['score', '评分', 'title_language', score_website_new_list],
-        ['director', '导演', 'director_language', director_website_new_list],
-        ['series', '系列', 'series_language', series_website_new_list],
-        ['studio', '片商', 'studio_language', studio_website_new_list],
-        ['publisher', '发行商', 'publisher_language', publisher_website_new_list],
-        ['trailer', '预告片', 'title_language', trailer_website_new_list],
-        ['wanted', '想看人数', 'title_language', wanted_website_list],
-    ]
+    title_website_new_list = _get_new_website_list(title_website_list, new_number_website_list, file_number, short_number, 'title', all=True)
+    title_jp_website_new_list = _get_new_website_list(title_jp_website_list, new_number_website_list, file_number, short_number, 'title', all=True)
+    outline_website_new_list = _get_new_website_list(outline_website_list, new_number_website_list, file_number, short_number, 'outline', all=True)
+    outline_jp_website_new_list = _get_new_website_list(outline_jp_website_list, new_number_website_list, file_number, short_number, 'outline', all=True)
+    actor_website_new_list = _get_new_website_list(actor_website_list, number_website_list, file_number, short_number, 'actor', all=True)
+    thumb_website_new_list = _get_new_website_list(thumb_website_list, number_website_list, file_number, short_number, 'thumb', all=True)
+    poster_website_new_list = _get_new_website_list(poster_website_list, number_website_list, file_number, short_number, 'poster', all=True)
+    extrafanart_website_new_list = _get_new_website_list(extrafanart_website_list, number_website_list, file_number, short_number, 'extrafanart', all=True)
+    tag_website_new_list = _get_new_website_list(tag_website_list, number_website_list, file_number, short_number, 'tag', all=True)
+    release_website_new_list = _get_new_website_list(release_website_list, number_website_list, file_number, short_number, 'release', all=True)
+    runtime_website_new_list = _get_new_website_list(runtime_website_list, number_website_list, file_number, short_number, 'runtime', all=True)
+    score_website_new_list = _get_new_website_list(score_website_list, number_website_list, file_number, short_number, 'score', all=True)
+    director_website_new_list = _get_new_website_list(director_website_list, number_website_list, file_number, short_number, 'director', all=True)
+    series_website_new_list = _get_new_website_list(series_website_list, number_website_list, file_number, short_number, 'series', all=True)
+    studio_website_new_list = _get_new_website_list(studio_website_list, number_website_list, file_number, short_number, 'studio', all=True)
+    publisher_website_new_list = _get_new_website_list(publisher_website_list, number_website_list, file_number, short_number, 'publisher', all=True)
+    trailer_website_new_list = _get_new_website_list(trailer_website_list, number_website_list, file_number, short_number, 'trailer', all=True)
+    wanted_website_new_list = _get_new_website_list(wanted_website_list, number_website_list, file_number, short_number, 'wanted')
+    deal_field_list = [['title', '标题', 'title_language', title_website_new_list], ['originaltitle', '原标题', 'outline_language', title_jp_website_new_list],
+                       ['outline', '简介', 'outline_language', outline_website_new_list], ['originalplot', '原简介', 'outline_language', outline_jp_website_new_list],
+                       ['actor', '演员', 'actor_language', actor_website_new_list], ['cover', '背景图', 'title_language', thumb_website_new_list],
+                       ['poster', '封面图', 'title_language', poster_website_new_list], ['extrafanart', '剧照', 'title_language', extrafanart_website_new_list],
+                       ['tag', '标签', 'tag_language', tag_website_new_list], ['release', '发行日期', 'title_language', release_website_new_list],
+                       ['runtime', '时长', 'title_language', runtime_website_new_list], ['score', '评分', 'title_language', score_website_new_list],
+                       ['director', '导演', 'director_language', director_website_new_list], ['series', '系列', 'series_language', series_website_new_list],
+                       ['studio', '片商', 'studio_language', studio_website_new_list], ['publisher', '发行商', 'publisher_language', publisher_website_new_list],
+                       ['trailer', '预告片', 'title_language', trailer_website_new_list], ['wanted', '想看人数', 'title_language', wanted_website_list], ]
     if not wanted_website_new_list or (scrape_like == 'speed' and json_data['source'] not in wanted_website_new_list):
         deal_field_list.pop()
@@ -384,8 +320,7 @@ def
_decide_websites(json_data, number_website_list): actor_amazon_list_cn = [] actor_amazon_list_tw = [] actor_new_website = [] - [actor_new_website.append(i) for i in title_jp_website_new_list + title_website_new_list + actor_website_new_list if - i not in actor_new_website] + [actor_new_website.append(i) for i in title_jp_website_new_list + title_website_new_list + actor_website_new_list if i not in actor_new_website] for each_website in actor_new_website: if each_website in all_json_data.keys() and all_json_data[each_website]['jp']['title']: temp_actor = all_json_data[each_website]['jp']['actor'] @@ -435,8 +370,7 @@ def _deal_each_field(all_json_data, json_data, website_list, field_name, field_c return backup_data = '' - json_data['log_info'] += '\n\n 🙋🏻‍ %s \n ====================================\n 🌐 来源优先级:%s' % ( - field_cnname, ' -> '.join(website_list)) + json_data['log_info'] += '\n\n 🙋🏻‍ %s \n ====================================\n 🌐 来源优先级:%s' % (field_cnname, ' -> '.join(website_list)) for website in website_list: title_language = getattr(config, field_language) if website not in ['airav_cc', 'iqqtv', 'airav', 'avsex', 'javlibrary', 'mdtv', 'madouqu', 'lulubar']: @@ -475,10 +409,8 @@ def _deal_each_field(all_json_data, json_data, website_list, field_name, field_c elif field_name == 'outline': json_data['outline_from'] = website elif field_name == 'actor': - json_data['all_actor'] = json_data['all_actor'] if json_data.get('all_actor') else web_data_json[ - 'actor'] - json_data['all_actor_photo'] = json_data['all_actor_photo'] if json_data.get('all_actor_photo') else \ - web_data_json['actor_photo'] + json_data['all_actor'] = json_data['all_actor'] if json_data.get('all_actor') else web_data_json['actor'] + json_data['all_actor_photo'] = json_data['all_actor_photo'] if json_data.get('all_actor_photo') else web_data_json['actor_photo'] elif field_name == 'originaltitle': if web_data_json['actor']: json_data['amazon_orginaltitle_actor'] = web_data_json['actor'].split(',')[0] @@ -497,8 +429,7 @@ def _deal_each_field(all_json_data, json_data, website_list, field_name, field_c json_data['fields_info'] += '\n ' + f"{field_name:<13}" + f': {"-----"} ({"not found"})' -def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnname, field_language, config, - file_number, short_number, mosaic): # 4 +def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnname, field_language, config, file_number, short_number, mosaic): # 4 """ 按照设置的网站顺序获取各个字段信息 """ @@ -508,8 +439,7 @@ def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnn backup_jsondata = {} for website in website_list: - if (website in ['avsox', 'mdtv'] and mosaic in ['有码', '无码破解', '流出', '里番', '动漫']) or ( - website == 'mdtv' and mosaic == '无码'): + if (website in ['avsox', 'mdtv'] and mosaic in ['有码', '无码破解', '流出', '里番', '动漫']) or (website == 'mdtv' and mosaic == '无码'): if field_name != 'title': continue if field_name in ['title_zh', 'outline_zh']: @@ -525,8 +455,7 @@ def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnn try: web_data_json = all_json_data[website][title_language] except: - web_data = _call_crawler(json_data, website, title_language, file_number, short_number, mosaic, - config.title_language) + web_data = _call_crawler(json_data, website, title_language, file_number, short_number, mosaic, config.title_language) all_json_data.update(web_data) web_data_json = all_json_data.get(website).get(title_language) json_data['req_web'] = 
web_data_json['req_web'] @@ -551,12 +480,10 @@ def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnn if field_name in ['title', 'outline', 'originaltitle', 'originalplot']: if langid.classify(web_data_json[field_name])[0] != 'ja': if title_language == 'jp': - json_data[ - 'log_info'] += f'\n 🔴 {field_cnname} 检测为非日文,跳过!({website})\n ↳ {web_data_json[field_name]}' + json_data['log_info'] += f'\n 🔴 {field_cnname} 检测为非日文,跳过!({website})\n ↳ {web_data_json[field_name]}' continue elif title_language != 'jp': - json_data[ - 'log_info'] += f'\n 🔴 {field_cnname} 检测为日文,跳过!({website})\n ↳ {web_data_json[field_name]}' + json_data['log_info'] += f'\n 🔴 {field_cnname} 检测为日文,跳过!({website})\n ↳ {web_data_json[field_name]}' continue elif website == 'official': website = all_json_data['official']['jp']['source'] @@ -564,8 +491,7 @@ def _call_crawlers(all_json_data, json_data, website_list, field_name, field_cnn break else: if len(backup_jsondata): - json_data[ - 'log_info'] += f'\n 🟢 {field_cnname} 使用备用数据!({backup_website})\n ↳ {backup_jsondata[field_name]} ' + json_data['log_info'] += f'\n 🟢 {field_cnname} 使用备用数据!({backup_website})\n ↳ {backup_jsondata[field_name]} ' if field_cnname == '标题': json_data.update(backup_jsondata) else: @@ -652,13 +578,11 @@ def _call_specific_crawler(json_data, website): if short_number: json_data['number'] = file_number - temp_actor = web_data[website]['jp']['actor'] + ',' + web_data[website]['zh_cn']['actor'] + ',' + \ - web_data[website]['zh_tw']['actor'] + temp_actor = web_data[website]['jp']['actor'] + ',' + web_data[website]['zh_cn']['actor'] + ',' + web_data[website]['zh_tw']['actor'] json_data['actor_amazon'] = [] [json_data['actor_amazon'].append(i) for i in temp_actor.split(',') if i and i not in json_data['actor_amazon']] json_data['all_actor'] = json_data['all_actor'] if json_data.get('all_actor') else web_data_json['actor'] - json_data['all_actor_photo'] = json_data['all_actor_photo'] if json_data.get('all_actor_photo') else web_data_json[ - 'actor_photo'] + json_data['all_actor_photo'] = json_data['all_actor_photo'] if json_data.get('all_actor_photo') else web_data_json['actor_photo'] return json_data @@ -691,9 +615,8 @@ def _crawl(json_data, website_name): # 从JSON返回元数据 if website_name == 'all': # 从全部网站刮削 # =======================================================================先判断是不是国产,避免浪费时间 - if mosaic == '国产' or mosaic == '國產' or ( - re.search(r'([^A-Z]|^)MD[A-Z-]*\d{4,}', file_number) and 'MDVR' not in file_number) or re.search( - r'MKY-[A-Z]+-\d{3,}', file_number): + if mosaic == '国产' or mosaic == '國產' or (re.search(r'([^A-Z]|^)MD[A-Z-]*\d{4,}', file_number) and 'MDVR' not in file_number) or re.search(r'MKY-[A-Z]+-\d{3,}', + file_number): json_data['mosaic'] = '国产' website_list = config.website_guochan.split(',') json_data = _decide_websites(json_data, website_list) @@ -729,8 +652,7 @@ def _crawl(json_data, website_name): # 从JSON返回元数据 json_data['error_info'] = '未识别到FC2番号:%s' % file_number # =======================================================================sexart.15.06.14 - elif re.search(r'[^.]+\.\d{2}\.\d{2}\.\d{2}', file_number) or ( - '欧美' in file_path and '东欧美' not in file_path): + elif re.search(r'[^.]+\.\d{2}\.\d{2}\.\d{2}', file_number) or ('欧美' in file_path and '东欧美' not in file_path): website_list = config.website_oumei.split(',') json_data = _decide_websites(json_data, website_list) @@ -854,9 +776,8 @@ def _deal_json_data(json_data): return json_data # 演员 - json_data['actor'] = str(json_data['actor']).strip(" [ 
]").replace("'", '').replace(', ', ',').replace('<', - '(').replace( - '>', ')').strip(',') # 列表转字符串(避免个别网站刮削返回的是列表) + json_data['actor'] = str(json_data['actor']).strip(" [ ]").replace("'", '').replace(', ', ',').replace('<', '(').replace('>', + ')').strip(',') # 列表转字符串(避免个别网站刮削返回的是列表) # 标签 tag = str(json_data['tag']).strip(" [ ]").replace("'", '').replace(', ', ',') # 列表转字符串(避免个别网站刮削返回的是列表) @@ -928,8 +849,7 @@ def _deal_json_data(json_data): json_data['wanted'] = '' # 字符转义,避免显示问题 - key_word = ['title', 'originaltitle', 'number', 'outline', 'originalplot', 'actor', 'tag', 'series', 'director', - 'studio', 'publisher'] + key_word = ['title', 'originaltitle', 'number', 'outline', 'originalplot', 'actor', 'tag', 'series', 'director', 'studio', 'publisher'] rep_word = { '&': '&', '<': '<', diff --git a/src/models/core/file.py b/src/models/core/file.py index 48bbc78..050897f 100644 --- a/src/models/core/file.py +++ b/src/models/core/file.py @@ -8,8 +8,7 @@ import traceback from models.base.file import copy_file, delete_file, move_file, read_link, split_path -from models.base.number import deal_actor_more, get_file_number, get_info, get_number_first_letter, \ - get_number_letters, is_uncensored, remove_escape_string +from models.base.number import deal_actor_more, get_file_number, get_info, get_number_first_letter, get_number_letters, is_uncensored, remove_escape_string from models.base.path import showFilePath from models.base.utils import convert_path, get_current_time, get_used_time from models.config.config import config @@ -59,8 +58,7 @@ def _need_clean(file_path, file_name, file_ext): return False -def creat_folder(json_data, folder_new_path, file_path, file_new_path, thumb_new_path_with_filename, - poster_new_path_with_filename): +def creat_folder(json_data, folder_new_path, file_path, file_new_path, thumb_new_path_with_filename, poster_new_path_with_filename): """判断是否创建文件夹,目标文件是否有重复文件。file_new_path是最终路径""" json_data['dont_move_movie'] = False # 不需要移动和重命名视频 @@ -223,14 +221,12 @@ def move_torrent(json_data, folder_old_path, folder_new_path, file_name, movie_n torrent_file2 = os.path.join(folder_old_path, (movie_number + '.torrent')) torrent_file1_new_path = os.path.join(folder_new_path, (naming_rule + '.torrent')) torrent_file2_new_path = os.path.join(folder_new_path, (movie_number + '.torrent')) - if os.path.exists(torrent_file1) and torrent_file1 != torrent_file1_new_path and not os.path.exists( - torrent_file1_new_path): + if os.path.exists(torrent_file1) and torrent_file1 != torrent_file1_new_path and not os.path.exists(torrent_file1_new_path): move_file(torrent_file1, torrent_file1_new_path) json_data['logs'] += "\n 🍀 Torrent done!" if torrent_file2 != torrent_file1: - if os.path.exists(torrent_file2) and torrent_file2 != torrent_file2_new_path and not os.path.exists( - torrent_file2_new_path): + if os.path.exists(torrent_file2) and torrent_file2 != torrent_file2_new_path and not os.path.exists(torrent_file2_new_path): move_file(torrent_file2, torrent_file2_new_path) json_data['logs'] += "\n 🍀 Torrent done!" @@ -250,8 +246,7 @@ def check_file(json_data, file_path, file_escape_size): if 'no_skip_small_file' not in config.no_escape: file_size = os.path.getsize(file_path) / float(1024 * 1024) if file_size < file_escape_size: - json_data['error_info'] = '文件小于 %s MB 被过滤!(实际大小 %s MB)已跳过刮削!' % ( - file_escape_size, round(file_size, 2)) + json_data['error_info'] = '文件小于 %s MB 被过滤!(实际大小 %s MB)已跳过刮削!' 
% (file_escape_size, round(file_size, 2)) json_data['req_web'] = 'do_not_update_json_data_dic' json_data['outline'] = split_path(file_path)[1] json_data['tag'] = file_path @@ -332,8 +327,7 @@ def move_other_file(json_data, folder_old_path, folder_new_path, file_name, nami if '-cd' not in old_file.lower(): # 避免多分集时,其他分级的内容被移走 old_file_old_path = os.path.join(folder_old_path, old_file) old_file_new_path = os.path.join(folder_new_path, old_file) - if old_file_old_path != old_file_new_path and os.path.exists( - old_file_old_path) and not os.path.exists(old_file_new_path): + if old_file_old_path != old_file_new_path and os.path.exists(old_file_old_path) and not os.path.exists(old_file_new_path): move_file(old_file_old_path, old_file_new_path) json_data['logs'] += "\n 🍀 Move %s done!" % old_file @@ -436,8 +430,7 @@ def move_movie(json_data, file_path, file_new_path): temp_path = file_path # 自身是软链接时,获取真实路径 if os.path.islink(file_path): - file_path = read_link(file_path) - # delete_file(temp_path) + file_path = read_link(file_path) # delete_file(temp_path) # 删除目标路径存在的文件,否则会创建失败, delete_file(file_new_path) try: @@ -488,8 +481,7 @@ def move_movie(json_data, file_path, file_new_path): if result: json_data['logs'] += f"\n 🍀 Movie done! \n 🙉 [Movie] {file_new_path}" if os.path.islink(file_new_path): - json_data['logs'] += f"\n It's a symlink file! Source file: \n {read_link(file_new_path)}" - # win 不能用os.path.realpath(),返回的结果不准 + json_data['logs'] += f"\n It's a symlink file! Source file: \n {read_link(file_new_path)}" # win 不能用os.path.realpath(),返回的结果不准 json_data['file_path'] = file_new_path return True else: @@ -615,12 +607,10 @@ def _get_folder_path(file_path, success_folder, json_data): temp_4k = definition.replace('UHD8', 'UHD') # 替换文件夹名称 - repl_list = [['4K', temp_4k.strip('-')], ['originaltitle', originaltitle], ['title', title], - ['outline', outline], ['number', number], ['first_actor', first_actor], ['all_actor', all_actor], - ['actor', actor], ['release', release], ['year', str(year)], ['runtime', str(runtime)], - ['director', director], ['series', series], ['studio', studio], ['publisher', publisher], - ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], ['cnword', cnword], - ['moword', moword], ['first_letter', first_letter], ['letters', letters], ['filename', filename], + repl_list = [['4K', temp_4k.strip('-')], ['originaltitle', originaltitle], ['title', title], ['outline', outline], ['number', number], ['first_actor', first_actor], + ['all_actor', all_actor], ['actor', actor], ['release', release], ['year', str(year)], ['runtime', str(runtime)], ['director', director], + ['series', series], ['studio', studio], ['publisher', publisher], ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], + ['cnword', cnword], ['moword', moword], ['first_letter', first_letter], ['letters', letters], ['filename', filename], ['wanted', str(json_data['wanted'])], ['score', str(score)]] folder_new_name = folder_name for each_key in repl_list: @@ -637,16 +627,13 @@ def _get_folder_path(file_path, success_folder, json_data): if len(folder_new_name) > folder_name_max: cut_index = folder_name_max - len(folder_new_name) if 'originaltitle' in folder_name: - json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除原标题后%s个字符!' % ( - len(folder_new_name), folder_name_max, abs(cut_index)) + json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除原标题后%s个字符!' 
% (len(folder_new_name), folder_name_max, abs(cut_index)) folder_new_name = folder_new_name.replace(originaltitle, originaltitle[0:cut_index]) elif 'title' in folder_name: - json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除标题后%s个字符!' % ( - len(folder_new_name), folder_name_max, abs(cut_index)) + json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除标题后%s个字符!' % (len(folder_new_name), folder_name_max, abs(cut_index)) folder_new_name = folder_new_name.replace(title, title[0:cut_index]) elif 'outline' in folder_name: - json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除简介后%s个字符!' % ( - len(folder_new_name), folder_name_max, abs(cut_index)) + json_data['logs'] += '\n 💡 当前目录名长度:%s,最大允许长度:%s,目录命名时将去除简介后%s个字符!' % (len(folder_new_name), folder_name_max, abs(cut_index)) folder_new_name = folder_new_name.replace(outline, outline[0:cut_index]) # 替换一些字符 @@ -656,8 +643,7 @@ def _get_folder_path(file_path, success_folder, json_data): folder_new_name = re.sub(r'[\\:*?"<>|\r\n]+', '', folder_new_name).strip(' /') # 过滤文件夹名字前后的空格 - folder_new_name = folder_new_name.replace(' /', '/').replace(' \\', '\\').replace('/ ', '/').replace('\\ ', - '\\') + folder_new_name = folder_new_name.replace(' /', '/').replace(' \\', '\\').replace('/ ', '/').replace('\\ ', '\\') # 日文浊音转换(mac的坑,osx10.12以下使用nfd) folder_new_name = nfd2c(folder_new_name) @@ -759,12 +745,10 @@ def _generate_file_name(file_path, json_data): temp_4k = definition.replace('UHD8', 'UHD') # 替换文件名 - repl_list = [['4K', temp_4k.strip('-')], ['originaltitle', originaltitle], ['title', title], - ['outline', outline], ['number', number], ['first_actor', first_actor], ['all_actor', all_actor], - ['actor', actor], ['release', release], ['year', str(year)], ['runtime', str(runtime)], - ['director', director], ['series', series], ['studio', studio], ['publisher', publisher], - ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], ['cnword', cnword], - ['moword', moword], ['first_letter', first_letter], ['letters', letters], ['filename', filename], + repl_list = [['4K', temp_4k.strip('-')], ['originaltitle', originaltitle], ['title', title], ['outline', outline], ['number', number], ['first_actor', first_actor], + ['all_actor', all_actor], ['actor', actor], ['release', release], ['year', str(year)], ['runtime', str(runtime)], ['director', director], + ['series', series], ['studio', studio], ['publisher', publisher], ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], + ['cnword', cnword], ['moword', moword], ['first_letter', first_letter], ['letters', letters], ['filename', filename], ['wanted', str(json_data['wanted'])], ['score', str(score)]] for each_key in repl_list: file_name = file_name.replace(each_key[0], each_key[1]) @@ -1036,32 +1020,44 @@ def movie_lists(escape_folder_list, movie_type, movie_path): found_count = len(total) if found_count >= i: i = found_count + 100 - signal.show_traceback_log( - f"✅ Found ({found_count})! " - f"Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! " - f"({get_used_time(start_time)}s)... Still searching, please wait... \u3000") - signal.show_log_text( - f' {get_current_time()} Found ({found_count})! ' - f'Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! ' - f'({get_used_time(start_time)}s)... Still searching, please wait... \u3000') + signal.show_traceback_log(f"✅ Found ({found_count})! " + f"Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! " + f"({get_used_time(start_time)}s)... 
Still searching, please wait... \u3000") + signal.show_log_text(f' {get_current_time()} Found ({found_count})! ' + f'Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! ' + f'({get_used_time(start_time)}s)... Still searching, please wait... \u3000') total.sort() - signal.show_traceback_log( - f"🎉 Done!!! Found ({len(total)})! " - f"Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! " - f"({get_used_time(start_time)}s) \u3000") - signal.show_log_text( - f' Done!!! Found ({len(total)})! ' - f'Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! ' - f'({get_used_time(start_time)}s) \u3000') + signal.show_traceback_log(f"🎉 Done!!! Found ({len(total)})! " + f"Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! " + f"({get_used_time(start_time)}s) \u3000") + signal.show_log_text(f' Done!!! Found ({len(total)})! ' + f'Skip successfully scraped ({skip}) repeat softlink ({skip_repeat_softlink})! ' + f'({get_used_time(start_time)}s) \u3000') return total def get_file_info(file_path, copy_sub=True): - json_data = {'version': config.version, 'logs': '', 'req_web': '', 'image_download': '', 'outline_from': '', - 'cover_from': '', 'poster_from': '', 'extrafanart_from': '', 'trailer_from': '', - 'short_number': '', 'appoint_number': '', 'appoint_url': '', 'website_name': '', 'fields_info': '', - 'poster_path': '', 'thumb_path': '', 'fanart_path': '', 'cover_list': []} + json_data = { + 'version': config.version, + 'logs': '', + 'req_web': '', + 'image_download': '', + 'outline_from': '', + 'cover_from': '', + 'poster_from': '', + 'extrafanart_from': '', + 'trailer_from': '', + 'short_number': '', + 'appoint_number': '', + 'appoint_url': '', + 'website_name': '', + 'fields_info': '', + 'poster_path': '', + 'thumb_path': '', + 'fanart_path': '', + 'cover_list': [] + } movie_number = '' has_sub = False c_word = '' @@ -1150,8 +1146,7 @@ def get_file_info(file_path, copy_sub=True): if len(cd_path_2[0]) == 1 or 'digital' in cd_char: cd_part = str(int(cd_path_2[0])) elif cd_path_3 and 'letter' in cd_char: - letter_list = ['', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', - 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] + letter_list = ['', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] if cd_path_3[0][1] != 'c' or 'endc' in cd_char: cd_part = letter_list.index(cd_path_3[0][1]) elif cd_path_4 and 'middle_number' in cd_char: @@ -1181,9 +1176,8 @@ def get_file_info(file_path, copy_sub=True): if '国产' in file_path or '麻豆' in file_path or '國產' in file_path: mosaic = '国产' else: - md_list = ['国产', '國產', '麻豆', '传媒', '傳媒', '皇家华人', '皇家華人', '精东', '精東', - '猫爪影像', '貓爪影像', '91CM', '91MS', '导演系列', '導演系列', 'MDWP', 'MMZ', 'MLT', - 'MSM', 'LAA', 'MXJ', 'SWAG'] + md_list = ['国产', '國產', '麻豆', '传媒', '傳媒', '皇家华人', '皇家華人', '精东', '精東', '猫爪影像', '貓爪影像', '91CM', '91MS', '导演系列', '導演系列', + 'MDWP', 'MMZ', 'MLT', 'MSM', 'LAA', 'MXJ', 'SWAG'] for each in md_list: if each in file_path: mosaic = '国产' @@ -1198,8 +1192,7 @@ def get_file_info(file_path, copy_sub=True): # 判断是否无码 wuma_style = str(config.wuma_style) if not mosaic: - if '无码' in file_path or '無碼' in file_path or '無修正' in file_path or 'uncensored' in file_path.lower() or is_uncensored( - movie_number): + if '无码' in file_path or '無碼' in file_path or '無修正' in file_path or 'uncensored' in file_path.lower() or is_uncensored(movie_number): wuma = wuma_style mosaic = '无码' @@ 
-1435,8 +1428,7 @@ def _clean_empty_fodlers(path, file_mode): def get_success_list(): Flags.success_save_time = time.time() if os.path.isfile(resources.userdata_path('success.txt')): - with open(resources.userdata_path('success.txt'), 'r', encoding='utf-8', - errors='ignore') as f: + with open(resources.userdata_path('success.txt'), 'r', encoding='utf-8', errors='ignore') as f: temp = f.read() Flags.success_list = set(temp.split('\n')) if temp.strip() else set() if '' in Flags.success_list: @@ -1445,9 +1437,19 @@ def get_success_list(): signal.view_success_file_settext.emit(f'查看 ({len(Flags.success_list)})') -def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_new_path, - thumb_new_path_with_filename, poster_new_path_with_filename, fanart_new_path_with_filename, - nfo_new_path, file_ex, poster_final_path, thumb_final_path, fanart_final_path): +def deal_old_files(json_data, + folder_old_path, + folder_new_path, + file_path, + file_new_path, + thumb_new_path_with_filename, + poster_new_path_with_filename, + fanart_new_path_with_filename, + nfo_new_path, + file_ex, + poster_final_path, + thumb_final_path, + fanart_final_path): """ 处理本地已存在的thumb、poster、fanart、nfo """ @@ -1481,15 +1483,12 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ poster_old_path_no_filename = convert_path(os.path.join(folder_old_path, 'poster.jpg')) thumb_old_path_no_filename = convert_path(os.path.join(folder_old_path, 'thumb.jpg')) fanart_old_path_no_filename = convert_path(os.path.join(folder_old_path, 'fanart.jpg')) - file_path_list = {nfo_old_path, nfo_new_path, thumb_old_path_with_filename, thumb_old_path_no_filename, - thumb_new_path_with_filename, thumb_final_path, poster_old_path_with_filename, - poster_old_path_no_filename, poster_new_path_with_filename, poster_final_path, - fanart_old_path_with_filename, fanart_old_path_no_filename, fanart_new_path_with_filename, - fanart_final_path, trailer_old_file_path_with_filename, trailer_new_file_path_with_filename} - folder_path_list = {extrafanart_old_path, extrafanart_new_path, extrafanart_copy_old_path, - extrafanart_copy_new_path, trailer_old_folder_path, trailer_new_folder_path, - theme_videos_old_path, theme_videos_new_path, extrafanart_extra_old_path, - extrafanart_extra_new_path} + file_path_list = {nfo_old_path, nfo_new_path, thumb_old_path_with_filename, thumb_old_path_no_filename, thumb_new_path_with_filename, thumb_final_path, + poster_old_path_with_filename, poster_old_path_no_filename, poster_new_path_with_filename, poster_final_path, fanart_old_path_with_filename, + fanart_old_path_no_filename, fanart_new_path_with_filename, fanart_final_path, trailer_old_file_path_with_filename, + trailer_new_file_path_with_filename} + folder_path_list = {extrafanart_old_path, extrafanart_new_path, extrafanart_copy_old_path, extrafanart_copy_new_path, trailer_old_folder_path, + trailer_new_folder_path, theme_videos_old_path, theme_videos_new_path, extrafanart_extra_old_path, extrafanart_extra_new_path} # 视频模式进行清理 main_mode = config.main_mode @@ -1553,8 +1552,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ done_poster_path_copy = True try: # 图片最终路径等于已下载路径时,图片是已下载的,不需要处理 - if done_poster_path and os.path.exists(done_poster_path) and split_path(done_poster_path)[0] == \ - split_path(poster_final_path)[0]: # 如果存在已下载完成的文件,尝试复制 + if done_poster_path and os.path.exists(done_poster_path) and split_path(done_poster_path)[0] == split_path(poster_final_path)[0]: # 如果存在已下载完成的文件,尝试复制 
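# The condition above is a "reuse only if already in place" check: a
# previously downloaded poster counts as done only when it already sits in
# the same directory as poster_final_path. A minimal standalone sketch of the
# idea, assuming the project's split_path helper behaves like os.path.split
# (the function name below is illustrative, not part of the codebase):
import os

def image_already_in_place(done_path: str, final_path: str) -> bool:
    """True when a finished download already lives in the target directory."""
    return bool(done_path) and os.path.exists(done_path) \
        and os.path.split(done_path)[0] == os.path.split(final_path)[0]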
done_poster_path_copy = False # 标记未复制!此处不复制,在poster download中复制 elif os.path.exists(poster_final_path): pass # windows、mac大小写不敏感,暂不解决 @@ -1570,14 +1568,11 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ if poster_exists: Flags.file_done_dic[json_data['number']].update({'local_poster': poster_final_path}) # 清理旧图片 - if poster_old_path_with_filename.lower() != poster_final_path.lower() and os.path.exists( - poster_old_path_with_filename): + if poster_old_path_with_filename.lower() != poster_final_path.lower() and os.path.exists(poster_old_path_with_filename): delete_file(poster_old_path_with_filename) - if poster_old_path_no_filename.lower() != poster_final_path.lower() and os.path.exists( - poster_old_path_no_filename): + if poster_old_path_no_filename.lower() != poster_final_path.lower() and os.path.exists(poster_old_path_no_filename): delete_file(poster_old_path_no_filename) - if poster_new_path_with_filename.lower() != poster_final_path.lower() and os.path.exists( - poster_new_path_with_filename): + if poster_new_path_with_filename.lower() != poster_final_path.lower() and os.path.exists(poster_new_path_with_filename): delete_file(poster_new_path_with_filename) elif Flags.file_done_dic[json_data['number']]['local_poster']: copy_file(Flags.file_done_dic[json_data['number']]['local_poster'], poster_final_path) @@ -1590,8 +1585,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ done_thumb_path_copy = True try: # 图片最终路径等于已下载路径时,图片是已下载的,不需要处理 - if done_thumb_path and os.path.exists(done_thumb_path) and split_path(done_thumb_path)[0] == \ - split_path(thumb_final_path)[0]: + if done_thumb_path and os.path.exists(done_thumb_path) and split_path(done_thumb_path)[0] == split_path(thumb_final_path)[0]: done_thumb_path_copy = False # 标记未复制!此处不复制,在 thumb download中复制 elif os.path.exists(thumb_final_path): pass @@ -1607,14 +1601,11 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ if thumb_exists: Flags.file_done_dic[json_data['number']].update({'local_thumb': thumb_final_path}) # 清理旧图片 - if thumb_old_path_with_filename.lower() != thumb_final_path.lower() and os.path.exists( - thumb_old_path_with_filename): + if thumb_old_path_with_filename.lower() != thumb_final_path.lower() and os.path.exists(thumb_old_path_with_filename): delete_file(thumb_old_path_with_filename) - if thumb_old_path_no_filename.lower() != thumb_final_path.lower() and os.path.exists( - thumb_old_path_no_filename): + if thumb_old_path_no_filename.lower() != thumb_final_path.lower() and os.path.exists(thumb_old_path_no_filename): delete_file(thumb_old_path_no_filename) - if thumb_new_path_with_filename.lower() != thumb_final_path.lower() and os.path.exists( - thumb_new_path_with_filename): + if thumb_new_path_with_filename.lower() != thumb_final_path.lower() and os.path.exists(thumb_new_path_with_filename): delete_file(thumb_new_path_with_filename) elif Flags.file_done_dic[json_data['number']]['local_thumb']: copy_file(Flags.file_done_dic[json_data['number']]['local_thumb'], thumb_final_path) @@ -1627,8 +1618,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ done_fanart_path_copy = True try: # 图片最终路径等于已下载路径时,图片是已下载的,不需要处理 - if done_fanart_path and os.path.exists(done_fanart_path) and split_path(done_fanart_path)[0] == \ - split_path(fanart_final_path)[0]: + if done_fanart_path and os.path.exists(done_fanart_path) and split_path(done_fanart_path)[0] == split_path(fanart_final_path)[0]: 
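# Flags.file_done_dic, read and updated throughout this hunk, is a per-number
# cache: once one poster/thumb/fanart exists locally, later files carrying the
# same number (for example multi-part CD releases) copy the cached image
# instead of downloading it again. A self-contained sketch of that
# bookkeeping (shutil.copy stands in for the project's copy_file helper;
# locking is omitted and the function names are illustrative):
import shutil

file_done_dic: dict = {}

def remember_local(number: str, kind: str, path: str) -> None:
    # kind mirrors the keys used above: 'local_poster', 'local_thumb', 'local_fanart'
    file_done_dic.setdefault(number, {})[kind] = path

def reuse_local(number: str, kind: str, target: str) -> bool:
    cached = file_done_dic.get(number, {}).get(kind)
    if cached:
        shutil.copy(cached, target)
        return True
    return False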
done_fanart_path_copy = False # 标记未复制!此处不复制,在 fanart download中复制 elif os.path.exists(fanart_final_path): pass @@ -1644,14 +1634,11 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ if fanart_exists: Flags.file_done_dic[json_data['number']].update({'local_fanart': fanart_final_path}) # 清理旧图片 - if fanart_old_path_with_filename.lower() != fanart_final_path.lower() and os.path.exists( - fanart_old_path_with_filename): + if fanart_old_path_with_filename.lower() != fanart_final_path.lower() and os.path.exists(fanart_old_path_with_filename): delete_file(fanart_old_path_with_filename) - if fanart_old_path_no_filename.lower() != fanart_final_path.lower() and os.path.exists( - fanart_old_path_no_filename): + if fanart_old_path_no_filename.lower() != fanart_final_path.lower() and os.path.exists(fanart_old_path_no_filename): delete_file(fanart_old_path_no_filename) - if fanart_new_path_with_filename.lower() != fanart_final_path.lower() and os.path.exists( - fanart_new_path_with_filename): + if fanart_new_path_with_filename.lower() != fanart_final_path.lower() and os.path.exists(fanart_new_path_with_filename): delete_file(fanart_new_path_with_filename) elif Flags.file_done_dic[json_data['number']]['local_fanart']: copy_file(Flags.file_done_dic[json_data['number']]['local_fanart'], fanart_final_path) @@ -1701,17 +1688,14 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ # 删除带文件名文件,用不到了 if os.path.exists(trailer_old_file_path_with_filename): delete_file(trailer_old_file_path_with_filename) - if trailer_new_file_path_with_filename != trailer_old_file_path_with_filename and os.path.exists( - trailer_new_file_path_with_filename): + if trailer_new_file_path_with_filename != trailer_old_file_path_with_filename and os.path.exists(trailer_new_file_path_with_filename): delete_file(trailer_new_file_path_with_filename) else: # 目标文件带文件名 if os.path.exists(trailer_new_file_path_with_filename): - if trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists( - trailer_old_file_path_with_filename): + if trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists(trailer_old_file_path_with_filename): delete_file(trailer_old_file_path_with_filename) - elif trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists( - trailer_old_file_path_with_filename): + elif trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists(trailer_old_file_path_with_filename): move_file(trailer_old_file_path_with_filename, trailer_new_file_path_with_filename) elif os.path.exists(trailer_old_file_path): move_file(trailer_old_file_path, trailer_new_file_path_with_filename) @@ -1728,8 +1712,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ if trailer_new_folder_path != trailer_old_folder_path and os.path.exists(trailer_new_folder_path): shutil.rmtree(trailer_new_folder_path, ignore_errors=True) # 删除带文件名旧文件,用不到了 - if trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists( - trailer_old_file_path_with_filename): + if trailer_old_file_path_with_filename != trailer_new_file_path_with_filename and os.path.exists(trailer_old_file_path_with_filename): delete_file(trailer_old_file_path_with_filename) else: local_trailer = Flags.file_done_dic.get(json_data['number']).get('local_trailer') @@ -1741,8 +1724,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, 
file_path, file_ # 处理 extrafanart try: if os.path.exists(extrafanart_new_path): - if extrafanart_old_path.lower() != extrafanart_new_path.lower() and os.path.exists( - extrafanart_old_path): + if extrafanart_old_path.lower() != extrafanart_new_path.lower() and os.path.exists(extrafanart_old_path): shutil.rmtree(extrafanart_old_path, ignore_errors=True) elif os.path.exists(extrafanart_old_path): move_file(extrafanart_old_path, extrafanart_new_path) @@ -1752,8 +1734,7 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ # extrafanart副本 try: if os.path.exists(extrafanart_copy_new_path): - if extrafanart_copy_old_path.lower() != extrafanart_copy_new_path.lower() and os.path.exists( - extrafanart_copy_old_path): + if extrafanart_copy_old_path.lower() != extrafanart_copy_new_path.lower() and os.path.exists(extrafanart_copy_old_path): shutil.rmtree(extrafanart_copy_old_path, ignore_errors=True) elif os.path.exists(extrafanart_copy_old_path): move_file(extrafanart_copy_old_path, extrafanart_copy_new_path) @@ -1762,16 +1743,14 @@ def deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_ # 主题视频 if os.path.exists(theme_videos_new_path): - if theme_videos_old_path.lower() != theme_videos_new_path.lower() and os.path.exists( - theme_videos_old_path): + if theme_videos_old_path.lower() != theme_videos_new_path.lower() and os.path.exists(theme_videos_old_path): shutil.rmtree(theme_videos_old_path, ignore_errors=True) elif os.path.exists(theme_videos_old_path): move_file(theme_videos_old_path, theme_videos_new_path) # 附加视频 if os.path.exists(extrafanart_extra_new_path): - if extrafanart_extra_old_path.lower() != extrafanart_extra_new_path.lower() and os.path.exists( - extrafanart_extra_old_path): + if extrafanart_extra_old_path.lower() != extrafanart_extra_new_path.lower() and os.path.exists(extrafanart_extra_old_path): shutil.rmtree(extrafanart_extra_old_path, ignore_errors=True) elif os.path.exists(extrafanart_extra_old_path): move_file(extrafanart_extra_old_path, extrafanart_extra_new_path) @@ -1866,7 +1845,6 @@ def check_and_clean_files(): signal.show_log_text('================================================================================') _clean_empty_fodlers(movie_path, '') signal.set_label_file_path.emit('🗑 清理完成!') - signal.show_log_text( - f" 🎉🎉🎉 All finished!!!({get_used_time(start_time)}s) Total {total} , Success {succ} , Failed {fail} ") + signal.show_log_text(f" 🎉🎉🎉 All finished!!!({get_used_time(start_time)}s) Total {total} , Success {succ} , Failed {fail} ") signal.show_log_text('================================================================================') signal.reset_buttons_status.emit() diff --git a/src/models/core/image.py b/src/models/core/image.py index e6a8467..7cca0be 100644 --- a/src/models/core/image.py +++ b/src/models/core/image.py @@ -127,41 +127,17 @@ def _add_to_pic(pic_path, img_pic, mark_size, count, mark_name): # 固定一个位置 if mark_fixed == 'corner': corner_top_left = [(0, 0), (scroll_width, 0), (scroll_width * 2, 0)] - corner_bottom_left = [(0, img_pic.height - scroll_high), (scroll_width, img_pic.height - scroll_high), - (scroll_width * 2, img_pic.height - scroll_high)] - corner_top_right = [(img_pic.width - scroll_width * 4, 0), (img_pic.width - scroll_width * 2, 0), - (img_pic.width - scroll_width, 0)] - corner_bottom_right = [(img_pic.width - scroll_width * 4, img_pic.height - scroll_high), - (img_pic.width - scroll_width * 2, img_pic.height - scroll_high), + corner_bottom_left = [(0, img_pic.height - 
scroll_high), (scroll_width, img_pic.height - scroll_high), (scroll_width * 2, img_pic.height - scroll_high)] + corner_top_right = [(img_pic.width - scroll_width * 4, 0), (img_pic.width - scroll_width * 2, 0), (img_pic.width - scroll_width, 0)] + corner_bottom_right = [(img_pic.width - scroll_width * 4, img_pic.height - scroll_high), (img_pic.width - scroll_width * 2, img_pic.height - scroll_high), (img_pic.width - scroll_width, img_pic.height - scroll_high)] - corner_dic = { - 'top_left': corner_top_left, - 'bottom_left': corner_bottom_left, - 'top_right': corner_top_right, - 'bottom_right': corner_bottom_right, - } + corner_dic = {'top_left': corner_top_left, 'bottom_left': corner_bottom_left, 'top_right': corner_top_right, 'bottom_right': corner_bottom_right, } mark_postion = corner_dic[mark_pos_corner][count] # 封面四个角的位置 else: - pos = [ - { - 'x': 0, - 'y': 0 - }, - { - 'x': img_pic.width - scroll_width, - 'y': 0 - }, - { - 'x': img_pic.width - scroll_width, - 'y': img_pic.height - scroll_high - }, - { - 'x': 0, - 'y': img_pic.height - scroll_high - }, - ] + pos = [{'x': 0, 'y': 0}, {'x': img_pic.width - scroll_width, 'y': 0}, {'x': img_pic.width - scroll_width, 'y': img_pic.height - scroll_high}, + {'x': 0, 'y': img_pic.height - scroll_high}, ] mark_postion = (pos[count]['x'], pos[count]['y']) try: # 图片如果下载不完整时,这里会崩溃,跳过 img_pic.paste(img_subt, mark_postion, mask=a) @@ -201,12 +177,7 @@ def add_mark_thread(pic_path, mark_list): _add_to_pic(pic_path, img_pic, mark_size, count, mark_name) count += 1 else: - pos = { - 'top_left': 0, - 'top_right': 1, - 'bottom_right': 2, - 'bottom_left': 3, - } + pos = {'top_left': 0, 'top_right': 1, 'bottom_right': 2, 'bottom_left': 3, } mark_pos_count = pos.get(mark_pos) # 获取自定义位置, 取余配合pos达到顺时针添加的效果 count_hd = '' for mark_name in mark_list: diff --git a/src/models/core/nfo.py b/src/models/core/nfo.py index e87ff08..3fcd424 100644 --- a/src/models/core/nfo.py +++ b/src/models/core/nfo.py @@ -39,8 +39,7 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal # 字符转义,避免emby无法解析 json_data_nfo = json_data.copy() - key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'actor', 'series', 'director', 'studio', - 'publisher', 'tag', 'website', 'cover', 'poster', 'trailer'] + key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'actor', 'series', 'director', 'studio', 'publisher', 'tag', 'website', 'cover', 'poster', 'trailer'] rep_word = { '&': '&', '<': '<', @@ -92,13 +91,10 @@ def write_nfo(json_data, nfo_new_path, folder_new_path, file_path, edit_mode=Fal temp_all_actor = deal_actor_more(json_data['all_actor']) temp_actor = deal_actor_more(actor) - repl_list = [['4K', temp_4k], ['originaltitle', originaltitle], ['title', title], ['outline', outline], - ['number', number], ['first_actor', first_actor], ['all_actor', temp_all_actor], - ['actor', temp_actor], ['release', temp_release], ['year', year], ['runtime', runtime], - ['director', director], ['series', series], ['studio', studio], ['publisher', publisher], - ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], ['cnword', c_word], - ['first_letter', first_letter], ['letters', letters], ['filename', filename], - ['wanted', json_data['wanted']]] + repl_list = [['4K', temp_4k], ['originaltitle', originaltitle], ['title', title], ['outline', outline], ['number', number], ['first_actor', first_actor], + ['all_actor', temp_all_actor], ['actor', temp_actor], ['release', temp_release], ['year', year], ['runtime', runtime], ['director', director], + 
['series', series], ['studio', studio], ['publisher', publisher], ['mosaic', mosaic], ['definition', definition.replace('UHD8', 'UHD')], + ['cnword', c_word], ['first_letter', first_letter], ['letters', letters], ['filename', filename], ['wanted', json_data['wanted']]] for each_key in repl_list: nfo_title = nfo_title.replace(each_key[0], each_key[1]) diff --git a/src/models/core/scraper.py b/src/models/core/scraper.py index 13b18c4..5acbca6 100644 --- a/src/models/core/scraper.py +++ b/src/models/core/scraper.py @@ -13,15 +13,13 @@ from models.config.config import config from models.config.resources import resources from models.core.crawler import crawl -from models.core.file import _clean_empty_fodlers, _pic_some_deal, check_file, copy_trailer_to_theme_videos, \ - creat_folder, deal_old_files, get_file_info, get_movie_list, get_output_name, move_bif, move_file_to_failed_folder, \ - move_movie, move_other_file, move_torrent, newtdisk_creat_symlink, save_success_list +from models.core.file import _clean_empty_fodlers, _pic_some_deal, check_file, copy_trailer_to_theme_videos, creat_folder, deal_old_files, get_file_info, get_movie_list, \ + get_output_name, move_bif, move_file_to_failed_folder, move_movie, move_other_file, move_torrent, newtdisk_creat_symlink, save_success_list from models.core.flags import Flags from models.core.image import add_mark, extrafanart_copy2, extrafanart_extras_copy from models.core.nfo import get_nfo_data, write_nfo from models.core.translate import translate_actor, translate_info, translate_title_outline -from models.core.utils import deal_some_field, get_movie_path_setting, get_video_size, \ - replace_special_word, replace_word, show_data_result, show_movie_info +from models.core.utils import deal_some_field, get_movie_path_setting, get_video_size, replace_special_word, replace_word, show_data_result, show_movie_info from models.core.web import extrafanart_download, fanart_download, poster_download, thumb_download, trailer_download from models.entity.enums import FileMode from models.signals import signal @@ -41,8 +39,7 @@ def _scrape_one_file(file_path, file_info, file_mode): json_data, movie_number, folder_old_path, file_name, file_ex, sub_list, file_show_name, file_show_path = file_info # 获取设置的媒体目录、失败目录、成功目录 - movie_path, success_folder, failed_folder, escape_folder_list, \ - extrafanart_folder, softlink_path = get_movie_path_setting(file_path) + movie_path, success_folder, failed_folder, escape_folder_list, extrafanart_folder, softlink_path = get_movie_path_setting(file_path) json_data['failed_folder'] = failed_folder # 检查文件大小 @@ -62,7 +59,7 @@ def _scrape_one_file(file_path, file_info, file_mode): if 'has_nfo_update' not in read_mode: # 不更新并返回 show_data_result(json_data, start_time) show_movie_info(json_data) - json_data['logs'] += "\n 🙉 [Movie] %s" % file_path + json_data['logs'] += f"\n 🙉 [Movie] {file_path}" save_success_list(file_path, file_path) # 保存成功列表 return True, json_data @@ -104,8 +101,7 @@ def _scrape_one_file(file_path, file_info, file_mode): json_data_new['4K'] = '' def deal_tag_data(tag): - for each in ['中文字幕', '无码流出', '無碼流出', '无码破解', '無碼破解', '无码', '無碼', '有码', '有碼', - '国产', '國產', '里番', '裏番', '动漫', '動漫']: + for each in ['中文字幕', '无码流出', '無碼流出', '无码破解', '無碼破解', '无码', '無碼', '有码', '有碼', '国产', '國產', '里番', '裏番', '动漫', '動漫']: tag = tag.replace(each, '') return tag.replace(',,', ',') @@ -144,9 +140,11 @@ def deal_tag_data(tag): show_movie_info(json_data) # 生成输出文件夹和输出文件的路径 - folder_new_path, file_new_path, nfo_new_path, poster_new_path_with_filename, 
\ - thumb_new_path_with_filename, fanart_new_path_with_filename, naming_rule, poster_final_path, \ - thumb_final_path, fanart_final_path = get_output_name(json_data, file_path, success_folder, file_ex) + folder_new_path, file_new_path, nfo_new_path, poster_new_path_with_filename, thumb_new_path_with_filename, fanart_new_path_with_filename, naming_rule, poster_final_path, thumb_final_path, fanart_final_path = get_output_name( + json_data, + file_path, + success_folder, + file_ex) # 判断输出文件的路径是否重复 if config.soft_link == 0: @@ -156,8 +154,7 @@ def deal_tag_data(tag): else: done_file_new_path_list.append(file_path) # 已存在时,添加到列表,停止刮削 done_file_new_path_list.sort(reverse=True) - json_data['error_info'] = '存在重复文件(指刮削后的文件路径相同!),请检查:\n 🍁 %s' % '\n 🍁 '.join( - done_file_new_path_list) + json_data['error_info'] = '存在重复文件(指刮削后的文件路径相同!),请检查:\n 🍁 ' + '\n 🍁 '.join(done_file_new_path_list) # json_data['req_web'] = 'do_not_update_json_data_dic' # do_not_update_json_data_dic 是不要更新json_data的标识,表示这个文件的数据有问题 json_data['outline'] = split_path(file_path)[1] @@ -165,8 +162,7 @@ def deal_tag_data(tag): return False, json_data # 判断输出文件夹和文件是否已存在,如无则创建输出文件夹 - if not creat_folder(json_data, folder_new_path, file_path, file_new_path, thumb_new_path_with_filename, - poster_new_path_with_filename): + if not creat_folder(json_data, folder_new_path, file_path, file_new_path, thumb_new_path_with_filename, poster_new_path_with_filename): return False, json_data # 返回MDCx1_1main, 继续处理下一个文件 # 初始化图片已下载地址的字典 @@ -188,10 +184,19 @@ def deal_tag_data(tag): # 移动文件 if move_movie(json_data, file_path, file_new_path): if 'sort_del' in config.switch_on: - deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_new_path, - thumb_new_path_with_filename, poster_new_path_with_filename, - fanart_new_path_with_filename, nfo_new_path, file_ex, poster_final_path, - thumb_final_path, fanart_final_path) # 清理旧的thumb、poster、fanart、nfo + deal_old_files(json_data, + folder_old_path, + folder_new_path, + file_path, + file_new_path, + thumb_new_path_with_filename, + poster_new_path_with_filename, + fanart_new_path_with_filename, + nfo_new_path, + file_ex, + poster_final_path, + thumb_final_path, + fanart_final_path) # 清理旧的thumb、poster、fanart、nfo save_success_list(file_path, file_new_path) # 保存成功列表 return True, json_data else: @@ -200,10 +205,19 @@ def deal_tag_data(tag): return False, json_data # 清理旧的thumb、poster、fanart、extrafanart、nfo - pic_final_catched, single_folder_catched = \ - deal_old_files(json_data, folder_old_path, folder_new_path, file_path, file_new_path, - thumb_new_path_with_filename, poster_new_path_with_filename, fanart_new_path_with_filename, - nfo_new_path, file_ex, poster_final_path, thumb_final_path, fanart_final_path) + pic_final_catched, single_folder_catched = deal_old_files(json_data, + folder_old_path, + folder_new_path, + file_path, + file_new_path, + thumb_new_path_with_filename, + poster_new_path_with_filename, + fanart_new_path_with_filename, + nfo_new_path, + file_ex, + poster_final_path, + thumb_final_path, + fanart_final_path) # 如果 final_pic_path 没处理过,这时才需要下载和加水印 if pic_final_catched: @@ -223,8 +237,7 @@ def deal_tag_data(tag): _pic_some_deal(json_data, thumb_final_path, fanart_final_path) # 加水印 - add_mark(json_data, json_data['poster_marked'], json_data['thumb_marked'], - json_data['fanart_marked']) + add_mark(json_data, json_data['poster_marked'], json_data['thumb_marked'], json_data['fanart_marked']) # 下载剧照和剧照副本 if single_folder_catched: @@ -276,8 +289,7 @@ def _scrape_exec_thread(task): 
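# The pacing logic below staggers scraping threads: each task claims a shared
# Flags.next_start_time slot that advances by config.thread_time per task, so
# threads start at a fixed interval instead of all at once. An equivalent
# standalone sketch, assuming this reading of the flow (class and names are
# illustrative only):
import threading
import time

class StartPacer:
    def __init__(self, interval: float) -> None:
        self.interval = interval        # seconds between thread starts
        self.next_start = time.time()   # mirrors Flags.next_start_time
        self._lock = threading.Lock()

    def wait_turn(self) -> None:
        # Claim the next slot atomically, then sleep until it arrives.
        with self._lock:
            slot, self.next_start = self.next_start, self.next_start + self.interval
        delay = slot - time.time()
        if delay > 0:
            time.sleep(delay)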
file_name_temp = file_name_temp[:40] + '...' # 处理间歇任务 - while config.main_mode != 4 and 'rest_scrape' in config.switch_on \ - and count - Flags.rest_now_begin_count > config.rest_count: + while config.main_mode != 4 and 'rest_scrape' in config.switch_on and count - Flags.rest_now_begin_count > config.rest_count: _check_stop(file_name_temp) time.sleep(1) @@ -287,8 +299,7 @@ def _scrape_exec_thread(task): thread_time = config.thread_time if count == 1 or thread_time == 0 or config.main_mode == 4: Flags.next_start_time = time.time() - signal.show_log_text( - f' 🕷 {get_current_time()} 开始刮削:{Flags.scrape_starting}/{count_all} {file_name_temp}') + signal.show_log_text(f' 🕷 {get_current_time()} 开始刮削:{Flags.scrape_starting}/{count_all} {file_name_temp}') thread_time = 0 else: Flags.next_start_time += thread_time @@ -303,8 +314,7 @@ def _scrape_exec_thread(task): Flags.scrape_started += 1 if count > 1 and thread_time != 0: - signal.show_log_text( - f' 🕷 {get_current_time()} 开始刮削:{Flags.scrape_started}/{count_all} {file_name_temp}') + signal.show_log_text(f' 🕷 {get_current_time()} 开始刮削:{Flags.scrape_started}/{count_all} {file_name_temp}') start_time = time.time() file_mode = Flags.file_mode @@ -315,12 +325,10 @@ def _scrape_exec_thread(task): # 显示刮削信息 progress_value = Flags.scrape_started / count_all * 100 - progress_percentage = '%.2f' % progress_value + '%' + progress_percentage = f'{progress_value:.2f}%' signal.exec_set_processbar.emit(int(progress_value)) - signal.set_label_file_path.emit( - f'正在刮削: {Flags.scrape_started}/{count_all} {progress_percentage} \n {convert_path(file_show_path)}') - signal.label_result.emit(f' 刮削中:{Flags.scrape_started - Flags.succ_count - Flags.fail_count} ' - f'成功:{Flags.succ_count} 失败:{Flags.fail_count}') + signal.set_label_file_path.emit(f'正在刮削: {Flags.scrape_started}/{count_all} {progress_percentage} \n {convert_path(file_show_path)}') + signal.label_result.emit(f' 刮削中:{Flags.scrape_started - Flags.succ_count - Flags.fail_count} 成功:{Flags.succ_count} 失败:{Flags.fail_count}') json_data['logs'] += '\n' + "👆" * 50 json_data['logs'] += "\n 🙈 [Movie] " + convert_path(file_path) json_data['logs'] += "\n 🚘 [Number] " + movie_number @@ -328,8 +336,7 @@ def _scrape_exec_thread(task): # 如果指定了单一网站,进行提示 website_single = config.website_single if config.scrape_like == 'single' and file_mode != FileMode.Single and config.main_mode != 4: - json_data['logs'] += \ - "\n 😸 [Note] You specified 「 %s 」, some videos may not have results! " % website_single + json_data['logs'] += f"\n 😸 [Note] You specified 「 {website_single} 」, some videos may not have results! " # 获取刮削数据 try: @@ -348,13 +355,11 @@ def _scrape_exec_thread(task): try: if result: Flags.succ_count += 1 - succ_show_name = str(Flags.count_claw) + '-' + str(Flags.succ_count) + '.' + file_show_name.replace( - movie_number, json_data['number']) + json_data['4K'] + succ_show_name = str(Flags.count_claw) + '-' + str(Flags.succ_count) + '.' + file_show_name.replace(movie_number, json_data['number']) + json_data['4K'] signal.show_list_name(succ_show_name, 'succ', json_data, movie_number) else: Flags.fail_count += 1 - fail_show_name = str(Flags.count_claw) + '-' + str(Flags.fail_count) + '.' + file_show_name.replace( - movie_number, json_data['number']) + json_data['4K'] + fail_show_name = str(Flags.count_claw) + '-' + str(Flags.fail_count) + '.' 
+ file_show_name.replace(movie_number, json_data['number']) + json_data['4K'] signal.show_list_name(fail_show_name, 'fail', json_data, movie_number) if json_data['error_info']: json_data['logs'] += f'\n 🔴 [Failed] Reason: {json_data["error_info"]}' @@ -377,14 +382,11 @@ def _scrape_exec_thread(task): Flags.scrape_done += 1 count = Flags.scrape_done progress_value = count / count_all * 100 - progress_percentage = '%.2f' % progress_value + '%' + progress_percentage = f'{progress_value:.2f}%' used_time = get_used_time(start_time) - scrape_info_begin = '%d/%d (%s) round(%s) %s 新的刮削线程' % ( - count, count_all, progress_percentage, Flags.count_claw, split_path(file_path)[1]) - scrape_info_begin = '\n\n\n' + '👇'*50 + '\n' + scrape_info_begin - scrape_info_after = f'\n ' \ - f'🕷 {get_current_time()} {count}/{count_all} ' \ - f'{split_path(file_path)[1]} 刮削完成!用时 {used_time} 秒!' + scrape_info_begin = f'{count:d}/{count_all:d} ({progress_percentage}) round({Flags.count_claw}) {split_path(file_path)[1]} 新的刮削线程' + scrape_info_begin = '\n\n\n' + '👇' * 50 + '\n' + scrape_info_begin + scrape_info_after = f'\n 🕷 {get_current_time()} {count}/{count_all} {split_path(file_path)[1]} 刮削完成!用时 {used_time} 秒!' json_data['logs'] = scrape_info_begin + json_data['logs'] + scrape_info_after signal.show_log_text(json_data['logs']) remain_count = Flags.scrape_started - count @@ -459,7 +461,7 @@ def scrape(file_mode: FileMode, movie_list): signal.add_label_info({}) # 清空主界面显示信息 thread_number = config.thread_number # 线程数量 thread_time = config.thread_time # 线程延时 - signal.label_result.emit(' 刮削中:%s 成功:%s 失败:%s' % (0, Flags.succ_count, Flags.fail_count)) + signal.label_result.emit(f' 刮削中:{0} 成功:{Flags.succ_count} 失败:{Flags.fail_count}') signal.logs_failed_settext.emit('\n\n\n') # 日志页面显示开始时间 @@ -477,8 +479,7 @@ def scrape(file_mode: FileMode, movie_list): signal.show_log_text(f'{n} 🖥 File path: {each_f}\n 🌐 File url: {each_i[1]}') # 获取设置的媒体目录、失败目录、成功目录 - movie_path, success_folder, failed_folder, escape_folder_list, \ - extrafanart_folder, softlink_path = get_movie_path_setting() + movie_path, success_folder, failed_folder, escape_folder_list, extrafanart_folder, softlink_path = get_movie_path_setting() # 获取待刮削文件列表的相关信息 if not movie_list: @@ -510,9 +511,7 @@ def scrape(file_mode: FileMode, movie_list): thread_number = count_all signal.show_log_text(f' 🕷 开启多线程,线程数量({thread_number}),线程延时({thread_time})秒...') if 'rest_scrape' in config.switch_on and config.main_mode != 4: - signal.show_log_text( - f' 🍯 间歇刮削 已启用,连续刮削 {config.rest_count} 个文件后,' - f'将自动休息 {Flags.rest_time_convert} 秒...') + signal.show_log_text(f' 🍯 间歇刮削 已启用,连续刮削 {config.rest_count} 个文件后,将自动休息 {Flags.rest_time_convert} 秒...') # 在启动前点了停止按钮 if Flags.stop_flag: @@ -528,7 +527,7 @@ def scrape(file_mode: FileMode, movie_list): # self.extrafanart_pool.shutdown(wait=True) Flags.pool.shutdown(wait=True) - signal.label_result.emit(' 刮削中:%s 成功:%s 失败:%s' % (0, Flags.succ_count, Flags.fail_count)) + signal.label_result.emit(f' 刮削中:0 成功:{Flags.succ_count} 失败:{Flags.fail_count}') save_success_list() # 保存成功列表 if signal.stop: return @@ -542,35 +541,29 @@ def scrape(file_mode: FileMode, movie_list): else: average_time = used_time signal.exec_set_processbar.emit(0) - signal.set_label_file_path.emit('🎉 恭喜!全部刮削完成!共 %s 个文件!用时 %s 秒' % (count_all, used_time)) - signal.show_traceback_log( - "🎉 All finished!!! Total %s , Success %s , Failed %s " % (count_all, Flags.succ_count, Flags.fail_count)) - signal.show_log_text( - " 🎉🎉🎉 All finished!!! 
Total %s , Success %s , Failed %s " % (count_all, Flags.succ_count, Flags.fail_count)) + signal.set_label_file_path.emit(f'🎉 恭喜!全部刮削完成!共 {count_all} 个文件!用时 {used_time} 秒') + signal.show_traceback_log(f"🎉 All finished!!! Total {count_all} , Success {Flags.succ_count} , Failed {Flags.fail_count} ") + signal.show_log_text(f" 🎉🎉🎉 All finished!!! Total {count_all} , Success {Flags.succ_count} , Failed {Flags.fail_count} ") signal.show_log_text("================================================================================") if Flags.failed_list: signal.show_log_text(" *** Failed results ****") for i in range(len(Flags.failed_list)): fail_path, fail_reson = Flags.failed_list[i] - signal.show_log_text(" 🔴 %s %s\n %s" % (i + 1, fail_path, fail_reson)) + signal.show_log_text(f" 🔴 {i + 1} {fail_path}\n {fail_reson}") signal.show_log_text("================================================================================") - signal.show_log_text( - ' ⏰ Start time'.ljust(15) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(Flags.start_time))) - signal.show_log_text( - ' 🏁 End time'.ljust(15) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))) - signal.show_log_text(' ⏱ Used time'.ljust(15) + ': %sS' % used_time) - signal.show_log_text(' 📺 Movies num'.ljust(15) + ': %s' % count_all) - signal.show_log_text(' 🍕 Per time'.ljust(15) + ': %sS' % average_time) + signal.show_log_text(' ⏰ Start time'.ljust(15) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(Flags.start_time))) + signal.show_log_text(' 🏁 End time'.ljust(15) + ': ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(end_time))) + signal.show_log_text(' ⏱ Used time'.ljust(15) + f': {used_time}S') + signal.show_log_text(' 📺 Movies num'.ljust(15) + f': {count_all}') + signal.show_log_text(' 🍕 Per time'.ljust(15) + f': {average_time}S') signal.show_log_text("================================================================================") - signal.show_scrape_info('🎉 刮削完成 %s/%s' % (count_all, count_all)) + signal.show_scrape_info(f'🎉 刮削完成 {count_all}/{count_all}') # auto run after scrape if 'actor_photo_auto' in config.emby_on: update_emby_actor_photo() if config.actor_photo_kodi_auto: creat_kodi_actors(True) - if config.auto_link: - newtdisk_creat_symlink('copy_netdisk_nfo' in config.switch_on) signal.reset_buttons_status.emit() if len(Flags.again_dic): @@ -604,10 +597,8 @@ def start_new_scrape(file_mode: FileMode, movie_list=None): def _check_stop(file_name_temp): if signal.stop: Flags.now_kill += 1 - signal.show_log_text( - f' 🕷 {get_current_time()} 已停止刮削:{Flags.now_kill}/{Flags.total_kills} {file_name_temp}') - signal.set_label_file_path.emit( - f'⛔️ 正在停止刮削...\n 正在停止已在运行的任务线程({Flags.now_kill}/{Flags.total_kills})...') + signal.show_log_text(f' 🕷 {get_current_time()} 已停止刮削:{Flags.now_kill}/{Flags.total_kills} {file_name_temp}') + signal.set_label_file_path.emit(f'⛔️ 正在停止刮削...\n 正在停止已在运行的任务线程({Flags.now_kill}/{Flags.total_kills})...') # exceptions must derive from BaseException raise '手动停止刮削' @@ -649,7 +640,8 @@ def get_remain_list(): movie_path = convert_path(movie_path) temp_remain_path = convert_path(Flags.remain_list[0]) if movie_path not in temp_remain_path: - box = QMessageBox(QMessageBox.Warning, '提醒', + box = QMessageBox(QMessageBox.Warning, + '提醒', f'很重要!!请注意:\n当前待刮削目录:{movie_path}\n剩余任务文件路径:{temp_remain_path}\n剩余任务的文件路径,并不在当前待刮削目录中!\n剩余任务很可能是使用其他配置扫描的!\n请确认成功输出目录和失败目录是否正确!如果配置不正确,继续刮削可能会导致文件被移动到新配置的输出位置!\n是否继续刮削?') box.setStandardButtons(QMessageBox.Yes | QMessageBox.No) 
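# The dialog being assembled here follows the standard Qt confirm pattern:
# build a QMessageBox, add Yes/No buttons, relabel them, then branch on
# exec(). A condensed sketch (PyQt5-style import assumed, the project may use
# a different Qt binding; the '取消' label for the No button is a guess, only
# the Yes relabel is visible in this hunk):
from PyQt5.QtWidgets import QMessageBox

def confirm(title: str, text: str) -> bool:
    box = QMessageBox(QMessageBox.Warning, title, text)
    box.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
    box.button(QMessageBox.Yes).setText('继续')  # "continue"
    box.button(QMessageBox.No).setText('取消')   # "cancel" (assumed label)
    return box.exec() == QMessageBox.Yes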
box.button(QMessageBox.Yes).setText('继续') @@ -658,8 +650,7 @@ def get_remain_list(): reply = box.exec() if reply == QMessageBox.No: return True - signal.show_log_text( - f'🍯 🍯 🍯 NOTE: 继续刮削未完成任务!!! 剩余未刮削文件数量({len(Flags.remain_list)})') + signal.show_log_text(f'🍯 🍯 🍯 NOTE: 继续刮削未完成任务!!! 剩余未刮削文件数量({len(Flags.remain_list)})') start_new_scrape(FileMode.Default, Flags.remain_list) return True return False diff --git a/src/models/core/subtitle.py b/src/models/core/subtitle.py index 662fbc8..41f3eaf 100644 --- a/src/models/core/subtitle.py +++ b/src/models/core/subtitle.py @@ -18,8 +18,7 @@ def add_sub_for_all_video(): signal.show_log_text("字幕文件夹不存在!\n只能检查无字幕视频,无法添加字幕!") signal.show_log_text("================================================================================") - movie_path, success_folder, failed_folder, escape_folder_list, \ - extrafanart_folder, softlink_path = get_movie_path_setting() + movie_path, success_folder, failed_folder, escape_folder_list, extrafanart_folder, softlink_path = get_movie_path_setting() signal.show_log_text(f' 🖥 Movie path: {movie_path} \n 🔎 正在检查所有视频,请稍候...') if config.subtitle_add_chs == 'on': signal.show_log_text(" 如果字幕文件名不以 .chs 结尾,则会自动添加!\n") @@ -63,14 +62,12 @@ def add_sub_for_all_video(): if config.subtitle_add_chs == 'on': if '.chs' not in sub_old_path and not os.path.exists(sub_new_path): move_file(sub_old_path, sub_new_path) - signal.show_log_text( - f" 🍀 字幕文件: '{file_name + sub_type}' 已被重命名为: '{file_name + '.chs' + sub_type}' ") + signal.show_log_text(f" 🍀 字幕文件: '{file_name + sub_type}' 已被重命名为: '{file_name + '.chs' + sub_type}' ") else: sub_old_path_no_chs = sub_old_path.replace('.chs', '') if '.chs' in sub_old_path and not os.path.exists(sub_old_path_no_chs): move_file(sub_old_path, sub_old_path_no_chs) - signal.show_log_text( - f" 🍀 字幕文件: '{file_name + sub_type}' 已被重命名为: '{split_path(sub_old_path_no_chs)[1]}' ") + signal.show_log_text(f" 🍀 字幕文件: '{file_name + sub_type}' 已被重命名为: '{split_path(sub_old_path_no_chs)[1]}' ") cnword_style = config.cnword_style if cnword_style and cnword_style not in sub_new_path: @@ -78,12 +75,10 @@ def add_sub_for_all_video(): file_cnword = config.file_cnword folder_name = config.folder_name naming_file = config.naming_file - if folder_cnword == 'on' or file_cnword == 'on' \ - or 'cnword' in folder_name or 'cnword' in naming_file: + if folder_cnword == 'on' or file_cnword == 'on' or 'cnword' in folder_name or 'cnword' in naming_file: new_sub_movie_list.append(movie) - signal.show_log_text( - f'\nDone! \n成功添加字幕影片数量: {add_count} \n仍无字幕影片数量: {no_sub_count - add_count} ') + signal.show_log_text(f'\nDone! 
\n成功添加字幕影片数量: {add_count} \n仍无字幕影片数量: {no_sub_count - add_count} ') signal.show_log_text("================================================================================") # 重新刮削新添加字幕的影片 list2 = list(set(new_sub_movie_list)) # 去重 @@ -93,5 +88,3 @@ def add_sub_for_all_video(): signal.show_log_text('开始对新添加字幕的视频重新刮削...') start_new_scrape(FileMode.Default, movie_list=list3) signal.reset_buttons_status.emit() - - diff --git a/src/models/core/translate.py b/src/models/core/translate.py index e69a182..490ae7d 100644 --- a/src/models/core/translate.py +++ b/src/models/core/translate.py @@ -48,14 +48,9 @@ def youdao_translate(title, outline): 'action': 'FY_BY_CLICKBUTTION', } headers = { - 'Cookie': random.choice([ - "OUTFOX_SEARCH_USER_ID=833904829@10.169.0.84", - "OUTFOX_SEARCH_USER_ID=-10218418@11.136.67.24;", - "OUTFOX_SEARCH_USER_ID=1989505748@10.108.160.19;", - "OUTFOX_SEARCH_USER_ID=2072418438@218.82.240.196;", - "OUTFOX_SEARCH_USER_ID=1768574849@220.181.76.83;", - "OUTFOX_SEARCH_USER_ID=-2153895048@10.168.8.76;", - ]), + 'Cookie': random.choice(["OUTFOX_SEARCH_USER_ID=833904829@10.169.0.84", "OUTFOX_SEARCH_USER_ID=-10218418@11.136.67.24;", + "OUTFOX_SEARCH_USER_ID=1989505748@10.108.160.19;", "OUTFOX_SEARCH_USER_ID=2072418438@218.82.240.196;", + "OUTFOX_SEARCH_USER_ID=1768574849@220.181.76.83;", "OUTFOX_SEARCH_USER_ID=-2153895048@10.168.8.76;", ]), 'Referer': 'https://fanyi.youdao.com/?keyfrom=dict2.top', } headers_o = config.headers @@ -129,14 +124,8 @@ def deepl_translate(title, outline, ls='JA', json_data=None): deepl_url = 'https://api-free.deepl.com' if ':fx' in deepl_key else 'https://api.deepl.com' url = f'{deepl_url}/v2/translate?auth_key={deepl_key}&source_lang={ls}&target_lang=ZH' - params_title = { - 'Content-Type': 'application/x-www-form-urlencoded', - 'text': title, - } - params_outline = { - 'Content-Type': 'application/x-www-form-urlencoded', - 'text': outline, - } + params_title = {'Content-Type': 'application/x-www-form-urlencoded', 'text': title, } + params_outline = {'Content-Type': 'application/x-www-form-urlencoded', 'text': outline, } if title: result, res = post_html(url, data=params_title, json_data=True) @@ -177,8 +166,8 @@ def translate_info(json_data): tag_include = config.tag_include tag = json_data['tag'] - remove_key = ['HD高画质', 'HD高畫質', '高画质', '高畫質', '無碼流出', '无码流出', '無碼破解', '无码破解', - '無碼片', '无码片', '有碼片', '有码片', '無碼', '无码', '有碼', '有码', '流出', '国产', '國產'] + remove_key = ['HD高画质', 'HD高畫質', '高画质', '高畫質', '無碼流出', '无码流出', '無碼破解', '无码破解', '無碼片', '无码片', '有碼片', '有码片', '無碼', '无码', + '有碼', '有码', '流出', '国产', '國產'] for each_key in remove_key: tag = tag.replace(each_key, '') @@ -294,13 +283,12 @@ def translate_actor(json_data): # 非读取模式,勾选了使用真实名字时; 读取模式,勾选了允许更新真实名字时 if actor_realname == 'on': start_time = time.time() - if mosaic != '国产' and ( - number.startswith('FC2') or number.startswith('SIRO') or re.search(r'\d{3,}[A-Z]{3,}-', number)): + if mosaic != '国产' and (number.startswith('FC2') or number.startswith('SIRO') or re.search(r'\d{3,}[A-Z]{3,}-', number)): result, temp_actor = get_actorname(json_data['number']) if result: - actor:str = json_data['actor'] - all_actor:str = json_data['all_actor'] - actor_list:list = all_actor.split(',') + actor: str = json_data['actor'] + all_actor: str = json_data['all_actor'] + actor_list: list = all_actor.split(',') json_data['actor'] = temp_actor # 从actor_list中循环查找元素是否包含字符串temp_actor,有则替换 for item in actor_list: @@ -308,8 +296,7 @@ def translate_actor(json_data): actor_list[actor_list.index(item)] = temp_actor json_data['all_actor'] = 
','.join(actor_list) - json_data[ - 'logs'] += f"\n 👩🏻 Av-wiki done! Actor's real Japanese name is '{temp_actor}' ({get_used_time(start_time)}s)" + json_data['logs'] += f"\n 👩🏻 Av-wiki done! Actor's real Japanese name is '{temp_actor}' ({get_used_time(start_time)}s)" else: json_data['logs'] += f"\n 🔴 Av-wiki failed! {temp_actor} ({get_used_time(start_time)}s)" @@ -355,9 +342,7 @@ def translate_actor(json_data): if actor_href_list: json_data['actor_href'] = actor_href_list[0] elif json_data['actor']: - json_data['actor_href'] = 'https://javdb.com/search?f=actor&q=' + \ - urllib.parse.quote( - json_data['actor'].split(',')[0]) # url转码,避免乱码 + json_data['actor_href'] = 'https://javdb.com/search?f=actor&q=' + urllib.parse.quote(json_data['actor'].split(',')[0]) # url转码,避免乱码 return json_data @@ -389,8 +374,7 @@ def _get_youdao_key_thread(): # 请求 js url ,获取 youdao key result, req = get_html(js_url) try: - youdaokey = re.search(r'(?<="fanyideskweb" \+ e \+ i \+ ")[^"]+', req).group(0) - # sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5") + youdaokey = re.search(r'(?<="fanyideskweb" \+ e \+ i \+ ")[^"]+', req).group(0) # sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5") except: try: youdaokey = re.search(r'(?<="fanyideskweb"\+e\+i\+")[^"]+', req).group(0) @@ -466,21 +450,18 @@ def translate_title_outline(json_data, movie_number): else: # 使用deepl翻译 t, o, r = deepl_translate(trans_title, trans_outline, 'JA', json_data) if r: - json_data[ - 'logs'] += f'\n 🔴 Translation failed!({each.capitalize()})({get_used_time(start_time)}s) Error: {r}' + json_data['logs'] += f'\n 🔴 Translation failed!({each.capitalize()})({get_used_time(start_time)}s) Error: {r}' else: if t: json_data['title'] = t if o: json_data['outline'] = o - json_data[ - 'logs'] += f'\n 🍀 Translation done!({each.capitalize()})({get_used_time(start_time)}s)' + json_data['logs'] += f'\n 🍀 Translation done!({each.capitalize()})({get_used_time(start_time)}s)' json_data['outline_from'] = each break else: translate_by = translate_by.strip(',').capitalize() - json_data[ - 'logs'] += f'\n 🔴 Translation failed! {translate_by} 不可用!({get_used_time(start_time)}s)' + json_data['logs'] += f'\n 🔴 Translation failed! 
{translate_by} 不可用!({get_used_time(start_time)}s)' # 简繁转换 if title_language == 'zh_cn': diff --git a/src/models/core/utils.py b/src/models/core/utils.py index b13b13f..5b44502 100644 --- a/src/models/core/utils.py +++ b/src/models/core/utils.py @@ -136,7 +136,7 @@ def get_video_size(json_data, file_path): if definition and 'definition' in config.tag_include: new_tag_list.insert(0, definition) if hd_get == 'video': - new_tag_list.insert(0, codec_fourcc.upper()) # 插入编码格式 + new_tag_list.insert(0, codec_fourcc.upper()) # 插入编码格式 json_data['tag'] = ','.join(new_tag_list) return json_data @@ -181,8 +181,7 @@ def deal_url(url): def replace_special_word(json_data): # 常见字段替换的字符 - all_key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'series', 'director', 'studio', - 'publisher', 'tag'] + all_key_word = ['title', 'originaltitle', 'outline', 'originalplot', 'series', 'director', 'studio', 'publisher', 'tag'] for key, value in config.special_word.items(): for each in all_key_word: json_data[each] = json_data[each].replace(key, value) @@ -292,17 +291,13 @@ def get_movie_path_setting(file_path=''): movie_path = nfd2c(movie_path) end_folder_name = split_path(movie_path)[1] # 用户设置的软链接输出目录 - softlink_path = config.softlink_path \ - .replace('\\', '/').replace('end_folder_name', end_folder_name) + softlink_path = config.softlink_path.replace('\\', '/').replace('end_folder_name', end_folder_name) # 用户设置的成功输出目录 - success_folder = config.success_output_folder \ - .replace('\\', '/').replace('end_folder_name', end_folder_name) + success_folder = config.success_output_folder.replace('\\', '/').replace('end_folder_name', end_folder_name) # 用户设置的失败输出目录 - failed_folder = config.failed_output_folder \ - .replace('\\', '/').replace('end_folder_name', end_folder_name) + failed_folder = config.failed_output_folder.replace('\\', '/').replace('end_folder_name', end_folder_name) # 用户设置的排除目录 - escape_folder_list = config.folders \ - .replace('\\', '/').replace('end_folder_name', end_folder_name).replace(',', ',').split(',') + escape_folder_list = config.folders.replace('\\', '/').replace('end_folder_name', end_folder_name).replace(',', ',').split(',') # 用户设置的剧照副本目录 extrafanart_folder = config.extrafanart_folder.replace('\\', '/') @@ -336,5 +331,4 @@ def get_movie_path_setting(file_path=''): success_folder = success_folder.replace('first_folder_name', first_folder_name) failed_folder = failed_folder.replace('first_folder_name', first_folder_name) - return convert_path(movie_path), success_folder, failed_folder, \ - escape_folder_new_list, extrafanart_folder, softlink_path + return convert_path(movie_path), success_folder, failed_folder, escape_folder_new_list, extrafanart_folder, softlink_path diff --git a/src/models/core/video.py b/src/models/core/video.py index a352045..71b7620 100644 --- a/src/models/core/video.py +++ b/src/models/core/video.py @@ -14,8 +14,7 @@ def add_del_extras(mode): """ signal.show_log_text(f'Start {mode} extrafanart extras! 
\n') - movie_path, success_folder, failed_folder, escape_folder_list, \ - extrafanart_folder, softlink_path = get_movie_path_setting() + movie_path, success_folder, failed_folder, escape_folder_list, extrafanart_folder, softlink_path = get_movie_path_setting() signal.show_log_text(f' 🖥 Movie path: {movie_path} \n 🔎 Checking all videos, Please wait...') movie_type = config.media_type movie_list = movie_lists('', movie_type, movie_path) # 获取所有需要刮削的影片列表 @@ -61,8 +60,7 @@ def add_del_extras(mode): def add_del_theme_videos(mode): signal.show_log_text(f'Start {mode} theme videos! \n') - movie_path, success_folder, failed_folder, escape_folder_list, \ - extrafanart_folder, softlink_path = get_movie_path_setting() + movie_path, success_folder, failed_folder, escape_folder_list, extrafanart_folder, softlink_path = get_movie_path_setting() signal.show_log_text(f' 🖥 Movie path: {movie_path} \n 🔎 Checking all videos, Please wait...') movie_type = config.media_type movie_list = movie_lists('', movie_type, movie_path) # 获取所有需要刮削的影片列表 diff --git a/src/models/core/web.py b/src/models/core/web.py index 95b6d0a..0fc1a87 100644 --- a/src/models/core/web.py +++ b/src/models/core/web.py @@ -45,9 +45,7 @@ def get_yesjav_title(json_data, movie_number): if result and response: parser = etree.HTMLParser(encoding="utf-8") html = etree.HTML(response, parser) - movie_title = html.xpath( - '//dl[@id="zi"]/p/font/a/b[contains(text(), $number)]/../../a[contains(text(), "中文字幕")]/text()', - number=movie_number) + movie_title = html.xpath('//dl[@id="zi"]/p/font/a/b[contains(text(), $number)]/../../a[contains(text(), "中文字幕")]/text()', number=movie_number) if movie_title: movie_title = movie_title[0] for each in config.char_list: @@ -111,17 +109,15 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): originaltitle_amazon_list = [originaltitle_amazon] for originaltitle_amazon in originaltitle_amazon_list: # 需要两次urlencode,nb_sb_noss表示无推荐来源 - url_search = 'https://www.amazon.co.jp/black-curtain/save-eligibility/black-curtain?returnUrl=/s?k=' + urllib.parse.quote_plus( - urllib.parse.quote_plus(originaltitle_amazon.replace('&', ' ') + ' [DVD]')) + '&ref=nb_sb_noss' + url_search = 'https://www.amazon.co.jp/black-curtain/save-eligibility/black-curtain?returnUrl=/s?k=' + urllib.parse.quote_plus(urllib.parse.quote_plus( + originaltitle_amazon.replace('&', ' ') + ' [DVD]')) + '&ref=nb_sb_noss' result, html_search = get_amazon_data(url_search) # 没有结果,尝试拆词,重新搜索 - if 'キーワードが正しく入力されていても一致する商品がない場合は、別の言葉をお試しください。' in html_search and len( - originaltitle_amazon_list) < 2: + if 'キーワードが正しく入力されていても一致する商品がない場合は、別の言葉をお試しください。' in html_search and len(originaltitle_amazon_list) < 2: for each_name in originaltitle_amazon.split(' '): if each_name not in originaltitle_amazon_list: - if len(each_name) > 8 or (not each_name.encode('utf-8').isalnum() and len( - each_name) > 4) and each_name not in actor_amazon: + if len(each_name) > 8 or (not each_name.encode('utf-8').isalnum() and len(each_name) > 4) and each_name not in actor_amazon: originaltitle_amazon_list.append(each_name) continue @@ -133,8 +129,7 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): # 标题缩短匹配(如无结果,则使用缩小标题再次搜索) if '検索に一致する商品はありませんでした。' in html_search and len(originaltitle_amazon_list) < 2: - short_originaltitle_amazon = html.xpath( - '//div[@class="a-section a-spacing-base a-spacing-top-base"]/span[@class="a-size-base a-color-base"]/text()') + short_originaltitle_amazon = html.xpath('//div[@class="a-section a-spacing-base 
a-spacing-top-base"]/span[@class="a-size-base a-color-base"]/text()') if short_originaltitle_amazon: short_originaltitle_amazon = short_originaltitle_amazon[0].upper().replace(' DVD', '') if short_originaltitle_amazon in originaltitle_amazon.upper(): @@ -144,14 +139,12 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): originaltitle_amazon_half = short_originaltitle_amazon for each_name in originaltitle_amazon.split(' '): if each_name not in originaltitle_amazon_list: - if len(each_name) > 8 or (not each_name.encode('utf-8').isalnum() and len( - each_name) > 4) and each_name not in actor_amazon: + if len(each_name) > 8 or (not each_name.encode('utf-8').isalnum() and len(each_name) > 4) and each_name not in actor_amazon: originaltitle_amazon_list.append(each_name) # 标题不带演员名匹配 for each_actor in actor_amazon: - originaltitle_amazon_half_no_actor = originaltitle_amazon_half_no_actor.replace(each_actor.upper(), - '') + originaltitle_amazon_half_no_actor = originaltitle_amazon_half_no_actor.replace(each_actor.upper(), '') # 检查搜索结果 actor_result_list = set() @@ -159,12 +152,9 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): # s-card-container s-overflow-hidden aok-relative puis-wide-grid-style puis-wide-grid-style-t2 puis-expand-height puis-include-content-margin puis s-latency-cf-section s-card-border pic_card = html.xpath('//div[@class="a-section a-spacing-base"]') for each in pic_card: # tek-077 - pic_ver_list = each.xpath( - 'div//a[@class="a-size-base a-link-normal s-underline-text s-underline-link-text s-link-style a-text-bold"]/text()') - pic_title_list = each.xpath( - 'div//span[@class="a-size-base-plus a-color-base a-text-normal"]/text()') - pic_url_list = each.xpath( - 'div//div[@class="a-section aok-relative s-image-square-aspect"]/img/@src') + pic_ver_list = each.xpath('div//a[@class="a-size-base a-link-normal s-underline-text s-underline-link-text s-link-style a-text-bold"]/text()') + pic_title_list = each.xpath('div//span[@class="a-size-base-plus a-color-base a-text-normal"]/text()') + pic_url_list = each.xpath('div//div[@class="a-section aok-relative s-image-square-aspect"]/img/@src') detail_url_list = each.xpath('div//a[@class="a-link-normal s-no-outline"]/@href') if len(pic_ver_list) and len(pic_url_list) and (len(pic_title_list) and len(detail_url_list)): pic_ver = pic_ver_list[0] # 图片版本 @@ -178,12 +168,10 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): pic_title_half_no_actor = pic_title_half_no_actor.replace(each_actor, '') # 判断标题是否命中 - if originaltitle_amazon_half[:15] in pic_title_half or originaltitle_amazon_half_no_actor[ - :15] in pic_title_half_no_actor: + if originaltitle_amazon_half[:15] in pic_title_half or originaltitle_amazon_half_no_actor[:15] in pic_title_half_no_actor: detail_url = urllib.parse.unquote_plus(detail_url) temp_title = re.findall(r'(.+)keywords=', detail_url) - temp_detail_url = temp_title[ - 0] + pic_title_half if temp_title else detail_url + pic_title_half + temp_detail_url = temp_title[0] + pic_title_half if temp_title else detail_url + pic_title_half url = re.sub(r'\._[_]?AC_[^\.]+\.', '.', pic_url) # 判断演员是否在标题里,避免同名标题误匹配 MOPP-023 @@ -208,8 +196,7 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): for each in actor_result_list: new_pic_w = get_imgsize(each)[0] if new_pic_w > pic_w: - if new_pic_w >= 1770 or ( - 1750 > new_pic_w > 600): # 不要小图 FCDSS-001,截短的图(1758/1759) + if new_pic_w >= 1770 or (1750 > new_pic_w > 600): # 不要小图 
FCDSS-001,截短的图(1758/1759) pic_w = new_pic_w hd_pic_url = each else: @@ -220,8 +207,7 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): return hd_pic_url # 当搜索结果命中了标题,没有命中演员时,尝试去详情页获取演员信息 - elif len( - title_result_list) <= 20 and 's-pagination-item s-pagination-next s-pagination-button s-pagination-separator' not in html_search: + elif len(title_result_list) <= 20 and 's-pagination-item s-pagination-next s-pagination-button s-pagination-separator' not in html_search: for each in title_result_list[:4]: try: url_new = 'https://www.amazon.co.jp' + re.findall(r'(/dp/[^/]+)', each[1])[0] @@ -231,10 +217,8 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): if result and html_detail: html = etree.fromstring(html_detail, etree.HTMLParser()) detail_actor = str(html.xpath('//span[@class="author notFaded"]/a/text()')).replace(' ', '') - detail_info_1 = str(html.xpath( - '//ul[@class="a-unordered-list a-vertical a-spacing-mini"]//text()')).replace(' ', '') - detail_info_2 = str( - html.xpath('//div[@id="detailBulletsWrapper_feature_div"]//text()')).replace(' ', '') + detail_info_1 = str(html.xpath('//ul[@class="a-unordered-list a-vertical a-spacing-mini"]//text()')).replace(' ', '') + detail_info_2 = str(html.xpath('//div[@id="detailBulletsWrapper_feature_div"]//text()')).replace(' ', '') detail_info_3 = str(html.xpath('//div[@id="productDescription"]//text()')).replace(' ', '') all_info = detail_actor + detail_info_1 + detail_info_2 + detail_info_3 for each_actor in actor_amazon: @@ -247,8 +231,7 @@ def get_big_pic_by_amazon(json_data, originaltitle_amazon, actor_amazon): json_data['poster_from'] = 'Amazon' # 有很多结果时(有下一页按钮),加演员名字重新搜索 - if 's-pagination-item s-pagination-next s-pagination-button s-pagination-separator' in html_search or len( - title_result_list) > 5: + if 's-pagination-item s-pagination-next s-pagination-button s-pagination-separator' in html_search or len(title_result_list) > 5: amazon_orginaltitle_actor = json_data.get('amazon_orginaltitle_actor') if amazon_orginaltitle_actor and amazon_orginaltitle_actor not in originaltitle_amazon: originaltitle_amazon_list.append(f'{originaltitle_amazon} {amazon_orginaltitle_actor}') @@ -343,8 +326,7 @@ def trailer_download(json_data, folder_new_path, folder_old_path, naming_rule): if download_file_with_filepath(json_data, trailer_url, trailer_file_path_temp, trailer_folder_path): file_size = os.path.getsize(trailer_file_path_temp) if file_size >= content_length or 'ignore_size' in download_files: - json_data['logs'] += "\n 🍀 Trailer done! (%s %s/%s)(%ss) " % ( - json_data['trailer_from'], file_size, content_length, get_used_time(start_time)) + json_data['logs'] += "\n 🍀 Trailer done! 
(%s %s/%s)(%ss) " % (json_data['trailer_from'], file_size, content_length, get_used_time(start_time)) signal.show_traceback_log(f"✅ {json_data['number']} trailer done!") if trailer_file_path_temp != trailer_file_path: move_file(trailer_file_path_temp, trailer_file_path) @@ -355,13 +337,11 @@ def trailer_download(json_data, folder_new_path, folder_old_path, naming_rule): if trailer_name == 0: # 带文件名,已下载成功,删除掉那些不用的文件夹即可 if os.path.exists(trailer_old_folder_path): shutil.rmtree(trailer_old_folder_path, ignore_errors=True) - if trailer_new_folder_path != trailer_old_folder_path and os.path.exists( - trailer_new_folder_path): + if trailer_new_folder_path != trailer_old_folder_path and os.path.exists(trailer_new_folder_path): shutil.rmtree(trailer_new_folder_path, ignore_errors=True) return True else: - json_data['logs'] += "\n 🟠 Trailer size is incorrect! delete it! (%s %s/%s) " % ( - json_data['trailer_from'], file_size, content_length) + json_data['logs'] += "\n 🟠 Trailer size is incorrect! delete it! (%s %s/%s) " % (json_data['trailer_from'], file_size, content_length) # 删除下载失败的文件 delete_file(trailer_file_path_temp) json_data['logs'] += "\n 🟠 Trailer download failed! (%s) " % trailer_url @@ -399,8 +379,7 @@ def _get_big_thumb(json_data): # faleno.jp 番号检查,都是大图,返回即可 if json_data['cover_from'] in ['faleno', 'dahlia']: if json_data['cover']: - json_data['logs'] += "\n 🖼 HD Thumb found! (%s)(%ss)" % ( - json_data['cover_from'], get_used_time(start_time)) + json_data['logs'] += "\n 🖼 HD Thumb found! (%s)(%ss)" % (json_data['cover_from'], get_used_time(start_time)) json_data['poster_big'] = True return json_data @@ -416,9 +395,7 @@ def _get_big_thumb(json_data): req_url = 'https://faleno.jp/top/works/%s/' % number_lower_no_line result, response = get_html(req_url) if result: - temp_url = re.findall( - r'src="((https://cdn.faleno.net/top/wp-content/uploads/[^_]+_)([^?]+))\?output-quality=', - response) + temp_url = re.findall(r'src="((https://cdn.faleno.net/top/wp-content/uploads/[^_]+_)([^?]+))\?output-quality=', response) if temp_url: json_data['cover'] = temp_url[0][0] json_data['poster'] = temp_url[0][1] + '2125.jpg' @@ -473,8 +450,7 @@ def _get_big_thumb(json_data): pic_domain = re.findall(r'://([^/]+)', thumb_url)[0] json_data['cover_from'] = f'Google({pic_domain})' json_data['cover'] = thumb_url - json_data['logs'] += "\n 🖼 HD Thumb found! (%s)(%ss)" % ( - json_data['cover_from'], get_used_time(start_time)) + json_data['logs'] += "\n 🖼 HD Thumb found! (%s)(%ss)" % (json_data['cover_from'], get_used_time(start_time)) return json_data @@ -489,8 +465,7 @@ def _get_big_poster(json_data): # 如果有大图时,直接下载 if json_data.get('poster_big') and get_imgsize(json_data['poster'])[1] > 600: json_data['image_download'] = True - json_data[ - 'logs'] += f"\n 🖼 HD Poster found! ({json_data['poster_from']})({get_used_time(start_time)}s)" + json_data['logs'] += f"\n 🖼 HD Poster found! 
({json_data['poster_from']})({get_used_time(start_time)}s)" return json_data # 初始化数据 @@ -500,10 +475,8 @@ def _get_big_poster(json_data): poster_width = 0 # 通过原标题去 amazon 查询 - if 'amazon' in config.download_hd_pics and json_data['mosaic'] in ['有码', '有碼', '流出', '无码破解', '無碼破解', - '里番', '裏番', '动漫', '動漫']: - hd_pic_url = get_big_pic_by_amazon(json_data, json_data['originaltitle_amazon'], - json_data['actor_amazon']) + if 'amazon' in config.download_hd_pics and json_data['mosaic'] in ['有码', '有碼', '流出', '无码破解', '無碼破解', '里番', '裏番', '动漫', '動漫']: + hd_pic_url = get_big_pic_by_amazon(json_data, json_data['originaltitle_amazon'], json_data['actor_amazon']) if hd_pic_url: json_data['poster'] = hd_pic_url json_data['poster_from'] = 'Amazon' @@ -511,8 +484,7 @@ def _get_big_poster(json_data): json_data['image_download'] = True # 通过番号去 官网 查询获取稍微大一些的封面图,以便去 Google 搜索 - if not hd_pic_url and 'official' in config.download_hd_pics and 'official' not in config.website_set and \ - json_data['poster_from'] != 'Amazon': + if not hd_pic_url and 'official' in config.download_hd_pics and 'official' not in config.website_set and json_data['poster_from'] != 'Amazon': letters = json_data['letters'].upper() official_url = config.official_websites.get(letters) if official_url: @@ -531,8 +503,7 @@ def _get_big_poster(json_data): # 使用google以图搜图,放在最后是因为有时有错误,比如 kawd-943 poster_url = json_data.get('poster') - if not hd_pic_url and poster_url and 'google' in config.download_hd_pics and json_data[ - 'poster_from'] != 'theporndb': + if not hd_pic_url and poster_url and 'google' in config.download_hd_pics and json_data['poster_from'] != 'theporndb': hd_pic_url, poster_size = get_big_pic_by_google(poster_url, poster=True) if hd_pic_url: if 'prestige' in json_data['poster'] or json_data['poster_from'] == 'Amazon': @@ -546,8 +517,7 @@ def _get_big_poster(json_data): # 如果找到了高清链接,则替换 if hd_pic_url: json_data['image_download'] = True - json_data['logs'] += "\n 🖼 HD Poster found! (%s)(%ss)" % ( - json_data['poster_from'], get_used_time(start_time)) + json_data['logs'] += "\n 🖼 HD Poster found! (%s)(%ss)" % (json_data['poster_from'], get_used_time(start_time)) return json_data @@ -575,8 +545,7 @@ def thumb_download(json_data, folder_new_path, thumb_final_path): # 尝试复制其他分集。看分集有没有下载,如果下载完成则可以复制,否则就自行下载 if json_data['cd_part']: done_thumb_path = Flags.file_done_dic.get(json_data['number']).get('thumb') - if done_thumb_path and os.path.exists(done_thumb_path) and split_path(done_thumb_path)[0] == \ - split_path(thumb_final_path)[0]: + if done_thumb_path and os.path.exists(done_thumb_path) and split_path(done_thumb_path)[0] == split_path(thumb_final_path)[0]: copy_file(done_thumb_path, thumb_final_path) json_data['logs'] += "\n 🍀 Thumb done! (copy cd-thumb)(%ss) " % get_used_time(start_time) json_data['cover_from'] = 'copy cd-thumb' @@ -604,17 +573,14 @@ def thumb_download(json_data, folder_new_path, thumb_final_path): cover_from, cover_url = each cover_url = check_url(cover_url) if not cover_url: - json_data['logs'] += "\n 🟠 检测到 Thumb 图片失效! 跳过!(%s)(%ss) " % ( - cover_from, get_used_time(start_time)) + each[1] + json_data['logs'] += "\n 🟠 检测到 Thumb 图片失效! 
跳过!(%s)(%ss) " % (cover_from, get_used_time(start_time)) + each[1] continue json_data['cover_from'] = cover_from if download_file_with_filepath(json_data, cover_url, thumb_final_path_temp, folder_new_path): cover_size = check_pic(thumb_final_path_temp) if cover_size: if not cover_from.startswith('Google') or cover_size == json_data['cover_size'] or ( - cover_size[0] >= 800 and abs( - cover_size[0] / cover_size[1] - json_data['cover_size'][0] / json_data['cover_size'][ - 1]) <= 0.1): + cover_size[0] >= 800 and abs(cover_size[0] / cover_size[1] - json_data['cover_size'][0] / json_data['cover_size'][1]) <= 0.1): # 图片下载正常,替换旧的 thumb.jpg if thumb_final_path_temp != thumb_final_path: move_file(thumb_final_path_temp, thumb_final_path) @@ -623,14 +589,12 @@ def thumb_download(json_data, folder_new_path, thumb_final_path): dic = {'thumb': thumb_final_path} Flags.file_done_dic[json_data['number']].update(dic) json_data['thumb_marked'] = False # 表示还没有走加水印流程 - json_data['logs'] += "\n 🍀 Thumb done! (%s)(%ss) " % ( - json_data['cover_from'], get_used_time(start_time)) + json_data['logs'] += "\n 🍀 Thumb done! (%s)(%ss) " % (json_data['cover_from'], get_used_time(start_time)) json_data['thumb_path'] = thumb_final_path return True else: delete_file(thumb_final_path_temp) - json_data['logs'] += "\n 🟠 检测到 Thumb 分辨率不对%s! 已删除 (%s)(%ss)" % ( - str(cover_size), cover_from, get_used_time(start_time)) + json_data['logs'] += "\n 🟠 检测到 Thumb 分辨率不对%s! 已删除 (%s)(%ss)" % (str(cover_size), cover_from, get_used_time(start_time)) continue json_data['logs'] += f"\n 🟠 Thumb download failed! {cover_from}: {cover_url} " else: @@ -679,8 +643,7 @@ def poster_download(json_data, folder_new_path, poster_final_path): # 尝试复制其他分集。看分集有没有下载,如果下载完成则可以复制,否则就自行下载 if json_data['cd_part']: done_poster_path = Flags.file_done_dic.get(json_data['number']).get('poster') - if done_poster_path and os.path.exists(done_poster_path) and split_path(done_poster_path)[0] == \ - split_path(poster_final_path)[0]: + if done_poster_path and os.path.exists(done_poster_path) and split_path(done_poster_path)[0] == split_path(poster_final_path)[0]: copy_file(done_poster_path, poster_final_path) json_data['poster_from'] = 'copy cd-poster' json_data['poster_path'] = poster_final_path @@ -729,8 +692,7 @@ def poster_download(json_data, folder_new_path, poster_final_path): if download_file_with_filepath(json_data, poster_url, poster_final_path_temp, folder_new_path): poster_size = check_pic(poster_final_path_temp) if poster_size: - if not poster_from.startswith('Google') or poster_size == json_data[ - 'poster_size'] or 'media-amazon.com' in poster_url: + if not poster_from.startswith('Google') or poster_size == json_data['poster_size'] or 'media-amazon.com' in poster_url: if poster_final_path_temp != poster_final_path: move_file(poster_final_path_temp, poster_final_path) delete_file(poster_final_path_temp) @@ -739,13 +701,11 @@ def poster_download(json_data, folder_new_path, poster_final_path): Flags.file_done_dic[json_data['number']].update(dic) json_data['poster_marked'] = False # 下载的图,还没加水印 json_data['poster_path'] = poster_final_path - json_data['logs'] += "\n 🍀 Poster done! (%s)(%ss)" % ( - poster_from, get_used_time(start_time)) + json_data['logs'] += "\n 🍀 Poster done! (%s)(%ss)" % (poster_from, get_used_time(start_time)) return True else: delete_file(poster_final_path_temp) - json_data['logs'] += "\n 🟠 检测到 Poster 分辨率不对%s! 已删除 (%s)" % ( - str(poster_size), poster_from) + json_data['logs'] += "\n 🟠 检测到 Poster 分辨率不对%s! 
已删除 (%s)" % (str(poster_size), poster_from) # 判断之前有没有 poster 和 thumb if not poster_path and not thumb_path: @@ -817,8 +777,7 @@ def fanart_download(json_data, fanart_final_path): # 尝试复制其他分集。看分集有没有下载,如果下载完成则可以复制,否则就自行下载 if json_data['cd_part']: done_fanart_path = Flags.file_done_dic.get(json_data['number']).get('fanart') - if done_fanart_path and os.path.exists(done_fanart_path) and split_path(done_fanart_path)[0] == \ - split_path(fanart_final_path)[0]: + if done_fanart_path and os.path.exists(done_fanart_path) and split_path(done_fanart_path)[0] == split_path(fanart_final_path)[0]: if fanart_path: delete_file(fanart_path) copy_file(done_fanart_path, fanart_final_path) @@ -897,8 +856,7 @@ def extrafanart_download(json_data, folder_new_path): extrafanart_count += 1 extrafanart_name = 'fanart' + str(extrafanart_count) + '.jpg' extrafanart_file_path = os.path.join(extrafanart_folder_path_temp, extrafanart_name) - task_list.append( - [json_data, extrafanart_url, extrafanart_file_path, extrafanart_folder_path_temp, extrafanart_name]) + task_list.append([json_data, extrafanart_url, extrafanart_file_path, extrafanart_folder_path_temp, extrafanart_name]) extrafanart_pool = Pool(20) # 剧照下载线程池 result = extrafanart_pool.map(_mutil_extrafanart_download_thread, task_list) for res in result: @@ -909,13 +867,11 @@ def extrafanart_download(json_data, folder_new_path): shutil.rmtree(extrafanart_folder_path) os.rename(extrafanart_folder_path_temp, extrafanart_folder_path) json_data['logs'] += "\n 🍀 ExtraFanart done! (%s %s/%s)(%ss)" % ( - json_data['extrafanart_from'], extrafanart_count_succ, extrafanart_count, - get_used_time(start_time)) + json_data['extrafanart_from'], extrafanart_count_succ, extrafanart_count, get_used_time(start_time)) return True else: json_data['logs'] += "\n 🟠 ExtraFanart download failed! 
(%s %s/%s)(%ss)" % ( - json_data['extrafanart_from'], extrafanart_count_succ, extrafanart_count, - get_used_time(start_time)) + json_data['extrafanart_from'], extrafanart_count_succ, extrafanart_count, get_used_time(start_time)) if extrafanart_folder_path_temp != extrafanart_folder_path: shutil.rmtree(extrafanart_folder_path_temp) else: @@ -939,12 +895,11 @@ def show_netstatus(): signal.show_traceback_log(traceback.format_exc()) signal.show_net_info(traceback.format_exc()) if proxy == '' or proxy_type == '' or proxy_type == 'no': - signal.show_net_info(' 当前网络状态:❌ 未启用代理\n 类型: ' + str(proxy_type) + ' 地址:' + str( - proxy) + ' 超时时间:' + str(timeout) + ' 重试次数:' + str(retry_count)) + signal.show_net_info(' 当前网络状态:❌ 未启用代理\n 类型: ' + str(proxy_type) + ' 地址:' + str(proxy) + ' 超时时间:' + str(timeout) + ' 重试次数:' + str( + retry_count)) else: - signal.show_net_info( - ' 当前网络状态:✅ 已启用代理\n 类型: ' + proxy_type + ' 地址:' + proxy + ' 超时时间:' + str( - timeout) + ' 重试次数:' + str(retry_count)) + signal.show_net_info(' 当前网络状态:✅ 已启用代理\n 类型: ' + proxy_type + ' 地址:' + proxy + ' 超时时间:' + str(timeout) + ' 重试次数:' + str( + retry_count)) signal.show_net_info('=' * 80) diff --git a/src/models/crawlers/airav.py b/src/models/crawlers/airav.py index ad1d23c..0990124 100644 --- a/src/models/crawlers/airav.py +++ b/src/models/crawlers/airav.py @@ -89,15 +89,13 @@ def getOutline(html, language, real_url): def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): start_time = time.time() website_name = 'airav' - req_web += '-> %s[%s]' % (website_name, language) + req_web += f'-> {website_name}[{language}]' number = number.upper() if re.match(r'N\d{4}', number): # n1403 number = number.lower() real_url = appoint_url - cover_url = '' image_cut = 'right' image_download = False - url_search = '' mosaic = '有码' if language == 'zh_cn': airav_url = 'https://cn.airav.wiki' @@ -106,7 +104,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): else: airav_url = 'https://jp.airav.wiki' web_info = '\n ' - log_info += ' \n 🌐 airav[%s]' % language.replace('zh_', '') + log_info += f' \n 🌐 airav[{language.replace("zh_", "")}]' debug_info = '' try: # 捕获主动抛出的异常 @@ -114,13 +112,13 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): # 通过搜索获取real_url url_search = airav_url + '/?search=' + number - debug_info = '搜索地址: %s ' % url_search + debug_info = f'搜索地址: {url_search} ' log_info += web_info + debug_info # ========================================================================搜索番号 result, html_search = curl_html(url_search) if not result: - debug_info = '网络请求错误: %s' % html_search + debug_info = f'网络请求错误: {html_search}' log_info += web_info + debug_info raise Exception(debug_info) html = etree.fromstring(html_search, etree.HTMLParser()) @@ -136,11 +134,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): raise Exception(debug_info) if real_url: - debug_info = '番号地址: %s ' % real_url + debug_info = f'番号地址: {real_url} ' log_info += web_info + debug_info result, html_content = curl_html(real_url) if not result: - debug_info = '网络请求错误: %s' % html_content + debug_info = f'网络请求错误: {html_content}' log_info += web_info + debug_info raise Exception(debug_info) @@ -196,7 +194,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): 'image_cut': image_cut, 'log_info': log_info, 'error_info': '', - 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), + 'req_web': req_web + f'({round((time.time() - start_time), )}s) ', 
'mosaic': mosaic, 'website': real_url, 'wanted': '', @@ -205,7 +203,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): log_info += web_info + debug_info dic['log_info'] = log_info except Exception as e: - debug_info = '数据生成出错: %s' % str(e) + debug_info = f'数据生成出错: {str(e)}' log_info += web_info + debug_info raise Exception(debug_info) except Exception as e: @@ -216,16 +214,10 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): 'website': '', 'log_info': log_info, 'error_info': debug_info, - 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), + 'req_web': req_web + f'({round((time.time() - start_time), )}s) ', } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -236,30 +228,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): # print(main('STARS-199')) # poster图片 # print(main('APNS-259', language='zh_cn')) # print(main('PRED-300')) # 马赛克破坏版 - print(main('abs-141')) - # print(main('HYSD-00083')) - # print(main('IESP-660')) - # print(main('n1403')) - # print(main('GANA-1910')) - # print(main('heyzo-1031')) - # print(main('x-art.19.11.03')) - # print(main('032020-001')) - # print(main('S2M-055')) - # print(main('LUXU-1217')) - # print(main('1101132', '')) - # print(main('OFJE-318')) - # print(main('110119-001')) - # print(main('abs-001')) - # print(main('SSIS-090', '')) - # print(main('SSIS-090', '')) - # print(main('SNIS-016', '')) - # print(main('HYSD-00083', '')) - # print(main('IESP-660', '')) - # print(main('n1403', '')) - # print(main('GANA-1910', '')) - # print(main('heyzo-1031', '')) - # print(main('x-art.19.11.03')) - # print(main('032020-001', '')) - # print(main('S2M-055', '')) - # print(main('LUXU-1217', '')) - # print(main('x-art.19.11.03', '')) + print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('1101132', '')) # print(main('OFJE-318')) # print(main('110119-001')) # print(main('abs-001')) # print(main('SSIS-090', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main('x-art.19.11.03', '')) diff --git a/src/models/crawlers/airav_cc.py b/src/models/crawlers/airav_cc.py index 8588e1e..f0ae761 100644 --- a/src/models/crawlers/airav_cc.py +++ b/src/models/crawlers/airav_cc.py @@ -98,7 +98,7 @@ def get_outline(html): return '' else: # 去除简介中的无意义信息,中间和首尾的空白字符、*根据分发等 - result = re.sub(r'[\n\t]', '', result).split('*根据分发', 1 )[0].strip() + result = re.sub(r'[\n\t]', '', result).split('*根据分发', 1)[0].strip() return result @@ -111,7 +111,7 @@ def get_series(html): def retry_request(real_url, log_info, web_info): result, html_content = curl_html(real_url) if not result: - debug_info = '网络请求错误: %s ' % html_content + debug_info = f'网络请求错误: {html_content} ' log_info += web_info + debug_info raise 
Exception(debug_info) html_info = etree.fromstring(html_content, etree.HTMLParser()) @@ -121,7 +121,7 @@ def retry_request(real_url, log_info, web_info): log_info += web_info + debug_info raise Exception(debug_info) web_number = get_web_number(html_info) # 获取番号,用来替换标题里的番号 - web_number1 = '[%s]' % web_number + web_number1 = f'[{web_number}]' title = title.replace(web_number1, '').strip() outline = get_outline(html_info) actor = get_actor(html_info) # 获取actor @@ -151,17 +151,14 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): if re.match(r'N\d{4}', number): # n1403 number = number.lower() real_url = appoint_url - cover_url = '' image_cut = 'right' image_download = False - url_search = '' mosaic = '有码' airav_url = getattr(config, 'airav_cc_website', 'https://airav.io') if language == 'zh_cn': airav_url += '/cn' web_info = '\n ' log_info += f' \n 🌐 airav[{language.replace("zh_", "")}]' - debug_info = '' # real_url = 'https://airav5.fun/jp/playon.aspx?hid=44733' @@ -170,13 +167,13 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): # 通过搜索获取real_url https://airav.io/search_result?kw=ssis-200 url_search = airav_url + f'/search_result?kw={number}' - debug_info = '搜索地址: %s ' % url_search + debug_info = f'搜索地址: {url_search} ' log_info += web_info + debug_info # ========================================================================搜索番号 result, html_search = curl_html(url_search) if not result: - debug_info = '网络请求错误: %s ' % html_search + debug_info = f'网络请求错误: {html_search} ' log_info += web_info + debug_info raise Exception(debug_info) html = etree.fromstring(html_search, etree.HTMLParser()) @@ -200,12 +197,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): raise Exception(debug_info) else: real_url = urllib.parse.urljoin(airav_url, real_url) if real_url.startswith("/") else real_url - - debug_info = '番号地址: %s ' % real_url + + debug_info = f'番号地址: {real_url} ' log_info += web_info + debug_info for i in range(3): - html_info, title, outline, actor, cover_url, tag, studio, log_info = ( - retry_request(real_url, log_info, web_info)) + html_info, title, outline, actor, cover_url, tag, studio, log_info = (retry_request(real_url, log_info, web_info)) if cover_url.startswith("/"): # coverurl 可能是相对路径 cover_url = urllib.parse.urljoin(airav_url, cover_url) @@ -214,11 +210,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): if '�' not in temp_str: break else: - debug_info = '%s 请求 airav_cc 返回内容存在乱码 �,尝试第 %s/3 次请求' % (number, (i + 1)) + debug_info = f'{number} 请求 airav_cc 返回内容存在乱码 �,尝试第 {(i + 1)}/3 次请求' signal.add_log(debug_info) log_info += web_info + debug_info else: - debug_info = '%s 已请求三次,返回内容仍存在乱码 � !视为失败!' % number + debug_info = f'{number} 已请求三次,返回内容仍存在乱码 � !视为失败!' 
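# The loop above retries the page request up to three times while the body
# still contains the U+FFFD replacement character (乱码, i.e. a bad decode).
# A minimal sketch of that pattern; fetch() is a hypothetical stand-in for
# this project's curl_html:
def fetch_clean(fetch, url, retries=3):
    for _ in range(retries):
        text = fetch(url)
        if '�' not in text:  # '\ufffd' marks undecodable bytes
            return text
    raise Exception(f'{url} still garbled after {retries} attempts')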
signal.add_log(debug_info) log_info += web_info + debug_info raise Exception(debug_info) @@ -264,7 +260,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): 'image_cut': image_cut, 'log_info': log_info, 'error_info': '', - 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), + 'req_web': req_web + f'({round((time.time() - start_time), )}s) ', 'mosaic': mosaic, 'website': real_url, 'wanted': '', @@ -273,7 +269,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): log_info += web_info + debug_info dic['log_info'] = log_info except Exception as e: - debug_info = '数据生成出错: %s' % str(e) + debug_info = f'数据生成出错: {str(e)}' log_info += web_info + debug_info raise Exception(debug_info) except Exception as e: @@ -285,16 +281,10 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): 'website': '', 'log_info': log_info, 'error_info': debug_info, - 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), + 'req_web': req_web + f'({round((time.time() - start_time), )}s) ', } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -337,4 +327,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'): # print(main('ssis-200', '')) # 多个搜索结果 # print(main('JUY-331', '')) # 存在系列字段 # print(main('SONE-248', '')) # 简介存在无效信息 "*根据分发方式,内容可能会有所不同" - print('CAWD-688','') # 无码破解 + print('CAWD-688', '') # 无码破解 diff --git a/src/models/crawlers/avsex.py b/src/models/crawlers/avsex.py index 565652b..34c9997 100644 --- a/src/models/crawlers/avsex.py +++ b/src/models/crawlers/avsex.py @@ -47,14 +47,11 @@ def get_actor_photo(actor): def get_outline(html): result = html.xpath('string(//h2[contains(text(), "劇情簡介")]/following-sibling::p)') - rep_list = [ - '(中文字幕1280x720)', '(日本同步最新‧中文字幕1280x720)', '(日本同步最新‧中文字幕)', - '(日本同步最新‧完整激薄版‧中文字幕1280x720)', '*日本女優* 劇情做愛影片 *完整日本版*', - '*日本女優* 剧情做爱影片 *完整日本版*', ' ', '
<br>', '<br><br>', '<br><br>',
-        '\n',
-        '\n\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n<br>',
-        '<br>', '★ (请到免费赠片区观赏)'
-    ]
+    rep_list = ['(中文字幕1280x720)', '(日本同步最新‧中文字幕1280x720)', '(日本同步最新‧中文字幕)', '(日本同步最新‧完整激薄版‧中文字幕1280x720)',
+                '*日本女優* 劇情做愛影片 *完整日本版*', '*日本女優* 剧情做爱影片 *完整日本版*', ' ', '<br>', '<br><br>', '<br><br>',
+                '\n',
+                '\n\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n<br>',
+                '<br>
', '★ (请到免费赠片区观赏)'] for each in rep_list: result = result.replace(each, '').strip() return result @@ -108,8 +105,7 @@ def get_cover(html): def get_extrafanart(html): - ex_list = html.xpath( - '//h2[contains(text(), "精彩劇照")]/following-sibling::ul/li/div[@class="relative overflow-hidden rounded-md"]/img/@src') + ex_list = html.xpath('//h2[contains(text(), "精彩劇照")]/following-sibling::ul/li/div[@class="relative overflow-hidden rounded-md"]/img/@src') return ex_list @@ -133,8 +129,7 @@ def get_real_url(html, number): temp_poster = each.xpath('div[@class="relative overflow-hidden rounded-t-md"]/img/@src') if temp_title: temp_title = temp_title[0] - if temp_title.upper().startswith(number.upper()) or ( - f'{number.upper()}-' in temp_title.upper() and temp_title[:1].isdigit()): + if temp_title.upper().startswith(number.upper()) or (f'{number.upper()}-' in temp_title.upper() and temp_title[:1].isdigit()): # https://9sex.tv/web/video?id=317900 # https://9sex.tv/#/home/video/340496 real_url = temp_url @@ -282,13 +277,7 @@ def main(number, appoint_url='', log_info='', req_web='', language=''): 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -299,47 +288,4 @@ def main(number, appoint_url='', log_info='', req_web='', language=''): # print(main('', 'https://9sex.tv/#/home/video/332642')) # print(main('EVA-088')) # print(main('SNIS-216')) - print(main('CAWD-582')) - # print(main('ALDN-107')) - # print(main('ten-024')) - # print(main('459ten-024')) - # print(main('IPX-729')) - # print(main('STARS-199')) # 无结果 - # print(main('SIVR-160')) - # print(main('', 'https://avsex.club/web/video?id=333778')) - # print(main('', 'avsex.club/web/video?id=333778')) - # print(main('ssni-700')) - # print(main('ssis-200')) - # print(main('heyzo-2026')) - # print(main('110219-001')) - # print(main('abw-157')) - # print(main('010520-001')) - # print(main('hbad-599', 'https://avsex.club/web/video?id=333777')) - # print(main('hbad-599', 'https://avsex.club/web/video?id=oo')) - # print(main('abs-141')) - # print(main('HYSD-00083')) - # print(main('IESP-660')) - # print(main('n1403')) - # print(main('GANA-1910')) - # print(main('heyzo-1031')) - # print(main_us('x-art.19.11.03')) - # print(main('032020-001')) - # print(main('S2M-055')) - # print(main('LUXU-1217')) - # print(main('1101132', '')) - # print(main('OFJE-318')) - # print(main('110119-001')) - # print(main('abs-001')) - # print(main('SSIS-090', '')) - # print(main('SSIS-090', '')) - # print(main('SNIS-016', '')) - # print(main('HYSD-00083', '')) - # print(main('IESP-660', '')) - # print(main('n1403', '')) - # print(main('GANA-1910', '')) - # print(main('heyzo-1031', '')) - # print(main_us('x-art.19.11.03')) - # print(main('032020-001', '')) - # print(main('S2M-055', '')) - # print(main('LUXU-1217', '')) - # print(main_us('x-art.19.11.03', '')) + print(main('CAWD-582')) # print(main('ALDN-107')) # print(main('ten-024')) # print(main('459ten-024')) # print(main('IPX-729')) # print(main('STARS-199')) # 无结果 # print(main('SIVR-160')) # print(main('', 'https://avsex.club/web/video?id=333778')) # print(main('', 'avsex.club/web/video?id=333778')) # print(main('ssni-700')) # print(main('ssis-200')) # print(main('heyzo-2026')) # 
print(main('110219-001')) # print(main('abw-157')) # print(main('010520-001')) # print(main('hbad-599', 'https://avsex.club/web/video?id=333777')) # print(main('hbad-599', 'https://avsex.club/web/video?id=oo')) # print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main_us('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('1101132', '')) # print(main('OFJE-318')) # print(main('110119-001')) # print(main('abs-001')) # print(main('SSIS-090', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', '')) diff --git a/src/models/crawlers/avsox.py b/src/models/crawlers/avsox.py index 53e8167..f8df2a7 100644 --- a/src/models/crawlers/avsox.py +++ b/src/models/crawlers/avsox.py @@ -56,8 +56,7 @@ def get_tag(html): def get_release(html): - result = html.xpath( - '//span[contains(text(),"发行时间:") or contains(text(),"發行日期:") or contains(text(),"発売日:")]/../text()') + result = html.xpath('//span[contains(text(),"发行时间:") or contains(text(),"發行日期:") or contains(text(),"発売日:")]/../text()') return result[0].strip() if result else '' @@ -66,8 +65,7 @@ def get_year(release): def get_runtime(html): - result = html.xpath( - '//span[contains(text(),"长度:") or contains(text(),"長度:") or contains(text(),"収録時間:")]/../text()') + result = html.xpath('//span[contains(text(),"长度:") or contains(text(),"長度:") or contains(text(),"収録時間:")]/../text()') return re.findall(r'(\d+)', result[0])[0] if result else '' @@ -87,9 +85,7 @@ def get_real_url(number, html): i = 0 if url_list: for i in range(1, len(url_list) + 1): - number_get = str(html.xpath( - '//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip( - " ['']") + number_get = str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip(" ['']") if number.upper().replace('-PPV', '') == number_get.upper().replace('-PPV', ''): page_url = 'https:' + url_list[i - 1] break @@ -207,13 +203,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'): 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -221,6 +211,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'): # print(main('051119-917')) # print(main('EDVR-063 ')) # print(main('032620_001')) - print(main('FC2-2101993')) - # print(main('032620_001', 'https://avsox.click/cn/movie/cb8d28437cff4e90')) - # print(main('', 'https://avsox.click/cn/movie/0b4e42a270b9871b')) + print(main('FC2-2101993')) # print(main('032620_001', 'https://avsox.click/cn/movie/cb8d28437cff4e90')) # print(main('', 'https://avsox.click/cn/movie/0b4e42a270b9871b')) diff --git a/src/models/crawlers/cableav.py b/src/models/crawlers/cableav.py index f7fd337..6e9dd7b 100644 --- 
a/src/models/crawlers/cableav.py +++ b/src/models/crawlers/cableav.py @@ -157,13 +157,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) return js diff --git a/src/models/crawlers/cnmdb.py b/src/models/crawlers/cnmdb.py index 103708e..8a665b0 100644 --- a/src/models/crawlers/cnmdb.py +++ b/src/models/crawlers/cnmdb.py @@ -80,8 +80,7 @@ def get_actor_title(title, number, studio): continue if i < 2 and ('传媒' in temp_list[i] or studio in temp_list[i]): continue - if i > 2 and ( - studio == temp_list[i] or '麻豆' in temp_list[i] or '出品' in temp_list[i] or '传媒' in temp_list[i]): + if i > 2 and (studio == temp_list[i] or '麻豆' in temp_list[i] or '出品' in temp_list[i] or '传媒' in temp_list[i]): break if i < 3 and len(temp_list[i]) <= 4 and len(actor_list) < 1: actor_list.append(temp_list[i]) @@ -114,8 +113,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file result, response = get_html(real_url) if result: detail_page = etree.fromstring(response, etree.HTMLParser()) - result, number, title, actor, real_url, cover_url, studio, series = get_detail_info(detail_page, - real_url) + result, number, title, actor, real_url, cover_url, studio, series = get_detail_info(detail_page, real_url) else: debug_info = '没有找到数据 %s ' % response log_info += web_info + debug_info @@ -131,8 +129,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file result, response = get_html(real_url, keep=False) if result: detail_page = etree.fromstring(response, etree.HTMLParser()) - result, number, title, actor, real_url, cover_url, studio, series = get_detail_info(detail_page, - real_url) + result, number, title, actor, real_url, cover_url, studio, series = get_detail_info(detail_page, real_url) break else: filename_list = re.split(r'[\.,,]', file_path) @@ -148,8 +145,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file log_info += web_info + debug_info raise Exception(debug_info) search_page = etree.fromstring(response, etree.HTMLParser()) - result, number, title, actor, real_url, cover_url, studio, series = get_search_info(search_page, - number_list) + result, number, title, actor, real_url, cover_url, studio, series = get_search_info(search_page, number_list) if result: break else: @@ -212,13 +208,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) return js @@ -239,61 +229,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file # print(main('MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作', file_path='MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作')) # print(main('MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列', file_path='MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列')) print(main('XSJ138.养子的秘密教学EP6.薇安姐内射教学.性视界出品', - file_path='XSJ138.养子的秘密教学EP6.薇安姐内射教学.性视界出品')) - # 
-    # print(main('MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样', file_path='MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样'))
-    # print(main('MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版', file_path='MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版'))
-    # print(main('MSD-023', file_path='MSD023.袁子仪.杨柳.可爱女孩非亲妹.渴望已久的(非)近亲性爱.麻豆传媒映画.Model.Seeding系列.mp4'))
-    # print(main('', file_path='夏日回忆 贰'))
-    # print(main('MDX-0016'))
-    # print(main('MDSJ-0004'))
-    # print(main('RS-020'))
-    # print(main('PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒', file_path='PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒'))
-    # print(main('老公在外出差家里的娇妻被入室小偷强迫性交 - 美酱'))
-    # print(main('', file_path='夏日回忆 贰 HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4'))
-    # print(main('', file_path='HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4'))
-    # print(main('', file_path="【HongKongDoll玩偶姐姐.短篇集.情人节特辑.Valentine's Day Special-cd2"))
-    # print(main('', file_path='PMC-062 唐茜.綠帽丈夫連同新弟怒操出軌老婆.強拍淫蕩老婆被操 唐茜.ts'))
-    # print(main('', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画'))
-    # print(main('淫欲游戏王.EP6', appoint_number='淫欲游戏王.EP5', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts'))  # EP不带.才能搜到
-    # print(main('', file_path='PMS-003.职场冰与火.EP3设局.宁静.苏文文.设局我要女人都臣服在我胯下.蜜桃影像传媒'))
-    # print(main('', file_path='PMS-001 性爱公寓EP04 仨人.蜜桃影像传媒.ts'))
-    # print(main('', file_path='PMS-001.性爱公寓EP03.ts'))
-    # print(main('', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.ts'))
-    # print(main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts'))
-    # main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱'))  # 简体搜不到
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木结爱.TS'))
-    # '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛', '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-', ' 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛']
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱.TS'))
-    # print(main('', file_path='PMS-001 性爱公寓EP02 女王 蜜桃影像传媒 -莉娜乔安.TS'))
-    # print(main('91CM-081', file_path='91CM-081.田恬.李琼.继母与女儿.三.爸爸不在家先上妹妹再玩弄母亲.果冻传媒.mp4'))
-    # print(main('91CM-081', file_path='MDJ-0001.EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.麻豆传媒映画.mp4'))
-    # print(main('91CM-081', file_path='MDJ0001 EP2 AV 淫兽鬼父 陈美惠 .TS'))
-    # print(main('91CM-081', file_path='MXJ-0005.EP1.弥生美月.小恶魔高校生.与老师共度的放浪补课.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='PH-US-002.色控.音乐老师全裸诱惑.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.TS'))
-    # print(main('91CM-081', file_path='MD-0140-2.蜜苏.家有性事EP2.爱在身边.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RUCK ME LIKE A SEX DOLL.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS'))
-    # print(main('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS'))
-    # print(main('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS'))
-    # print(main('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS'))
-    # print(main('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS'))
-    # print(main('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS'))
-    # print(main('MDM-002'))  # 去掉标题最后的发行商
-    # print(main('MDS-007'))  # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS
-    # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS'))  # 数字要四位才能搜索到,即 MDJ-0001.EP1
-    # print(main('91CM-090'))  # 带横线才能搜到
-    # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿'))  # 带空格才能搜到
-    # print(main('淫欲游戏王EP2'))  # 不带空格才能搜到
-    # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿'))
-    # print(main('MD-0020'))
-    # print(main('mds009'))
-    # print(main('mds02209'))
-    # print(main('女王的SM调教'))
-    # print(main('91CM202'))
-    # print(main('91CM-202'))
+               file_path='XSJ138.养子的秘密教学EP6.薇安姐内射教学.性视界出品'))  # print(main('MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样', file_path='MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样')) # print(main('MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版', file_path='MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版')) # print(main('MSD-023', file_path='MSD023.袁子仪.杨柳.可爱女孩非亲妹.渴望已久的(非)近亲性爱.麻豆传媒映画.Model.Seeding系列.mp4')) # print(main('', file_path='夏日回忆 贰')) # print(main('MDX-0016')) # print(main('MDSJ-0004')) # print(main('RS-020')) # print(main('PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒', file_path='PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒')) # print(main('老公在外出差家里的娇妻被入室小偷强迫性交 - 美酱')) # print(main('', file_path='夏日回忆 贰 HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4')) # print(main('', file_path='HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4')) # print(main('', file_path="【HongKongDoll玩偶姐姐.短篇集.情人节特辑.Valentine's Day Special-cd2")) # print(main('', file_path='PMC-062 唐茜.綠帽丈夫連同新弟怒操出軌老婆.強拍淫蕩老婆被操 唐茜.ts')) # print(main('', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画')) # print(main('淫欲游戏王.EP6', appoint_number='淫欲游戏王.EP5', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')) # EP不带.才能搜到 # print(main('', file_path='PMS-003.职场冰与火.EP3设局.宁静.苏文文.设局我要女人都臣服在我胯下.蜜桃影像传媒')) # print(main('', file_path='PMS-001 性爱公寓EP04 仨人.蜜桃影像传媒.ts')) # print(main('', file_path='PMS-001.性爱公寓EP03.ts')) # print(main('', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.ts')) # print(main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')) # main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts') # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱')) # 简体搜不到 # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木结爱.TS')) # '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛', '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-', ' 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛'] # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱.TS')) # print(main('', file_path='PMS-001 性爱公寓EP02 女王 蜜桃影像传媒 -莉娜乔安.TS')) # print(main('91CM-081', file_path='91CM-081.田恬.李琼.继母与女儿.三.爸爸不在家先上妹妹再玩弄母亲.果冻传媒.mp4')) # print(main('91CM-081', file_path='MDJ-0001.EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.麻豆传媒映画.mp4')) # print(main('91CM-081', file_path='MDJ0001 EP2 AV 淫兽鬼父 陈美惠 .TS')) # print(main('91CM-081', file_path='MXJ-0005.EP1.弥生美月.小恶魔高校生.与老师共度的放浪补课.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='PH-US-002.色控.音乐老师全裸诱惑.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.TS')) # print(main('91CM-081', file_path='MD-0140-2.蜜苏.家有性事EP2.爱在身边.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RUCK ME LIKE A SEX DOLL.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS')) # print(main('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS')) # print(main('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS')) # print(main('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS')) # print(main('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS')) # print(main('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS')) # print(main('MDM-002')) # 去掉标题最后的发行商 # print(main('MDS-007')) # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')) # 数字要四位才能搜索到,即 MDJ-0001.EP1 # print(main('91CM-090')) # 带横线才能搜到 # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')) # 带空格才能搜到 # print(main('淫欲游戏王EP2')) # 不带空格才能搜到 # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')) # print(main('MD-0020')) # print(main('mds009')) # print(main('mds02209')) # print(main('女王的SM调教')) # print(main('91CM202')) # print(main('91CM-202'))
diff --git a/src/models/crawlers/dahlia.py b/src/models/crawlers/dahlia.py
index 0a96de0..7c3b6a2 100644
--- a/src/models/crawlers/dahlia.py
+++ b/src/models/crawlers/dahlia.py
@@ -23,8 +23,7 @@ def get_title(html):


 def get_actor(html):
-    actor_result = html.xpath(
-        '//div[@class="box_works01_list clearfix"]//span[text()="出演女優"]/following-sibling::p[1]/text()')
+    actor_result = html.xpath('//div[@class="box_works01_list clearfix"]//span[text()="出演女優"]/following-sibling::p[1]/text()')
     return ','.join(actor_result)


@@ -55,8 +54,7 @@ def get_series(html):


 def get_director(html):
-    result = html.xpath(
-        '//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
+    result = html.xpath('//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
     return result[0] if result else ''


@@ -149,8 +147,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         for each in actor_photo.keys():
             title = title.replace(' ' + each, '')
         cover_url = get_cover(html_detail)  # 获取cover
-        poster_url = cover_url.replace('_web_h4', '_h1').replace('_1200.jpg', '_2125.jpg').replace('_tsp.jpg',
-                                                                                                   '_actor.jpg')
+        poster_url = cover_url.replace('_web_h4', '_h1').replace('_1200.jpg', '_2125.jpg').replace('_tsp.jpg', '_actor.jpg')
         outline = get_outline(html_detail)
         tag = ''
         release = get_release(html_detail)
@@ -207,16 +204,16 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     except Exception as e:
         # print(traceback.format_exc())
         debug_info = str(e)
-        dic = {'title': '', 'cover': '', 'website': '', 'log_info': log_info, 'error_info': debug_info,
-               'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))}
+        dic = {
+            'title': '',
+            'cover': '',
+            'website': '',
+            'log_info': log_info,
+            'error_info': debug_info,
+            'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))
+        }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


diff --git a/src/models/crawlers/dmm.py b/src/models/crawlers/dmm.py
index b4cb1d4..5e85688 100644
--- a/src/models/crawlers/dmm.py
+++ b/src/models/crawlers/dmm.py
@@ -92,8 +92,7 @@ def get_release(html):
 def get_tag(html):
     result = html.xpath("//td[contains(text(),'ジャンル')]/following-sibling::td/a/text()")
     if not result:
-        result = html.xpath(
-            "//div[@class='info__item']/table/tbody/tr/th[contains(text(),'ジャンル')]/following-sibling::td/a/text()")
+        result = html.xpath("//div[@class='info__item']/table/tbody/tr/th[contains(text(),'ジャンル')]/following-sibling::td/a/text()")
html.xpath("//div[@class='info__item']/table/tbody/tr/th[contains(text(),'ジャンル')]/following-sibling::td/a/text()") return str(result).strip(" ['']").replace("', '", ",") @@ -128,8 +127,7 @@ def get_director(html): def get_ountline(html): - result = html.xpath( - "normalize-space(string(//div[@class='wp-smplex']/preceding-sibling::div[contains(@class, 'mg-b20')][1]))") + result = html.xpath("normalize-space(string(//div[@class='wp-smplex']/preceding-sibling::div[contains(@class, 'mg-b20')][1]))") return result.replace('「コンビニ受取」対象商品です。詳しくはこちらをご覧ください。', '').strip() @@ -239,20 +237,12 @@ def get_tv_jp_data(real_url): 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', } data = { - "operationName": - "GetFanzaTvContentDetail", - "variables": { - "id": cid, - "device": "BROWSER", - "playDevice": "BROWSER", - "isLoggedIn": False - }, - "query": - "query GetFanzaTvContentDetail($id: ID!, $device: Device!, $isLoggedIn: Boolean!, $playDevice: PlayDevice!) {\n fanzaTV(device: $device) {\n content(id: $id) {\n __typename\n id\n contentType\n shopName\n shopOption\n shopType\n title\n description\n packageImage\n packageLargeImage\n noIndex\n ppvShopName\n viewingRights(device: $playDevice) @include(if: $isLoggedIn) {\n isStreamable\n __typename\n }\n startDeliveryAt\n endDeliveryAt\n isBeingDelivered\n hasBookmark @include(if: $isLoggedIn)\n sampleMovie {\n url\n thumbnail\n __typename\n }\n samplePictures {\n image\n imageLarge\n __typename\n }\n actresses {\n id\n name\n __typename\n }\n histrions {\n id\n name\n __typename\n }\n directors {\n id\n name\n __typename\n }\n series {\n id\n name\n __typename\n }\n maker {\n id\n name\n __typename\n }\n label {\n id\n name\n __typename\n }\n genres {\n id\n name\n __typename\n }\n playInfo(withResume: $isLoggedIn, device: $device) {\n parts {\n contentId\n number\n duration\n resumePoint\n __typename\n }\n resumePartNumber\n highestQualityName\n duration\n __typename\n }\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n reviews(first: 5) {\n edges {\n node {\n id\n reviewerName\n reviewerId\n title\n point\n hasSpoiler\n comment\n date\n postEvaluationCount\n helpfulVoteCount\n isReviewerPurchased\n __typename\n }\n __typename\n }\n pageInfo {\n endCursor\n hasNextPage\n __typename\n }\n total\n __typename\n }\n fanzaTvRecommendations: itemBasedRecommendations(\n device: $device\n shop: FANZA_TV\n limit: 30\n ) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n fanzaPpvRecommendations: itemBasedRecommendations(\n device: $device\n shop: VIDEO\n limit: 30\n ) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n }\n userBasedRecommendations(place: DETAIL_PAGE, limit: 30) @include(if: $isLoggedIn) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n __typename\n }\n}\n" + "operationName": "GetFanzaTvContentDetail", + "variables": {"id": cid, "device": "BROWSER", "playDevice": "BROWSER", "isLoggedIn": False}, + "query": "query GetFanzaTvContentDetail($id: ID!, $device: Device!, $isLoggedIn: Boolean!, $playDevice: PlayDevice!) 
{\n fanzaTV(device: $device) {\n content(id: $id) {\n __typename\n id\n contentType\n shopName\n shopOption\n shopType\n title\n description\n packageImage\n packageLargeImage\n noIndex\n ppvShopName\n viewingRights(device: $playDevice) @include(if: $isLoggedIn) {\n isStreamable\n __typename\n }\n startDeliveryAt\n endDeliveryAt\n isBeingDelivered\n hasBookmark @include(if: $isLoggedIn)\n sampleMovie {\n url\n thumbnail\n __typename\n }\n samplePictures {\n image\n imageLarge\n __typename\n }\n actresses {\n id\n name\n __typename\n }\n histrions {\n id\n name\n __typename\n }\n directors {\n id\n name\n __typename\n }\n series {\n id\n name\n __typename\n }\n maker {\n id\n name\n __typename\n }\n label {\n id\n name\n __typename\n }\n genres {\n id\n name\n __typename\n }\n playInfo(withResume: $isLoggedIn, device: $device) {\n parts {\n contentId\n number\n duration\n resumePoint\n __typename\n }\n resumePartNumber\n highestQualityName\n duration\n __typename\n }\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n reviews(first: 5) {\n edges {\n node {\n id\n reviewerName\n reviewerId\n title\n point\n hasSpoiler\n comment\n date\n postEvaluationCount\n helpfulVoteCount\n isReviewerPurchased\n __typename\n }\n __typename\n }\n pageInfo {\n endCursor\n hasNextPage\n __typename\n }\n total\n __typename\n }\n fanzaTvRecommendations: itemBasedRecommendations(\n device: $device\n shop: FANZA_TV\n limit: 30\n ) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n fanzaPpvRecommendations: itemBasedRecommendations(\n device: $device\n shop: VIDEO\n limit: 30\n ) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n }\n userBasedRecommendations(place: DETAIL_PAGE, limit: 30) @include(if: $isLoggedIn) {\n id\n title\n packageImage\n averageReviewPoint\n price\n salePrice\n __typename\n }\n __typename\n }\n}\n" } - result, response = post_html('https://api.tv.dmm.co.jp/graphql', headers=headers, json=data, json_data=True, - keep=False) + result, response = post_html('https://api.tv.dmm.co.jp/graphql', headers=headers, json=data, json_data=True, keep=False) if result and response.get('data'): api_data = response['data']['fanzaTV']['content'] title = api_data['title'] @@ -317,22 +307,12 @@ def get_tv_com_data(number): 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', } data = { - "operationName": - "GetVideo", - "variables": { - "seasonId": number, - "contentId": "", - "device": "BROWSER", - "playDevice": "BROWSER", - "isLoggedIn": False, - "isContentId": False - }, - "query": - "query GetVideo($seasonId: ID!, $contentId: ID!, $device: Device!, $playDevice: PlayDevice!, $isLoggedIn: Boolean!, $isContentId: Boolean!) 
{\n video(id: $seasonId) {\n id\n seasonType\n hasBookmark @include(if: $isLoggedIn)\n titleName\n seasonName\n highlight(format: HTML)\n description(format: HTML)\n notices(format: HTML)\n packageImage\n productionYear\n isNewArrival\n isPublic\n isExclusive\n isBeingDelivered\n viewingTypes\n campaign {\n name\n endAt\n __typename\n }\n rating {\n category\n __typename\n }\n casts {\n castName\n actorName\n person {\n id\n __typename\n }\n __typename\n }\n staffs {\n roleName\n staffName\n person {\n id\n __typename\n }\n __typename\n }\n categories {\n name\n id\n __typename\n }\n genres {\n name\n id\n __typename\n }\n copyright\n relatedItems(device: $device) {\n videos {\n seasonId\n video {\n id\n titleName\n packageImage\n isNewArrival\n isExclusive\n __typename\n }\n __typename\n }\n books {\n seriesId\n title\n thumbnail\n url\n __typename\n }\n mono {\n banner\n url\n __typename\n }\n scratch {\n banner\n url\n __typename\n }\n onlineCrane {\n banner\n url\n __typename\n }\n __typename\n }\n ... on VideoSeason {\n ...CommonVideoSeason\n __typename\n }\n ... on VideoLegacySeason {\n ...CommonVideoLegacySeason\n __typename\n }\n ... on VideoStageSeason {\n ...CommonVideoStageSeason\n __typename\n }\n ... on VideoSpotLiveSeason {\n ...CommonVideoSpotLiveSeason\n __typename\n }\n __typename\n }\n}\n\nfragment CommonVideoSeason on VideoSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n relatedSeasons {\n id\n title\n __typename\n }\n upcomingEpisode {\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n continueWatching @include(if: $isLoggedIn) {\n resumePoint\n contentId\n content {\n episodeImage\n episodeTitle\n episodeNumber\n episodeNumberName\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n __typename\n }\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n episode(id: $contentId) @include(if: $isContentId) {\n id\n episodeTitle\n episodeImage\n episodeNumber\n episodeNumberName\n episodeDetail\n playInfo {\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n duration\n audioRenditions\n textRenditions\n __typename\n }\n viewingRights(device: $playDevice) {\n isDownloadable\n isStreamable\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n episodes(type: MAIN, first: 1) {\n edges {\n node {\n id\n sampleMovie\n episodeTitle\n episodeNumber\n episodeNumberName\n playInfo {\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n duration\n audioRenditions\n textRenditions\n __typename\n }\n viewingRights(device: $playDevice) {\n isDownloadable\n isStreamable\n downloadableFiles @include(if: $isLoggedIn) {\n quality {\n name\n displayName\n displayPriority\n __typename\n }\n totalFileSize\n parts {\n partNumber\n fileSize\n __typename\n }\n __typename\n }\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n 
__typename\n }\n __typename\n }\n total\n __typename\n }\n purchasedContents(first: 1) @include(if: $isLoggedIn) {\n edges {\n node {\n id\n __typename\n }\n __typename\n }\n total\n __typename\n }\n specialEpisode: episodes(type: SPECIAL, first: 1) {\n total\n __typename\n }\n pvEpisode: episodes(type: PV, first: 1) {\n edges {\n node {\n id\n sampleMovie\n playInfo {\n duration\n __typename\n }\n __typename\n }\n __typename\n }\n total\n __typename\n }\n}\n\nfragment VideoPPVProductTag on VideoPPVProduct {\n id\n isOnSale\n isBeingDelivered\n isPurchased @include(if: $isLoggedIn)\n price {\n price\n salePrice\n __typename\n }\n __typename\n}\n\nfragment CommonVideoLegacySeason on VideoLegacySeason {\n __typename\n metaDescription: description(format: PLAIN)\n packageLargeImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n sampleMovie {\n url\n thumbnail\n __typename\n }\n samplePictures {\n image\n imageLarge\n __typename\n }\n sampleMovie {\n url\n thumbnail\n __typename\n }\n reviewSummary {\n averagePoint\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n continueWatching @include(if: $isLoggedIn) {\n partNumber\n resumePoint\n contentId\n content {\n playInfo {\n parts {\n contentId\n __typename\n }\n __typename\n }\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n __typename\n }\n __typename\n }\n content {\n id\n contentType\n viewingRights(device: $playDevice) {\n isStreamable\n isDownloadable\n downloadableFiles @include(if: $isLoggedIn) {\n quality {\n name\n displayName\n displayPriority\n __typename\n }\n totalFileSize\n parts {\n partNumber\n fileSize\n __typename\n }\n __typename\n }\n windowsURLSchemes: appURLSchemes(app: WINDOWS_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n iosURLSchemes: appURLSchemes(app: IOS_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n androidURLSchemes: appURLSchemes(app: ANDROID_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n __typename\n }\n playInfo {\n duration\n audioRenditions\n textRenditions\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n parts {\n contentId\n number\n __typename\n }\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n series {\n id\n name\n __typename\n }\n}\n\nfragment CommonVideoStageSeason on VideoStageSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n allPerformances {\n performanceDate\n contents {\n id\n episodeTitle\n priority\n startLivePerformanceAt\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n __typename\n }\n __typename\n }\n purchasedContents(first: 1) @include(if: $isLoggedIn) {\n edges {\n node {\n id\n __typename\n }\n __typename\n }\n total\n __typename\n }\n}\n\nfragment CommonVideoSpotLiveSeason on VideoSpotLiveSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n episodes(type: MAIN, first: 1) {\n edges {\n node {\n id\n episodeTitle\n episodeNumber\n 
episodeNumberName\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n playInfo {\n audioRenditions\n textRenditions\n duration\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n" + "operationName": "GetVideo", + "variables": {"seasonId": number, "contentId": "", "device": "BROWSER", "playDevice": "BROWSER", "isLoggedIn": False, "isContentId": False}, + "query": "query GetVideo($seasonId: ID!, $contentId: ID!, $device: Device!, $playDevice: PlayDevice!, $isLoggedIn: Boolean!, $isContentId: Boolean!) {\n video(id: $seasonId) {\n id\n seasonType\n hasBookmark @include(if: $isLoggedIn)\n titleName\n seasonName\n highlight(format: HTML)\n description(format: HTML)\n notices(format: HTML)\n packageImage\n productionYear\n isNewArrival\n isPublic\n isExclusive\n isBeingDelivered\n viewingTypes\n campaign {\n name\n endAt\n __typename\n }\n rating {\n category\n __typename\n }\n casts {\n castName\n actorName\n person {\n id\n __typename\n }\n __typename\n }\n staffs {\n roleName\n staffName\n person {\n id\n __typename\n }\n __typename\n }\n categories {\n name\n id\n __typename\n }\n genres {\n name\n id\n __typename\n }\n copyright\n relatedItems(device: $device) {\n videos {\n seasonId\n video {\n id\n titleName\n packageImage\n isNewArrival\n isExclusive\n __typename\n }\n __typename\n }\n books {\n seriesId\n title\n thumbnail\n url\n __typename\n }\n mono {\n banner\n url\n __typename\n }\n scratch {\n banner\n url\n __typename\n }\n onlineCrane {\n banner\n url\n __typename\n }\n __typename\n }\n ... on VideoSeason {\n ...CommonVideoSeason\n __typename\n }\n ... on VideoLegacySeason {\n ...CommonVideoLegacySeason\n __typename\n }\n ... on VideoStageSeason {\n ...CommonVideoStageSeason\n __typename\n }\n ... 
on VideoSpotLiveSeason {\n ...CommonVideoSpotLiveSeason\n __typename\n }\n __typename\n }\n}\n\nfragment CommonVideoSeason on VideoSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n relatedSeasons {\n id\n title\n __typename\n }\n upcomingEpisode {\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n continueWatching @include(if: $isLoggedIn) {\n resumePoint\n contentId\n content {\n episodeImage\n episodeTitle\n episodeNumber\n episodeNumberName\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n __typename\n }\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n episode(id: $contentId) @include(if: $isContentId) {\n id\n episodeTitle\n episodeImage\n episodeNumber\n episodeNumberName\n episodeDetail\n playInfo {\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n duration\n audioRenditions\n textRenditions\n __typename\n }\n viewingRights(device: $playDevice) {\n isDownloadable\n isStreamable\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n episodes(type: MAIN, first: 1) {\n edges {\n node {\n id\n sampleMovie\n episodeTitle\n episodeNumber\n episodeNumberName\n playInfo {\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n duration\n audioRenditions\n textRenditions\n __typename\n }\n viewingRights(device: $playDevice) {\n isDownloadable\n isStreamable\n downloadableFiles @include(if: $isLoggedIn) {\n quality {\n name\n displayName\n displayPriority\n __typename\n }\n totalFileSize\n parts {\n partNumber\n fileSize\n __typename\n }\n __typename\n }\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n __typename\n }\n total\n __typename\n }\n purchasedContents(first: 1) @include(if: $isLoggedIn) {\n edges {\n node {\n id\n __typename\n }\n __typename\n }\n total\n __typename\n }\n specialEpisode: episodes(type: SPECIAL, first: 1) {\n total\n __typename\n }\n pvEpisode: episodes(type: PV, first: 1) {\n edges {\n node {\n id\n sampleMovie\n playInfo {\n duration\n __typename\n }\n __typename\n }\n __typename\n }\n total\n __typename\n }\n}\n\nfragment VideoPPVProductTag on VideoPPVProduct {\n id\n isOnSale\n isBeingDelivered\n isPurchased @include(if: $isLoggedIn)\n price {\n price\n salePrice\n __typename\n }\n __typename\n}\n\nfragment CommonVideoLegacySeason on VideoLegacySeason {\n __typename\n metaDescription: description(format: PLAIN)\n packageLargeImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n sampleMovie {\n url\n thumbnail\n __typename\n }\n samplePictures {\n image\n imageLarge\n __typename\n }\n sampleMovie {\n url\n thumbnail\n __typename\n }\n reviewSummary {\n averagePoint\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n continueWatching @include(if: $isLoggedIn) {\n partNumber\n resumePoint\n 
contentId\n content {\n playInfo {\n parts {\n contentId\n __typename\n }\n __typename\n }\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n __typename\n }\n __typename\n }\n content {\n id\n contentType\n viewingRights(device: $playDevice) {\n isStreamable\n isDownloadable\n downloadableFiles @include(if: $isLoggedIn) {\n quality {\n name\n displayName\n displayPriority\n __typename\n }\n totalFileSize\n parts {\n partNumber\n fileSize\n __typename\n }\n __typename\n }\n windowsURLSchemes: appURLSchemes(app: WINDOWS_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n iosURLSchemes: appURLSchemes(app: IOS_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n androidURLSchemes: appURLSchemes(app: ANDROID_VR) @include(if: $isLoggedIn) {\n partNumber\n url\n __typename\n }\n __typename\n }\n playInfo {\n duration\n audioRenditions\n textRenditions\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n parts {\n contentId\n number\n __typename\n }\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n __typename\n }\n series {\n id\n name\n __typename\n }\n}\n\nfragment CommonVideoStageSeason on VideoStageSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n reviewSummary {\n averagePoint\n reviewerCount\n reviewCommentCount\n __typename\n }\n priceSummary {\n lowestPrice\n discountedLowestPrice\n __typename\n }\n allPerformances {\n performanceDate\n contents {\n id\n episodeTitle\n priority\n startLivePerformanceAt\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n __typename\n }\n __typename\n }\n purchasedContents(first: 1) @include(if: $isLoggedIn) {\n edges {\n node {\n id\n __typename\n }\n __typename\n }\n total\n __typename\n }\n}\n\nfragment CommonVideoSpotLiveSeason on VideoSpotLiveSeason {\n __typename\n metaDescription: description(format: PLAIN)\n keyVisualImage\n keyVisualWithoutLogoImage\n episodes(type: MAIN, first: 1) {\n edges {\n node {\n id\n episodeTitle\n episodeNumber\n episodeNumberName\n viewingRights(device: $playDevice) {\n isStreamable\n __typename\n }\n ppvExpiration @include(if: $isLoggedIn) {\n expirationType\n viewingExpiration\n viewingStartExpiration\n startDeliveryAt\n __typename\n }\n freeProduct {\n contentId\n __typename\n }\n ppvProducts {\n ...VideoPPVProductTag\n __typename\n }\n svodProduct {\n startDeliveryAt\n __typename\n }\n playInfo {\n audioRenditions\n textRenditions\n duration\n highestQuality\n isSupportHDR\n highestAudioChannelLayout\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n" } - result, response = post_html('https://api.tv.dmm.com/graphql', headers=headers, json=data, json_data=True, - keep=False) + result, response = post_html('https://api.tv.dmm.com/graphql', headers=headers, json=data, json_data=True, keep=False) if result and response.get('data'): api_data = response['data']['video'] title = api_data['titleName'] @@ -491,8 +471,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', file_pa number_00 = '5497' + number_00 elif number_00.lower().startswith('ftbd'): number_00 = '5533' + number_00 - elif number_00.lower().startswith('ugm') or number_00.lower().startswith( - 'dmi') or 
diff --git a/src/models/crawlers/faleno.py b/src/models/crawlers/faleno.py
index 0ad74d4..801f50e 100644
--- a/src/models/crawlers/faleno.py
+++ b/src/models/crawlers/faleno.py
@@ -23,8 +23,7 @@ def get_title(html):


 def get_actor(html):
-    actor_result = html.xpath(
-        '//div[@class="box_works01_list clearfix"]//span[text()="出演女優"]/following-sibling::p[1]/text()')
+    actor_result = html.xpath('//div[@class="box_works01_list clearfix"]//span[text()="出演女優"]/following-sibling::p[1]/text()')
     return ','.join(actor_result)


@@ -54,8 +53,7 @@ def get_series(html):


 def get_director(html):
-    result = html.xpath(
-        '//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
+    result = html.xpath('//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
     return result[0] if result else ''


@@ -122,14 +120,12 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     number_lo = number.lower()
     number_lo_noline = number_lo.replace('-', '')
     number_lo_space = number_lo.replace('-', ' ')
-    search_url_list = [f'https://faleno.jp/top/?s={number_lo_space}',
-                       f'https://falenogroup.com/top/?s={number_lo_space}']
+    search_url_list = [f'https://faleno.jp/top/?s={number_lo_space}', f'https://falenogroup.com/top/?s={number_lo_space}']
     real_url_list = []
     if real_url:
         real_url_list = [real_url]
     elif number.upper().startswith('FLN'):
-        real_url_list = [f"https://faleno.jp/top/works/{number_lo_noline}/",
-                         f"https://faleno.jp/top/works/{number_lo}/", f"https://falenogroup.com/works/{number_lo}/",
+        real_url_list = [f"https://faleno.jp/top/works/{number_lo_noline}/", f"https://faleno.jp/top/works/{number_lo}/", f"https://falenogroup.com/works/{number_lo}/",
                          f"https://falenogroup.com/works/{number_lo_noline}/"]
     log_info += '\n    🌐 faleno'
     mosaic = '有码'
@@ -179,8 +175,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
             title = title.replace(' ' + each, '')
         cover_url = get_cover(html_detail)  # 获取cover
         if not poster_url:
-            poster_url = cover_url.replace('_1200.jpg', '_2125.jpg').replace('_tsp.jpg', '_actor.jpg').replace(
-                '1200_re', '2125').replace('_1200-1', '_2125-1')
+            poster_url = cover_url.replace('_1200.jpg', '_2125.jpg').replace('_tsp.jpg', '_actor.jpg').replace('1200_re', '2125').replace('_1200-1', '_2125-1')
         outline = get_outline(html_detail)
         tag = ''
         release = get_release(html_detail)
@@ -240,24 +235,19 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     except Exception as e:
         # print(traceback.format_exc())
         debug_info = str(e)
-        dic = {'title': '', 'cover': '', 'website': '', 'log_info': log_info, 'error_info': debug_info,
-               'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))}
+        dic = {
+            'title': '',
+            'cover': '',
+            'website': '',
+            'log_info': log_info,
+            'error_info': debug_info,
+            'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))
+        }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # yapf: disable
-    print(main('fsdss-564'))
-    # print(main('mgold-017'))  # 地址带 -
-    # print(main('votan-034'))  # falenogroup.com 番号和数字加空格才能搜到
-    # print(main('fcdss-001'))  # 页面地址 flnc001
-    # print(main('FSDSS-037'))  # poster .replace('_1200-1', '_2125-1')
-    # print(main('flns-072'))  # outline 换行
+    print(main('fsdss-564'))  # print(main('mgold-017')) # 地址带 - # print(main('votan-034')) # falenogroup.com 番号和数字加空格才能搜到 # print(main('fcdss-001')) # 页面地址 flnc001 # print(main('FSDSS-037')) # poster .replace('_1200-1', '_2125-1') # print(main('flns-072')) # outline 换行
diff --git a/src/models/crawlers/fantastica.py b/src/models/crawlers/fantastica.py
index 888299f..f1730dd 100644
--- a/src/models/crawlers/fantastica.py
+++ b/src/models/crawlers/fantastica.py
@@ -232,19 +232,10 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # yapf: disable
-    print(main('FAAP525'))  # 无图
-    # print(main('fakwm-001'))
-    # print(main('fakwm-064'))
-    # print(main('fapro-123'))
+    print(main('FAAP525'))  # 无图 # print(main('fakwm-001')) # print(main('fakwm-064')) # print(main('fapro-123'))
diff --git a/src/models/crawlers/fc2.py b/src/models/crawlers/fc2.py
index ad51c75..672a366 100644
--- a/src/models/crawlers/fc2.py
+++ b/src/models/crawlers/fc2.py
@@ -156,9 +156,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
             'publisher': studio,
             'source': 'fc2',
             'website': real_url,
-            'actor_photo': {
-                actor: ''
-            },
+            'actor_photo': {actor: ''},
             'cover': cover_url,
             'poster': poster_url,
             'extrafanart': extrafanart,
@@ -190,28 +188,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js


 if __name__ == '__main__':
     # yapf: disable
-    print(main('1723984', ''))  # 有码
-    # print(main('1924776', ''))
-    # print(main('1860858', ''))
-    # print(main('1599412', ''))  # fc2hub有,fc2/fc2club没有
-    # print(main('1131214', ''))  # fc2club有,fc2/fc2hub没有
-    # print(main('1837553', ''))  # 无码
-    # print(main('1613618', ''))
-    # print(main('1837553', ''))
-    # print(main('1837589', ""))
-    # print(main('1760182', ''))
-    # print(main('1251689', ''))
-    # print(main('674239', ""))
-    # print(main('674239', "))
+    print(main('1723984',
+               ''))  # 有码 # print(main('1924776', '')) # print(main('1860858', '')) # print(main('1599412', '')) # fc2hub有,fc2/fc2club没有 # print(main('1131214', '')) # fc2club有,fc2/fc2hub没有 # print(main('1837553', '')) # 无码 # print(main('1613618', '')) # print(main('1837553', '')) # print(main('1837589', "")) # print(main('1760182', '')) # print(main('1251689', '')) # print(main('674239', "")) # print(main('674239', "))
diff --git a/src/models/crawlers/fc2club.py b/src/models/crawlers/fc2club.py
index 0a4a5a9..e4160e4 100644
--- a/src/models/crawlers/fc2club.py
+++ b/src/models/crawlers/fc2club.py
@@ -88,10 +88,7 @@ def getTag(html):  # 获取标签


 def getOutline(html):  # 获取简介
-    result = str(html.xpath('//div[@class="col des"]/text()')).strip('[' ']').replace("',", '').replace('\\n',
-                                                                                                        '').replace("'",
-                                                                                                                    '').replace(
-        '・', '').strip()
+    result = str(html.xpath('//div[@class="col des"]/text()')).strip('[' ']').replace("',", '').replace('\\n', '').replace("'", '').replace('・', '').strip()
     return result


@@ -197,30 +194,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js


 if __name__ == '__main__':
     # print(main('1470588', ''))
-    print(main('743423', ''))
-    # print(main('674261', ''))
-    # print(main('406570', ''))
-    # print(main('1474843', ''))
-    # print(main('1860858', ''))
-    # print(main('1599412', ''))
-    # print(main('1131214', ''))
-    # print(main('1837553', ''))
-    # print(main('1613618', ''))
-    # print(main('1837553', ''))
-    # print(main('1837589', ""))
-    # print(main('1760182', ''))
-    # print(main('1251689', ''))
-    # print(main('674239', ""))
-    # print(main('674239', "))
+    print(main('743423',
+               ''))  # print(main('674261', '')) # print(main('406570', '')) # print(main('1474843', '')) # print(main('1860858', '')) # print(main('1599412', '')) # print(main('1131214', '')) # print(main('1837553', '')) # print(main('1613618', '')) # print(main('1837553', '')) # print(main('1837589', "")) # print(main('1760182', '')) # print(main('1251689', '')) # print(main('674239', "")) # print(main('674239', "))
diff --git a/src/models/crawlers/fc2hub.py b/src/models/crawlers/fc2hub.py
index 661e82c..e5739ac 100644
--- a/src/models/crawlers/fc2hub.py
+++ b/src/models/crawlers/fc2hub.py
@@ -64,13 +64,7 @@ def getTag(html):  # 获取标签


 def getOutline(html):  # 获取简介
-    result = (''.join(html.xpath('//div[@class="col des"]//text()'))
-              .strip('[' ']')
-              .replace("',", '')
-              .replace('\\n', '')
-              .replace("'", '')
-              .replace('・', '')
-              .strip())
+    result = (''.join(html.xpath('//div[@class="col des"]//text()')).strip('[' ']').replace("',", '').replace('\\n', '').replace("'", '').replace('・', '').strip())
     return result


@@ -167,9 +161,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
             'publisher': studio,
             'source': 'fc2hub.main',
             'website': str(real_url).strip('[]'),
-            'actor_photo': {
-                actor: ''
-            },
+            'actor_photo': {actor: ''},
             'cover': str(cover_url),
             'poster': '',
             'extrafanart': extrafanart,
@@ -202,29 +194,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js


 if __name__ == '__main__':
     # yapf: disable
     # print(main('FC2-424646'))
-    print(main('1940476'))  # 无码
-    # print(main('1860858', ''))  #有码
-    # print(main('1599412', ''))
-    # print(main('1131214', ''))  # 未找到
-    # print(main('1837553', ''))
-    # print(main('1613618', ''))
-    # print(main('1837553', ''))
-    # print(main('1837589', ""))
-    # print(main('1760182', ''))
-    # print(main('1251689', ''))
-    # print(main('674239', ""))
-    # print(main('674239', "))
-    # print(main('1924003', ''))  # 无图
+    print(main('1940476'))  # 无码 # print(main('1860858', '')) #有码 # print(main('1599412', '')) # print(main('1131214', '')) # 未找到 # print(main('1837553', '')) # print(main('1613618', '')) # print(main('1837553', '')) # print(main('1837589', "")) # print(main('1760182', '')) # print(main('1251689', '')) # print(main('674239', "")) # print(main('674239', ")) # print(main('1924003', '')) # 无图
diff --git a/src/models/crawlers/freejavbt.py b/src/models/crawlers/freejavbt.py
index 7698501..c0dbf0a 100644
--- a/src/models/crawlers/freejavbt.py
+++ b/src/models/crawlers/freejavbt.py
@@ -23,8 +23,8 @@ def get_title(html):
     number = result[0]
     title = result[1].replace(number, '').strip()
     number = number.strip()
-    title = title.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_', '-').replace(
-        number.upper(), '').replace(number, '').replace('--', '-').strip()
+    title = title.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_', '-').replace(number.upper(), '').replace(number, '').replace('--',
+                                                                                                                                                      '-').strip()
     if not title or '翻译错误' in title or '每日更新' in str(result):
         return '', ''
     return title, number
@@ -34,39 +34,21 @@ def get_title(html):

 def get_actor(html):
     actor_result = html.xpath('//a[@class="actress"]/text()')
-    av_man = [
-        '貞松大輔', '鮫島', '森林原人', '黒田悠斗', '主観', '吉村卓', '野島誠', '小田切ジュン', 'しみけん',
-        'セツネヒデユキ',
-        '大島丈', '玉木玲', 'ウルフ田中', 'ジャイアント廣田', 'イセドン内村', '西島雄介', '平田司', '杉浦ボッ樹',
-        '大沢真司', 'ピエール剣',
-        '羽田', '田淵正浩', 'タツ', '南佳也', '吉野篤史', '今井勇太', 'マッスル澤野', '井口', '松山伸也', '花岡じった',
-        '佐川銀次', 'およよ中野', '小沢とおる', '橋本誠吾', '阿部智広', '沢井亮', '武田大樹', '市川哲也', '???',
-        '浅野あたる',
-        '梅田吉雄', '阿川陽志', '素人', '結城結弦', '畑中哲也', '堀尾', '上田昌宏', 'えりぐち', '市川潤', '沢木和也',
-        'トニー大木', '横山大輔', '一条真斗', '真田京', 'イタリアン高橋', '中田一平', '完全主観', 'イェーイ高島',
-        '山田万次郎',
-        '澤地真人', '杉山', 'ゴロー', '細田あつし', '藍井優太', '奥村友真', 'ザーメン二郎', '桜井ちんたろう',
-        '冴山トシキ', '久保田裕也',
-        '戸川夏也', '北こうじ', '柏木純吉', 'ゆうき', 'トルティーヤ鈴木', '神けんたろう', '堀内ハジメ', 'ナルシス小林',
-        'アーミー', '池田径',
-        '吉村文孝', '優生', '久道実', '一馬', '辻隼人', '片山邦生', 'Qべぇ', '志良玉弾吾', '今岡爽紫郎', '工藤健太',
-        '原口', 'アベ', '染島貢', '岩下たろう', '小野晃', 'たむらあゆむ', '川越将護', '桜木駿', '瀧口',
-        'TJ本田', '園田', '宮崎', '鈴木一徹', '黒人', 'カルロス', '天河', 'ぷーてゃん', '左曲かおる', '富田', 'TECH',
-        'ムールかいせ',
-        '健太', '山田裕二', '池沼ミキオ', 'ウサミ', '押井敬之', '浅見草太', 'ムータン', 'フランクフルト林', '石橋豊彦',
-        '矢野慎二',
-        '芦田陽', 'くりぼ', 'ダイ', 'ハッピー池田', '山形健', '忍野雅一', '渋谷優太', '服部義', 'たこにゃん',
-        '北山シロ', 'つよぽん',
-        '山本いくお', '学万次郎', '平井シンジ', '望月', 'ゆーきゅん', '頭田光', '向理来', 'かめじろう', '高橋しんと',
-        '栗原良', 'テツ神山',
-        'タラオ', '真琴', '滝本', '金田たかお', '平ボンド', '春風ドギー', '桐島達也', '中堀健二', '徳田重男',
-        '三浦屋助六',
-        '志戸哲也', 'ヒロシ', 'オクレ', '羽目白武', 'ジョニー岡本', '幸野賀一', 'インフィニティ', 'ジャック天野',
-        '覆面', '安大吉',
-        '井上亮太', '笹木良一', '艦長', '軍曹', 'タッキー', '阿部ノボル', 'ダウ兄', 'まーくん', '梁井一',
-        'カンパニー松尾', '大塚玉堂',
-        '日比野達郎', '小梅', 'ダイナマイト幸男', 'タケル', 'くるみ太郎', '山田伸夫', '氷崎健人'
-    ]
+    av_man = ['貞松大輔', '鮫島', '森林原人', '黒田悠斗', '主観', '吉村卓', '野島誠', '小田切ジュン', 'しみけん', 'セツネヒデユキ', '大島丈', '玉木玲', 'ウルフ田中',
+              'ジャイアント廣田', 'イセドン内村', '西島雄介', '平田司', '杉浦ボッ樹', '大沢真司', 'ピエール剣', '羽田', '田淵正浩', 'タツ', '南佳也', '吉野篤史',
+              '今井勇太', 'マッスル澤野', '井口', '松山伸也', '花岡じった', '佐川銀次', 'およよ中野', '小沢とおる', '橋本誠吾', '阿部智広', '沢井亮', '武田大樹',
+              '市川哲也', '???', '浅野あたる', '梅田吉雄', '阿川陽志', '素人', '結城結弦', '畑中哲也', '堀尾', '上田昌宏', 'えりぐち', '市川潤', '沢木和也', 'トニー大木',
+              '横山大輔', '一条真斗', '真田京', 'イタリアン高橋', '中田一平', '完全主観', 'イェーイ高島', '山田万次郎', '澤地真人', '杉山', 'ゴロー', '細田あつし',
+              '藍井優太', '奥村友真', 'ザーメン二郎', '桜井ちんたろう', '冴山トシキ', '久保田裕也', '戸川夏也', '北こうじ', '柏木純吉', 'ゆうき', 'トルティーヤ鈴木',
+              '神けんたろう', '堀内ハジメ', 'ナルシス小林', 'アーミー', '池田径', '吉村文孝', '優生', '久道実', '一馬', '辻隼人', '片山邦生', 'Qべぇ', '志良玉弾吾',
+              '今岡爽紫郎', '工藤健太', '原口', 'アベ', '染島貢', '岩下たろう', '小野晃', 'たむらあゆむ', '川越将護', '桜木駿', '瀧口', 'TJ本田', '園田', '宮崎',
+              '鈴木一徹', '黒人', 'カルロス', '天河', 'ぷーてゃん', '左曲かおる', '富田', 'TECH', 'ムールかいせ', '健太', '山田裕二', '池沼ミキオ', 'ウサミ', '押井敬之',
+              '浅見草太', 'ムータン', 'フランクフルト林', '石橋豊彦', '矢野慎二', '芦田陽', 'くりぼ', 'ダイ', 'ハッピー池田', '山形健', '忍野雅一', '渋谷優太', '服部義',
+              'たこにゃん', '北山シロ', 'つよぽん', '山本いくお', '学万次郎', '平井シンジ', '望月', 'ゆーきゅん', '頭田光', '向理来', 'かめじろう', '高橋しんと',
+              '栗原良', 'テツ神山', 'タラオ', '真琴', '滝本', '金田たかお', '平ボンド', '春風ドギー', '桐島達也', '中堀健二', '徳田重男', '三浦屋助六', '志戸哲也',
+              'ヒロシ', 'オクレ', '羽目白武', 'ジョニー岡本', '幸野賀一', 'インフィニティ', 'ジャック天野', '覆面', '安大吉', '井上亮太', '笹木良一', '艦長', '軍曹',
+              'タッキー', '阿部ノボル', 'ダウ兄', 'まーくん', '梁井一', 'カンパニー松尾', '大塚玉堂', '日比野達郎', '小梅', 'ダイナマイト幸男', 'タケル', 'くるみ太郎',
+              '山田伸夫', '氷崎健人']
     actor_list = [i.strip() for i in actor_result if i.replace('?', '')]
     all_actor_list = actor_list.copy()
     for each in all_actor_list:
@@ -89,8 +71,7 @@ def get_actor_photo(actor):

 def get_runtime(html):
-    result = html.xpath(
-        '//span[contains(text(), "时长") or contains(text(), "時長") or contains(text(), "収録時間")]/following-sibling::*//text()')
+    result = html.xpath('//span[contains(text(), "时长") or contains(text(), "時長") or contains(text(), "収録時間")]/following-sibling::*//text()')
     if result:
         result = re.findall(r'\d+', result[0])
     return result[0] if result else ''
@@ -102,14 +83,12 @@ def get_series(html):

 def get_director(html):
-    result = html.xpath(
-        '//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
+    result = html.xpath('//span[contains(text(), "导演") or contains(text(), "導演") or contains(text(), "監督")]/following-sibling::*//text()')
     return result[0] if result else ''


 def get_studio(html):
-    result = html.xpath(
-        '//span[contains(text(), "制作") or contains(text(), "製作") or contains(text(), "メーカー")]/following-sibling::*//text()')
+    result = html.xpath('//span[contains(text(), "制作") or contains(text(), "製作") or contains(text(), "メーカー")]/following-sibling::*//text()')
     return result[0] if result else ''
@@ -138,9 +117,7 @@ def get_tag(html):

 def get_cover(html):
     try:
-        result = html.xpath(
-            "//img[@class='video-cover rounded lazyload' or @class='col-lg-2 col-md-2 col-sm-6 col-12 lazyload']/@data-src")[
-            0]
+        result = html.xpath("//img[@class='video-cover rounded lazyload' or @class='col-lg-2 col-md-2 col-sm-6 col-12 lazyload']/@data-src")[0]
         if 'no_preview_lg' in result or 'http' not in result:
             return ''
     except:
@@ -276,16 +253,16 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     except Exception as e:
         # print(traceback.format_exc())
         debug_info = str(e)
-        dic = {'title': '', 'cover': '', 'website': '', 'log_info': log_info, 'error_info': debug_info,
-               'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))}
+        dic = {
+            'title': '',
+            'cover': '',
+            'website': '',
+            'log_info': log_info,
+            'error_info': debug_info,
+            'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), ))
+        }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js
@@ -296,65 +273,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     # print(main('ssis-118'))
     # print(main('DANDY-520', ''))  # 预告片默认低品质dm,改成高品质dmb
     # print(main('PPPD-653'))
-    print(main('SSNI-531'))
-    # print(main('ssis-330'))  # 预告片
-    # print(main('n1403'))
-    # print(main('SKYHD-014'))  # 无预览图
-    # print(main('FC2-424646'))  # 无番号
-    # print(main('CWPBD-168'))
-    # print(main('BadMilfs.22.04.02'))
-    # print(main('vixen.19.12.10'))
-    # print(main('CEMD-133'))
-    # print(main('FC2-880652'))  # 无番号
-    # print(main('PLA-018'))
-    # print(main('SIVR-060'))
-    # print(main('STCV-067'))
-    # print(main('ALDN-107'))
-    # print(main('DSVR-1205'))  # 无标题
-    # print(main('SIVR-100'))
-    # print(main('FC2-2787433'))
-    # print(main('MIDV-018'))
-    # print(main('MIDV-018', appoint_url='https://javdb.com/v/BnMY9'))
-    # print(main('SVSS-003'))
-    # print(main('SIVR-008'))
-    # print(main('blacked.21.07.03'))
-    # print(main('FC2-1262472'))  # 需要登录
-    # print(main('HUNTB-107'))  # 预告片返回url错误,只有https
-    # print(main('FC2-2392657'))  # 需要登录
-    # print(main('GS-067'))  # 两个同名番号
-    # print(main('MIDE-022'))
-    # print(main('KRAY-001'))
-    # print(main('ssis-243'))
-    # print(main('MIDE-900', 'https://javdb.com/v/MZp24?locale=en'))
-    # print(main('TD-011'))
-    # print(main('stars-011'))  # 发行商SOD star,下载封面
-    # print(main('stars-198'))  # 发行商SOD star,下载封面
-    # print(main('mium-748'))
-    # print(main('KMHRS-050'))  # 剧照第一张作为poster
-    # print(main('SIRO-4042'))
-    # print(main('snis-035'))
-    # print(main('vixen.18.07.18', ''))
-    # print(main('vixen.16.08.02', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('bangbros18.19.09.17'))
-    # print(main('x-art.19.11.03'))
-    # print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
-    # print(main('SSIS-001', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main_us('x-art.19.11.03', ''))
+    print(main('SSNI-531'))  # print(main('ssis-330')) # 预告片 # print(main('n1403')) # print(main('SKYHD-014')) # 无预览图 # print(main('FC2-424646')) # 无番号 # print(main('CWPBD-168')) # print(main('BadMilfs.22.04.02')) # print(main('vixen.19.12.10')) # print(main('CEMD-133')) # print(main('FC2-880652')) # 无番号 # print(main('PLA-018')) # print(main('SIVR-060')) # print(main('STCV-067')) # print(main('ALDN-107')) # print(main('DSVR-1205')) # 无标题 # print(main('SIVR-100')) # print(main('FC2-2787433')) # print(main('MIDV-018')) # print(main('MIDV-018', appoint_url='https://javdb.com/v/BnMY9')) # print(main('SVSS-003')) # print(main('SIVR-008')) # print(main('blacked.21.07.03')) # print(main('FC2-1262472')) # 需要登录 # print(main('HUNTB-107')) # 预告片返回url错误,只有https # print(main('FC2-2392657')) # 需要登录 # print(main('GS-067')) # 两个同名番号 # print(main('MIDE-022')) # print(main('KRAY-001')) # print(main('ssis-243')) # print(main('MIDE-900', 'https://javdb.com/v/MZp24?locale=en')) # print(main('TD-011')) # print(main('stars-011')) # 发行商SOD star,下载封面 # print(main('stars-198')) # 发行商SOD star,下载封面 # print(main('mium-748')) # print(main('KMHRS-050')) # 剧照第一张作为poster # print(main('SIRO-4042')) # print(main('snis-035')) # print(main('vixen.18.07.18', '')) # print(main('vixen.16.08.02', '')) # print(main('SNIS-016', '')) # print(main('bangbros18.19.09.17')) # print(main('x-art.19.11.03')) # print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('SSIS-001', '')) # print(main('SSIS-090', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/getchu.py b/src/models/crawlers/getchu.py
index 02e9f22..52e56be 100644
--- a/src/models/crawlers/getchu.py
+++ b/src/models/crawlers/getchu.py
@@ -136,8 +136,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         keyword = keyword.encode('cp932').decode('shift_jis')  # 转换为常见日文,比如~ 转换成 〜
     except:
         pass
-    keyword2 = urllib.parse.quote_plus(keyword,
-                                       encoding="EUC-JP")  # quote() 不编码斜线,空格‘ ’编码为‘%20’;quote_plus() 会编码斜线为‘%2F’; 空格‘ ’编码为‘+’
+    keyword2 = urllib.parse.quote_plus(keyword, encoding="EUC-JP")  # quote() 不编码斜线,空格‘ ’编码为‘%20’;quote_plus() 会编码斜线为‘%2F’; 空格‘ ’编码为‘+’
     url_search = f'http://www.getchu.com/php/search.phtml?genre=all&search_keyword={keyword2}&gc=gc'
     # http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit=&genre=anime_dvd&gc=gc
     debug_info = f'搜索地址: {url_search} '
@@ -252,13 +251,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
@@ -267,15 +260,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     # print(main('コンビニ○○Z 第三話 あなた、ヤンクレママですよね。旦那に万引きがバレていいんですか?'))
     # print(main('dokidokiりとる大家さん お家賃6突き目 妖しい踊りで悪霊祓い!『婦警』さんのきわどいオシオキ'))
     # print(main('[PoRO]エロコンビニ店長 泣きべそ蓮っ葉・栞~お仕置きじぇらしぃナマ逸機~'))
-    print(main('4562215333534'))
-    # print(main('人妻、蜜と肉 第二巻[月野定規]'))
-    # print(main('ACHDL-1159'))
-    # print(main('好きにしやがれ GOTcomics'))  # 書籍,没有番号
-    # print(main('あまあまロ●ータ女装男子レズ キス・フェラ・69からの3P介入'))
-    # print(main('DLID4033023'))
-    # print(main('', appoint_url='https://dl.getchu.com/i/item4033023'))
-    # print(main('ACMDP-1005'))  # 有时间、导演,上下集ACMDP-1005B
-    # print(main('ISTU-5391'))
-    # print(main('INH-392'))
-    # print(main('ISTU-5391', appoint_url='http://www.getchu.com/soft.phtml?id=1180483'))
-    # print(main('SPY×FAMILY Vol.1 Blu-ray Disc<初回生産限定版>'))  # dmm 没有
+    print(main('4562215333534'))  # print(main('人妻、蜜と肉 第二巻[月野定規]')) # print(main('ACHDL-1159')) # print(main('好きにしやがれ GOTcomics')) # 書籍,没有番号 # print(main('あまあまロ●ータ女装男子レズ キス・フェラ・69からの3P介入')) # print(main('DLID4033023')) # print(main('', appoint_url='https://dl.getchu.com/i/item4033023')) # print(main('ACMDP-1005')) # 有时间、导演,上下集ACMDP-1005B # print(main('ISTU-5391')) # print(main('INH-392')) # print(main('ISTU-5391', appoint_url='http://www.getchu.com/soft.phtml?id=1180483')) # print(main('SPY×FAMILY Vol.1 Blu-ray Disc<初回生産限定版>')) # dmm 没有
diff --git a/src/models/crawlers/getchu_dl.py b/src/models/crawlers/getchu_dl.py
index a6c658b..18c0075 100644
--- a/src/models/crawlers/getchu_dl.py
+++ b/src/models/crawlers/getchu_dl.py
@@ -87,8 +87,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     cookies = {"adult_check_flag": "1"}
     if not real_url and ('DLID' in number.upper() or 'ITEM' in number.upper() or 'GETCHU' in number.upper()):
         id = re.findall(r'\d+', number)[0]
-        real_url = f'https://dl.getchu.com/i/item{id}'
-        # real_url = 'https://dl.getchu.com/i/item4024984'
+        real_url = f'https://dl.getchu.com/i/item{id}'  # real_url = 'https://dl.getchu.com/i/item4024984'
     try:  # 捕获主动抛出的异常
         if not real_url:
@@ -98,8 +97,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
             keyword = keyword.encode('cp932').decode('shift_jis')  # 转换为常见日文,比如~ 转换成 〜
         except:
             pass
-        keyword2 = urllib.parse.quote_plus(keyword,
-                                           encoding="EUC-JP")  # quote() 不编码斜线,空格‘ ’编码为‘%20’;quote_plus() 会编码斜线为‘%2F’; 空格‘ ’编码为‘+’
+        keyword2 = urllib.parse.quote_plus(keyword, encoding="EUC-JP")  # quote() 不编码斜线,空格‘ ’编码为‘%20’;quote_plus() 会编码斜线为‘%2F’; 空格‘ ’编码为‘+’
         url_search = f'https://dl.getchu.com/search/search_list.php?dojin=1&search_category_id=&search_keyword={keyword2}&btnWordSearch=%B8%A1%BA%F7&action=search&set_category_flag=1'
         debug_info = f'搜索地址: {url_search} '
         log_info += web_info + debug_info
@@ -206,13 +204,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
@@ -222,6 +214,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     # print(main('[PoRO]エロコンビニ店長 泣きべそ蓮っ葉・栞~お仕置きじぇらしぃナマ逸機~'))
     # print(main('母ちゃんの友達にシコってるところ見られた。'))
     # print(main('DLID4024984'))
-    print(
-        main('【姫始めセックス流出】人気Y●u●berリアル彼女とのプライベートハメ撮り映像流出!!初詣帰りに振袖姿のまま彼女にしゃぶらせ生中出し!生々しい映像データ'))
-    # print(main('好きにしやがれ GOTcomics'))  # 書籍,没有番号
+    print(main('【姫始めセックス流出】人気Y●u●berリアル彼女とのプライベートハメ撮り映像流出!!初詣帰りに振袖姿のまま彼女にしゃぶらせ生中出し!生々しい映像データ'))  # print(main('好きにしやがれ GOTcomics')) # 書籍,没有番号
diff --git a/src/models/crawlers/getchu_dmm.py b/src/models/crawlers/getchu_dmm.py
index abbe9d9..e0d59bf 100644
--- a/src/models/crawlers/getchu_dmm.py
+++ b/src/models/crawlers/getchu_dmm.py
@@ -28,17 +28,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     else:
         json_data_new['log_info'] = json_data_dmm['dmm']['jp']['log_info']
         json_data_new['req_web'] = json_data_dmm['dmm']['jp']['req_web']
-    return json.dumps(
-        {'getchu_dmm': {
-            'zh_cn': json_data_new,
-            'zh_tw': json_data_new,
-            'jp': json_data_new,
-        }},
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    return json.dumps({'getchu_dmm': {'zh_cn': json_data_new, 'zh_tw': json_data_new, 'jp': json_data_new, }},
+                      ensure_ascii=False,
+                      sort_keys=False,
+                      indent=4,
+                      separators=(',', ': '), )


 if __name__ == '__main__':
@@ -54,6 +48,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     # print(main('OVA催眠性指導 #5宮島椿の場合'))  # 都没有
     # print(main('GLOD-148'))  # getchu 没有
     # print(main('(18禁アニメ) (無修正) 紅蓮 第1幕 「鬼」 (spursengine 960x720 h.264 aac)'))
-    print(main('誘惑 ~始発の章~'))
-    # print(main('ISTU-5391', appoint_url='http://www.getchu.com/soft.phtml?id=1180483'))
-    # print(main('SPY×FAMILY Vol.1 Blu-ray Disc<初回生産限定版>'))  # dmm 没有
+    print(main('誘惑 ~始発の章~'))  # print(main('ISTU-5391', appoint_url='http://www.getchu.com/soft.phtml?id=1180483')) # print(main('SPY×FAMILY Vol.1 Blu-ray Disc<初回生産限定版>')) # dmm 没有
diff --git a/src/models/crawlers/giga.py b/src/models/crawlers/giga.py
index de3c3b9..f218a4c 100644
--- a/src/models/crawlers/giga.py
+++ b/src/models/crawlers/giga.py
@@ -259,23 +259,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # yapf: disable
     # print(main('TRE-82'))  # 没有背景图,封面图查找路径变了
-    print(main('gsad-18'))  # 没有背景图,封面图查找路径变了
-    # print(main('GHOV-21'))
-    # print(main('GHOV-28'))
-    # print(main('MIAE-346'))
-    # print(main('STARS-1919'))  # poster图片
-    # print(main('abw-157'))
-    # print(main('abs-141'))
+    print(main('gsad-18'))  # 没有背景图,封面图查找路径变了 # print(main('GHOV-21')) # print(main('GHOV-28')) # print(main('MIAE-346')) # print(main('STARS-1919')) # poster图片 # print(main('abw-157')) # print(main('abs-141'))
diff --git a/src/models/crawlers/guochan.py b/src/models/crawlers/guochan.py
index da3d20e..3bf488a 100644
--- a/src/models/crawlers/guochan.py
+++ b/src/models/crawlers/guochan.py
@@ -17,111 +17,60 @@


 def get_lable_list():
-    return ["麻豆传媒", "91茄子", "Ed Mosaic", "HongKongDoll", "JVID", "MINI传媒", "SA国际传媒", "TWAV", "乌鸦传媒",
-            "乐播传媒", "优蜜传媒", "偶蜜国际", "叮叮映画", "哔哩传媒", "大象传媒", "天美传媒", "开心鬼传媒", "微密圈",
-            "扣扣传媒", "抖阴传媒", "星空无限传媒", "映秀传媒", "杏吧传媒", "果冻传媒", "模密传媒", "爱污传媒",
-            "爱神传媒",
-            "爱豆传媒", "狂点映像", "猛料原创", "猫爪影像", "皇家华人", "精东影业", "糖心VLOG", "维秘传媒", "草莓视频",
-            "萝莉社",
-            "蜜桃传媒", "西瓜影视", "起点传媒", "香蕉视频", "PsychoPorn色控", "蜜桃影像传媒", "大番号番啪啪",
-            "REAL野性派", "豚豚创媒",
-            "宫美娱乐", "肉肉传媒", "爱妃传媒", "91制片厂", "O-STAR", "兔子先生", "杏吧原创", "杏吧独家", "辣椒原创",
-            "麻豆传媒映画", "红斯灯影像",
-            "绝对领域", "麻麻传媒", "渡边传媒", "AV帝王", "桃花源", "蝌蚪传媒", "SWAG", "麻豆", "杏吧", "糖心",
-            "国产短视频", "国产精品", "国产AV", "涩会"]
+    return ["麻豆传媒", "91茄子", "Ed Mosaic", "HongKongDoll", "JVID", "MINI传媒", "SA国际传媒", "TWAV", "乌鸦传媒", "乐播传媒", "优蜜传媒", "偶蜜国际", "叮叮映画",
+            "哔哩传媒", "大象传媒", "天美传媒", "开心鬼传媒", "微密圈", "扣扣传媒", "抖阴传媒", "星空无限传媒", "映秀传媒", "杏吧传媒", "果冻传媒", "模密传媒",
+            "爱污传媒", "爱神传媒", "爱豆传媒", "狂点映像", "猛料原创", "猫爪影像", "皇家华人", "精东影业", "糖心VLOG", "维秘传媒", "草莓视频", "萝莉社", "蜜桃传媒",
+            "西瓜影视", "起点传媒", "香蕉视频", "PsychoPorn色控", "蜜桃影像传媒", "大番号番啪啪", "REAL野性派", "豚豚创媒", "宫美娱乐", "肉肉传媒", "爱妃传媒",
+            "91制片厂", "O-STAR", "兔子先生", "杏吧原创", "杏吧独家", "辣椒原创", "麻豆传媒映画", "红斯灯影像", "绝对领域", "麻麻传媒", "渡边传媒", "AV帝王", "桃花源",
+            "蝌蚪传媒", "SWAG", "麻豆", "杏吧", "糖心", "国产短视频", "国产精品", "国产AV", "涩会"]


 def get_actor_list():
-    return ['Madison Summers', 'Spencer Bradley', 'Madison Morgan', 'Rosalyn Sphinx', 'Braylin Bailey',
-            'Whitney Wright', 'Victoria Voxxx', 'Alexia Anders',
-            'Bella Rolland', 'Violet Myers', 'Sophia Leone', 'Violet Starr', 'Eliza Ibarra', 'HongKongDoll',
-            'Keira Croft', 'April Olsen', 'Avery Black',
-            'Amber Moore', 'Anny Aurora', 'Skylar Snow', 'Harley Haze', 'Paige Owens', 'Vanessa Sky', 'MasukuChan',
-            'Kate Bloom', 'Kimmy Kimm', 'Ana Foxxx',
-            'Lexi Luna', 'Gia Derza', 'Skye Blue', 'Nico Love', 'Alyx Star', 'Ryan Reid', 'Kira Noir', 'Karma Rx',
-            '下面有根棒棒糖', 'Vivian姐', 'COLA酱',
-            'cola醬', 'Stacy', 'ROXIE', '真木今日子', '小七软同学', 'Chloe', 'Alona', '小日向可怜', 'NANA', '玩偶姐姐',
-            '粉色情人', '桥本香菜', '冉冉学姐', '小二先生',
-            '饼干姐姐', 'Rona', '不见星空', '米娜学姐', '阿蛇姐姐', '樱花小猫', '樱井美里', '宸荨樱桃', '樱空桃桃',
-            '牛奶泡芙', '91兔兔', '棉花糖糖', '桥本爱菜',
-            '许木学长', 'MOMO', '驯鹿女孩', '高梨遥香', 'DORY', '冬月结衣', 'Aida', '香菜公主', '藤田美绪', '浅尾美羽',
-            '天音美纱', '中条爱莉', '三月樱花', 'Emma',
-            'Vita', '千夜喵喵', '水原圣子', '白川麻衣', '池田奈美', '西村莉娜', 'A天使爱', '中野惠子', '麻衣CC',
-            '樱桃空空', 'LENA', '小泽纱香', '木下日葵', '中岛芳子',
-            '弥生美月', '逢见梨花', '宇佐爱花', '沙月芽衣', '羽月萌音', '前田由美', '伊东爱瑠', 'Misa', '绿帽先生',
-            '莉娜乔安', '柚木结爱', '黑田奈奈', '神山奈奈',
-            '孟若羽', '夏晴子', '吴梦梦', '沈娜娜', '李蓉蓉', '林思妤', '仙儿媛', '金宝娜', '季妍希', '温芮欣',
-            '吴文淇', '苏语棠', '秦可欣', '吴芳宜', '李娜娜',
-            '乐奈子', '宋南伊', '小水水', '白允儿', '管明美', '雪千夏', '苏清歌', '玥可岚', '梁芸菲', '白熙雨',
-            '小敏儿', '楚梦舒', '柚子猫', '姚宛儿', '宋雨川',
-            '舒可芯', '苏念瑾', '白沛瑶', '林沁儿', '唐雨菲', '李允熙', '张芸熙', '寻小小', '白靖寒', '钟宛冰',
-            '李薇薇', '米菲兔', '雷梦娜', '董悦悦', '袁子仪',
-            '赖畇希', '王以欣', '夏禹熙', '狐不妖', '凌波丽', '黎芷萱', '陆斑比', '辛尤里', '小猫咪', '顾桃桃',
-            '南芊允', '岚湘庭', '林芊彤', '梁佳芯', '林凤娇',
-            '明日香', '绫波丽', '邓紫晴', '赵一曼', '吴茜茜', '锅锅酱', '倪哇哇', '潘雨曦', '吴恺彤', '美杜莎',
-            '郭童童', '陈可心', '莫夕慈', '沈芯语', '董小宛',
-            '苏艾文', '翁雨澄', '赵晓涵', '小桃酱', '宋东琳', '香月怜', '李文雯', '白若冰', '徐夜夜', '真希波',
-            '爱丽丝', '张宇芯', '金善雅', '李依依', '苏安亚',
-            '奶咪酱', '白葵司', '罗瑾萱', '宁洋子', '小夜夜', '白晶晶', '张雅婷', '吴心语', '林曼芸', '项子甯',
-            '吳芳宜', '苏小小', '文冰冰', '韩宝儿', '白星雨',
-            '林怡梦', '张欣妍', '七濑恋', '白思吟', '吴凯彤', '溫芮欣', '林可菲', '黎芷媗', '御梦子', '苏雨彤',
-            '古伊娜', '聂小倩', '陈圆圆', '沙美辰', '林妙可',
-            '乐淆雪', '李恩娜', '周晴晴', '杨思敏', '李曼妮', '戚小怜', '谢语彤', '王筱璐', '卢珊珊', '程诗诗',
-            '林玥玥', '白瞳瞳', '魏乔安', '米胡桃', '施子涵',
-            '北野爱', '杜冰若', '玛丽莲', '胡蓉蓉', '万静雪', '花语柔', '萧悦儿', '林晓雪', '兰心洁', '神谷怜',
-            '唐雨霏', '鸡蛋饼', '沈湘妮', '费爵娜', '小美惠',
-            '大奶露', '向若云', '苏小沫', '榨汁姬', '陈星然', '夏雨荷', '姚彤彤', '莫云雪', '艾瑞卡', '黄雪纯',
-            '赵雅琳', '叶宸欣', '伊琬琳', '陈美惠', '金巧巧',
-            '陈美琳', '陆思涵', '顾小北', '陈小雨', '维里娜', '兔小白', '叶子红', '美凉子', '李丹彤', '李微微',
-            '白婷婷', '艾米酱', '刘小姗', '白童童', '张琪琪',
-            'Yua', '小玩子', '岚可彤', '都可可', '李慕儿', '叶一涵', '赵佳美', '董小婉', '钟丽琪', '韩小雅', '杨朵儿',
-            '叶梦语', '程雨沫', '张曼青', '纪妍希', '伊婉琳',
-            '凌雨萱', '潘甜甜', '美竹玲', '韩依人', '奈奈子', '林雪漫', '宋甜甜', '陆雪琪', '宋妮可', '陆子欣',
-            '范可可', '许依然', '苏小新', '蒋梦琳', '李可欣',
-            '小鹿酱', '小林杏', '陶杏儿', '明步奈', '苏宁儿', '白潼潼', '增田枫', '特污兔', '何安汝', '倪菀儿',
-            '唐可可', '口罩酱', '小千绪', '糖糖儿', '许安妮',
-            '李婧琪', '刘思慧', '欧阳晶', '欧美玲', '林亦涵', '钟以彤', '许书曼', '付妙菱', '伊靖瑶', '张娅庭',
-            '韩小野', '宫泽蓝', '冯思雨', '林小樱', '刘颖儿',
-            '莫潇潇', '胡心瑶', '林雨露', '苏婧薇', '许月珍', '陈若瑶', '吴芮瑜', '叶如梦', '刘依依', '吴语菲',
-            '张妮妮', '林子涵', '张子瑜', '周卿卿', '李师师',
-            '苏语堂', '方紫璐', '袁采菱', '刘清韵', '李曼丽', '刘小雯', '姬咲华', '高小颜', '蔡晓雨', '梁如意',
-            '林语桐', '王小妮', '唐月琴', '星谷瞳', '何小丽',
-            '张婉妍', '酒井爱', '张秀玲', '晚晚酱', '薛梦琪', '李乐乐', '张佳晨', '程媛媛', '沐语柔', '安琪拉',
-            '韩倪希', '苏妲己', '白佳萱', '刘语珊', '徐韵珊',
-            '糖果屋', '顾伊梦', '赵颖儿', '莫安安', '黎星若', '林予曦', '蒋佑怡', '王有容', '李恩琦', '赵美凤',
-            '徐筱欣', '黄雅曼', '菲于娜', '金丞熙', '叶凡舒',
-            '郭瑶瑶', '李嘉欣', '袁庭妮', '林思好', '张云熙', '李忆彤', '伊蒂丝', '沙耶香', '美雪樱', '王亦舒',
-            '李文静', '鸡教练', '斑斑', '坏坏', '糖糖', '艾秋',
-            '凌薇', '莉娜', '韩棠', '苡若', '尤莉', '优娜', '林嫣', '徐蕾', '周甯', '唐茜', '香菱', '佳芯', '湘湘',
-            '米欧', '斑比', '蜜苏', '小婕', '艾熙', '娃娃',
-            '艾玛', '雪霏', '夜夜', '欣欣', '乔安', '羽芮', '美酱', '师师', '玖玖', '橙子', '晨曦', '苏娅', '黎儿',
-            '晨晨', '嘉洛', '小遥', '苏畅', '琪琪', '苡琍',
-            '李慕', '心萱', '昀希', '黎娜', '乐乐', '樱桃', '桐桐', '苏璇', '安娜', '悠悠', '茉莉', '宛冰', '静静',
-            '丝丝', '菲菲', '樱樱', '波妮', '唐芯', '小野',
-            '何苗', '甜心', '瑶瑶', '小捷', '薇薇', '美樱', '宁静', '欧妮', '吉吉', '小桃', '绯丽', '嘉琪', '咪妮',
-            '雯茜', '小洁', '李琼', '唐霏', '岚玥', '熙熙',
-            '米娅', '舒舒', '斯斯', '欣怡', '妍儿', '阿雅', '宋可', '畇希', '柔伊', '雅沁', '惠敏', '露露', '艾悠',
-            '娜娜', '李娜', '肖云', '王玥', '林洋', '清洛',
-            '艾鲤', '依涵', '半雪', '琦琦', '莎莎', '小冉', '琳怡', '莉奈', '梅子', '啤儿', '瑶贝', '杨柳', '童汐',
-            '米亚', '琳达', '晴天', 'KK', '紫宸', '淑怡',
-            '花花', '金铭', '程葳', '妍希', '咪妃', '茜茜', '小蜜', '凌萱', '觅嫣', '涵涵', '欲梦', '美琳', '杜鹃',
-            '许诺', '兮兮', '白鹿', '虞姬', '丽萨', '蔷薇',
-            '小影', '优优', '茶茶', '可儿', '甜甜', '憨憨', '波尼', '依颂', '依依', '思思', '芳情', '月牙', '小爱',
-            '淳儿', '苗方', '茶理', '苹果', '苏然', '陶子',
-            '董欣', '羽熙', '清沐', '林襄', '娃诺', '洁咪', '小昭', '球球', '紫萱', '南兰', '安琪', '可乐', '夏露',
-            '诗琪', '陈韵', '丽娜', '苏旋', '月月', '石榴',
-            '米兰', '恩恩', '西子', '芷萱', '酥酥', '王茜', '千鹤', '雪见', '姜洁', '张晴', '辰悦', '丁香', '白颖',
-            '穆娜', '小芳', '吉娜', '秋霞', '无双', '夏宝',
-            '舒涵', '小柔', '小小', '璇元', '米砂', '余丽', '美嘉', '莉莉', '奈奈', '黑糖', '晴子', '多乙', '徐婕',
-            '闵闵', '小雪', '洋洋', '明儿', '苏茜', '芯怡',
-            '姚茜', '百合', '婉婷', '小乔', '芽芽', '婕珍', '乔乔', '紫寒', '小薇', '菜菜', '洁米', '夏天', '灵枝',
-            '语伊', '徐艳', '王佩', '希汶', '雅捷', '喵喵',
-            '尤奈', '仙儿', '氖氖', '蔚曼', '田恬', '颂潮', '小婵', '千凌', '李燕', '林芳', '杨桃', '艾莉', '落落',
-            '冯雪', '王蓉', '妖妖', '雨晨', '心雪', '穆雪',
-            '韩焉', '邱月', '檀雅', '柯柯', '七七', '鱼儿', '丹丹', '简一', '淑仪', '小哇', '朵儿', '妲己', '云朵',
-            '唐菲', '邦妮', '白英', '夏夏', '安安', '小艺',
-            '丽丽', '敏敏', '空空', '椿芽', '小言', '李蕊', '水水', '小鱼', '艾艾', '尹媚', '夏滢', '琳希', '王欣',
-            '洛雪', '李茹', '娜米', '萱萱', '肖泳']
+    return ['Madison Summers', 'Spencer Bradley', 'Madison Morgan', 'Rosalyn Sphinx', 'Braylin Bailey', 'Whitney Wright', 'Victoria Voxxx', 'Alexia Anders',
+            'Bella Rolland', 'Violet Myers', 'Sophia Leone', 'Violet Starr', 'Eliza Ibarra', 'HongKongDoll', 'Keira Croft', 'April Olsen', 'Avery Black', 'Amber Moore',
+            'Anny Aurora', 'Skylar Snow', 'Harley Haze', 'Paige Owens', 'Vanessa Sky', 'MasukuChan', 'Kate Bloom', 'Kimmy Kimm', 'Ana Foxxx', 'Lexi Luna', 'Gia Derza',
+            'Skye Blue', 'Nico Love', 'Alyx Star', 'Ryan Reid', 'Kira Noir', 'Karma Rx', '下面有根棒棒糖', 'Vivian姐', 'COLA酱', 'cola醬', 'Stacy', 'ROXIE', '真木今日子',
+            '小七软同学', 'Chloe', 'Alona', '小日向可怜', 'NANA', '玩偶姐姐', '粉色情人', '桥本香菜', '冉冉学姐', '小二先生', '饼干姐姐', 'Rona', '不见星空', '米娜学姐',
+            '阿蛇姐姐', '樱花小猫', '樱井美里', '宸荨樱桃', '樱空桃桃', '牛奶泡芙', '91兔兔', '棉花糖糖', '桥本爱菜', '许木学长', 'MOMO', '驯鹿女孩', '高梨遥香', 'DORY',
+            '冬月结衣', 'Aida', '香菜公主', '藤田美绪', '浅尾美羽', '天音美纱', '中条爱莉', '三月樱花', 'Emma', 'Vita', '千夜喵喵', '水原圣子', '白川麻衣', '池田奈美',
+            '西村莉娜', 'A天使爱', '中野惠子', '麻衣CC', '樱桃空空', 'LENA', '小泽纱香', '木下日葵', '中岛芳子', '弥生美月', '逢见梨花', '宇佐爱花', '沙月芽衣',
+            '羽月萌音', '前田由美', '伊东爱瑠', 'Misa', '绿帽先生', '莉娜乔安', '柚木结爱', '黑田奈奈', '神山奈奈', '孟若羽', '夏晴子', '吴梦梦', '沈娜娜', '李蓉蓉',
+            '林思妤', '仙儿媛', '金宝娜', '季妍希', '温芮欣', '吴文淇', '苏语棠', '秦可欣', '吴芳宜', '李娜娜', '乐奈子', '宋南伊', '小水水', '白允儿', '管明美',
+            '雪千夏', '苏清歌', '玥可岚', '梁芸菲', '白熙雨', '小敏儿', '楚梦舒', '柚子猫', '姚宛儿', '宋雨川', '舒可芯', '苏念瑾', '白沛瑶', '林沁儿', '唐雨菲',
+            '李允熙', '张芸熙', '寻小小', '白靖寒', '钟宛冰', '李薇薇', '米菲兔', '雷梦娜', '董悦悦', '袁子仪', '赖畇希', '王以欣', '夏禹熙', '狐不妖', '凌波丽',
+            '黎芷萱', '陆斑比', '辛尤里', '小猫咪', '顾桃桃', '南芊允', '岚湘庭', '林芊彤', '梁佳芯', '林凤娇', '明日香', '绫波丽', '邓紫晴', '赵一曼', '吴茜茜',
+            '锅锅酱', '倪哇哇', '潘雨曦', '吴恺彤', '美杜莎', '郭童童', '陈可心', '莫夕慈', '沈芯语', '董小宛', '苏艾文', '翁雨澄', '赵晓涵', '小桃酱', '宋东琳',
+            '香月怜', '李文雯', '白若冰', '徐夜夜', '真希波', '爱丽丝', '张宇芯', '金善雅', '李依依', '苏安亚', '奶咪酱', '白葵司', '罗瑾萱', '宁洋子', '小夜夜',
+            '白晶晶', '张雅婷', '吴心语', '林曼芸', '项子甯', '吳芳宜', '苏小小', '文冰冰', '韩宝儿', '白星雨', '林怡梦', '张欣妍', '七濑恋', '白思吟', '吴凯彤',
+            '溫芮欣', '林可菲', '黎芷媗', '御梦子', '苏雨彤', '古伊娜', '聂小倩', '陈圆圆', '沙美辰', '林妙可', '乐淆雪', '李恩娜', '周晴晴', '杨思敏', '李曼妮',
+            '戚小怜', '谢语彤', '王筱璐', '卢珊珊', '程诗诗', '林玥玥', '白瞳瞳', '魏乔安', '米胡桃', '施子涵', '北野爱', '杜冰若', '玛丽莲', '胡蓉蓉', '万静雪',
+            '花语柔', '萧悦儿', '林晓雪', '兰心洁', '神谷怜', '唐雨霏', '鸡蛋饼', '沈湘妮', '费爵娜', '小美惠', '大奶露', '向若云', '苏小沫', '榨汁姬', '陈星然',
+            '夏雨荷', '姚彤彤', '莫云雪', '艾瑞卡', '黄雪纯', '赵雅琳', '叶宸欣', '伊琬琳', '陈美惠', '金巧巧', '陈美琳', '陆思涵', '顾小北', '陈小雨', '维里娜',
+            '兔小白', '叶子红', '美凉子', '李丹彤', '李微微', '白婷婷', '艾米酱', '刘小姗', '白童童', '张琪琪', 'Yua', '小玩子', '岚可彤', '都可可', '李慕儿', '叶一涵',
+            '赵佳美', '董小婉', '钟丽琪', '韩小雅', '杨朵儿', '叶梦语', '程雨沫', '张曼青', '纪妍希', '伊婉琳', '凌雨萱', '潘甜甜', '美竹玲', '韩依人', '奈奈子',
+            '林雪漫', '宋甜甜', '陆雪琪', '宋妮可', '陆子欣', '范可可', '许依然', '苏小新', '蒋梦琳', '李可欣', '小鹿酱', '小林杏', '陶杏儿', '明步奈', '苏宁儿',
+            '白潼潼', '增田枫', '特污兔', '何安汝', '倪菀儿', '唐可可', '口罩酱', '小千绪', '糖糖儿', '许安妮', '李婧琪', '刘思慧', '欧阳晶', '欧美玲', '林亦涵',
+            '钟以彤', '许书曼', '付妙菱', '伊靖瑶', '张娅庭', '韩小野', '宫泽蓝', '冯思雨', '林小樱', '刘颖儿', '莫潇潇', '胡心瑶', '林雨露', '苏婧薇', '许月珍',
+            '陈若瑶', '吴芮瑜', '叶如梦', '刘依依', '吴语菲', '张妮妮', '林子涵', '张子瑜', '周卿卿', '李师师', '苏语堂', '方紫璐', '袁采菱', '刘清韵', '李曼丽',
+            '刘小雯', '姬咲华', '高小颜', '蔡晓雨', '梁如意', '林语桐', '王小妮', '唐月琴', '星谷瞳', '何小丽', '张婉妍', '酒井爱', '张秀玲', '晚晚酱', '薛梦琪',
+            '李乐乐', '张佳晨', '程媛媛', '沐语柔', '安琪拉', '韩倪希', '苏妲己', '白佳萱', '刘语珊', '徐韵珊', '糖果屋', '顾伊梦', '赵颖儿', '莫安安', '黎星若',
+            '林予曦', '蒋佑怡', '王有容', '李恩琦', '赵美凤', '徐筱欣', '黄雅曼', '菲于娜', '金丞熙', '叶凡舒', '郭瑶瑶', '李嘉欣', '袁庭妮', '林思好', '张云熙',
+            '李忆彤', '伊蒂丝', '沙耶香', '美雪樱', '王亦舒', '李文静', '鸡教练', '斑斑', '坏坏', '糖糖', '艾秋', '凌薇', '莉娜', '韩棠', '苡若', '尤莉', '优娜', '林嫣',
+            '徐蕾', '周甯', '唐茜', '香菱', '佳芯', '湘湘', '米欧', '斑比', '蜜苏', '小婕', '艾熙', '娃娃', '艾玛', '雪霏', '夜夜', '欣欣', '乔安', '羽芮', '美酱',
+            '师师', '玖玖', '橙子', '晨曦', '苏娅', '黎儿', '晨晨', '嘉洛', '小遥', '苏畅', '琪琪', '苡琍', '李慕', '心萱', '昀希', '黎娜', '乐乐', '樱桃', '桐桐',
+            '苏璇', '安娜', '悠悠', '茉莉', '宛冰', '静静', '丝丝', '菲菲', '樱樱', '波妮', '唐芯', '小野', '何苗', '甜心', '瑶瑶', '小捷', '薇薇', '美樱', '宁静',
+            '欧妮', '吉吉', '小桃', '绯丽', '嘉琪', '咪妮', '雯茜', '小洁', '李琼', '唐霏', '岚玥', '熙熙', '米娅', '舒舒', '斯斯', '欣怡', '妍儿', '阿雅', '宋可',
+            '畇希', '柔伊', '雅沁', '惠敏', '露露', '艾悠', '娜娜', '李娜', '肖云', '王玥', '林洋', '清洛', '艾鲤', '依涵', '半雪', '琦琦', '莎莎', '小冉', '琳怡',
+            '莉奈', '梅子', '啤儿', '瑶贝', '杨柳', '童汐', '米亚', '琳达', '晴天', 'KK', '紫宸', '淑怡', '花花', '金铭', '程葳', '妍希', '咪妃', '茜茜', '小蜜', '凌萱',
+            '觅嫣', '涵涵', '欲梦', '美琳', '杜鹃', '许诺', '兮兮', '白鹿', '虞姬', '丽萨', '蔷薇', '小影', '优优', '茶茶', '可儿', '甜甜', '憨憨', '波尼', '依颂',
+            '依依', '思思', '芳情', '月牙', '小爱', '淳儿', '苗方', '茶理', '苹果', '苏然', '陶子', '董欣', '羽熙', '清沐', '林襄', '娃诺', '洁咪', '小昭', '球球',
+            '紫萱', '南兰', '安琪', '可乐', '夏露', '诗琪', '陈韵', '丽娜', '苏旋', '月月', '石榴', '米兰', '恩恩', '西子', '芷萱', '酥酥', '王茜', '千鹤', '雪见',
+            '姜洁', '张晴', '辰悦', '丁香', '白颖', '穆娜', '小芳', '吉娜', '秋霞', '无双', '夏宝', '舒涵', '小柔', '小小', '璇元', '米砂', '余丽', '美嘉', '莉莉',
+            '奈奈', '黑糖', '晴子', '多乙', '徐婕', '闵闵', '小雪', '洋洋', '明儿', '苏茜', '芯怡', '姚茜', '百合', '婉婷', '小乔', '芽芽', '婕珍', '乔乔', '紫寒',
+            '小薇', '菜菜', '洁米', '夏天', '灵枝', '语伊', '徐艳', '王佩', '希汶', '雅捷', '喵喵', '尤奈', '仙儿', '氖氖', '蔚曼', '田恬', '颂潮', '小婵', '千凌',
+            '李燕', '林芳', '杨桃', '艾莉', '落落', '冯雪', '王蓉', '妖妖', '雨晨', '心雪', '穆雪', '韩焉', '邱月', '檀雅', '柯柯', '七七', '鱼儿', '丹丹', '简一',
+            '淑仪', '小哇', '朵儿', '妲己', '云朵', '唐菲', '邦妮', '白英', '夏夏', '安安', '小艺', '丽丽', '敏敏', '空空', '椿芽', '小言', '李蕊', '水水', '小鱼',
+            '艾艾', '尹媚', '夏滢', '琳希', '王欣', '洛雪', '李茹', '娜米', '萱萱', '肖泳']


 def get_number_list(number, appoint_number='', file_path=''):  # 处理国产番号
@@ -408,26 +357,4 @@ def get_extra_info(title, file_path, info_type, tag='', actor='', series=''):
     # get_number_list('91CM-081', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.TS')
     # get_number_list('91CM-081', file_path='MD-0140-2.蜜苏.家有性事EP2.爱在身边.麻豆传媒映画.TS')
-    print(get_number_list('91CM-081',
-                          file_path='aaa/MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RuCK ME LIKE A SEX DOLL.麻豆传媒映画.TS'))
-    # get_number_list('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS')
-    # get_number_list('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS')
-    # get_number_list('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS')
-    # get_number_list('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS')
-    # get_number_list('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS')
-    # get_number_list('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS')
-    # get_number_list('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS')
-    # get_number_list('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS')
-    # get_number_list('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS')
-    # get_number_list('MDM-002')  # 去掉标题最后的发行商
-    # get_number_list('MDS-007')  # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS
-    # get_number_list('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')  # 数字要四位才能搜索到,即 MDJ-0001.EP1
-    # get_number_list('91CM-090')  # 带横线才能搜到
-    # get_number_list('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')  # 带空格才能搜到
-    # get_number_list('淫欲游戏王EP2')  # 不带空格才能搜到
-    # get_number_list('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')
-    # get_number_list('MD-0020')
-    # get_number_list('mds009')
-    # get_number_list('mds02209')
-    # get_number_list('女王的SM调教')
-    # get_number_list('91CM202')
-    # get_number_list('91CM-202')
+    print(get_number_list('91CM-081',
+                          file_path='aaa/MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RuCK ME LIKE A SEX DOLL.麻豆传媒映画.TS'))  # get_number_list('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS') # get_number_list('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS') # get_number_list('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS') # get_number_list('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS') # get_number_list('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS') # get_number_list('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS') # get_number_list('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS') # get_number_list('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS') # get_number_list('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS') # get_number_list('MDM-002') # 去掉标题最后的发行商 # get_number_list('MDS-007') # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS # get_number_list('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS') # 数字要四位才能搜索到,即 MDJ-0001.EP1 # get_number_list('91CM-090') # 带横线才能搜到 # get_number_list('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿') # 带空格才能搜到 # get_number_list('淫欲游戏王EP2') # 不带空格才能搜到 # get_number_list('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿') # get_number_list('MD-0020') # get_number_list('mds009') # get_number_list('mds02209') # get_number_list('女王的SM调教') # get_number_list('91CM202') # get_number_list('91CM-202')
diff --git a/src/models/crawlers/hdouban.py b/src/models/crawlers/hdouban.py
index c6f3274..8440359 100644
--- a/src/models/crawlers/hdouban.py
+++ b/src/models/crawlers/hdouban.py
@@ -357,13 +357,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


@@ -376,37 +370,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
     # print(main('snis-036'))  # 双人
     # print(main('SSNI-826'))
     # print(main('大胸母女勾引家教被爆操'))
-    print(main('CEMD-248'))
-    # print(main('TMG-019'))
-    # print(main('FC2-2473284 '))
-    # print(main('SHYN-147 '))  # 有系列
-    # print(main('MIAE-346'))  # 无结果
-    # print(main('STARS-191'))  # poster图片
-    # print(main('abw-157'))
-    # print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('n1403'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main('x-art.19.11.03'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
-    # print(main('1101132', ''))
-    # print(main('OFJE-318'))
-    # print(main('110119-001'))
-    # print(main('abs-001'))
-    # print(main('SSIS-090', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main('x-art.19.11.03', ''))
+    print(main('CEMD-248'))  # print(main('TMG-019')) # print(main('FC2-2473284 ')) # print(main('SHYN-147 ')) # 有系列 # print(main('MIAE-346')) # 无结果 # print(main('STARS-191')) # poster图片 # print(main('abw-157')) # print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('1101132', '')) # print(main('OFJE-318')) # print(main('110119-001')) # print(main('abs-001')) # print(main('SSIS-090', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/hscangku.py b/src/models/crawlers/hscangku.py
index ba6dcad..f4a1b1d 100644
--- a/src/models/crawlers/hscangku.py
+++ b/src/models/crawlers/hscangku.py
@@ -27,8 +27,7 @@ def get_actor_photo(actor):

 def get_detail_info(html, real_url, number, file_path):
     href = re.split(r'[/.]', real_url)[-2]
-    title_h1 = html.xpath(
-        '//h3[@class="title" and not(contains(normalize-space(.), "目录")) and not(contains(normalize-space(.), "为你推荐"))]/text()')
+    title_h1 = html.xpath('//h3[@class="title" and not(contains(normalize-space(.), "目录")) and not(contains(normalize-space(.), "为你推荐"))]/text()')
     title = title_h1[0].replace(number + ' ', '').strip() if title_h1 else number
     actor = get_extra_info(title, file_path, info_type="actor")
     tag = get_extra_info(title, file_path, info_type="tag")
@@ -177,13 +176,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
diff --git a/src/models/crawlers/iqqtv.py b/src/models/crawlers/iqqtv.py
index eebab5a..8383466 100644
--- a/src/models/crawlers/iqqtv.py
+++ b/src/models/crawlers/iqqtv.py
@@ -35,9 +35,7 @@ def getWebNumber(title, number):
         result = result[-1]
     else:
         result = number.upper()
-    return result.replace('_1pondo_', '').replace('1pondo_', '').replace('caribbeancom-', '').replace('caribbeancom',
-                                                                                                      '').replace(
-        '-PPV', '').strip(' _-')
+    return result.replace('_1pondo_', '').replace('1pondo_', '').replace('caribbeancom-', '').replace('caribbeancom', '').replace('-PPV', '').strip(' _-')


 def getActor(html):
@@ -72,7 +70,7 @@ def getOutline(html):
         return ''
     else:
         # 去除简介中的无意义信息,中间和首尾的空白字符、简介两字、*根据分发等
-        result = re.sub(r'[\n\t]|(简|簡)介:', '', result).split('*根据分发', 1 )[0].strip()
+        result = re.sub(r'[\n\t]|(简|簡)介:', '', result).split('*根据分发', 1)[0].strip()
     return result
@@ -151,7 +149,7 @@ def get_real_url(html, number):
         detail_url = each.xpath('./a/@href')[0]
         title = each.xpath('./a/@title')[0]
         # 注意去除马赛克破坏版等几乎没有有效字段的条目
-        if number.upper() in title and all(keyword not in title for keyword in ['克破', '无码破解', '無碼破解', '无码流出','無碼流出']):
+        if number.upper() in title and all(keyword not in title for keyword in ['克破', '无码破解', '無碼破解', '无码流出', '無碼流出']):
             return detail_url
     return ''
@@ -297,13 +295,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {language: dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js
diff --git a/src/models/crawlers/iqqtv_new.py b/src/models/crawlers/iqqtv_new.py
index 8ef1d7a..3e6c85f 100644
--- a/src/models/crawlers/iqqtv_new.py
+++ b/src/models/crawlers/iqqtv_new.py
@@ -8,20 +8,13 @@


 def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
-    all_language = config.title_language + config.outline_language + config.actor_language + \
-                   config.tag_language + config.series_language + config.studio_language
+    all_language = config.title_language + config.outline_language + config.actor_language + config.tag_language + config.series_language + config.studio_language
     appoint_url = appoint_url.replace('/cn/', '/jp/').replace('iqqtv.cloud/player', 'iqqtv.cloud/jp/player')
     json_data = json.loads(iqqtv.main(number, appoint_url, log_info, req_web, 'jp'))
     if not json_data['iqqtv']['jp']['title']:
         json_data['iqqtv']['zh_cn'] = json_data['iqqtv']['jp']
         json_data['iqqtv']['zh_tw'] = json_data['iqqtv']['jp']
-        return json.dumps(
-            json_data,
-            ensure_ascii=False,
-            sort_keys=False,
-            indent=4,
-            separators=(',', ': '),
-        )
+        return json.dumps(json_data, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )

     log_info = json_data['iqqtv']['jp']['log_info']
     req_web = json_data['iqqtv']['jp']['req_web']
@@ -40,42 +33,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
         dic['originalplot'] = json_data['iqqtv']['jp']['originalplot']
     json_data['iqqtv'].update({'zh_cn': dic, 'zh_tw': dic})

-    js = json.dumps(
-        json_data,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(json_data, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
-    print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('n1403'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
+    print(main('abs-141'))  # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main_us('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217'))
-    # print(main('1101132', ''))
-    # print(main('OFJE-318'))
-    # print(main('110119-001'))
-    # print(main('abs-001'))
-    # print(main('SSIS-090', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main_us('x-art.19.11.03', ''))
+    # print(main('1101132', '')) # print(main('OFJE-318')) # print(main('110119-001')) # print(main('abs-001')) # print(main('SSIS-090', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/jav321.py b/src/models/crawlers/jav321.py
index ac0f5ef..daa34e9 100644
--- a/src/models/crawlers/jav321.py
+++ b/src/models/crawlers/jav321.py
@@ -33,8 +33,7 @@ def getActor(response):
     if re.search(r'(\S+)  ', response):
         return str(re.findall(r'(\S+)  ', response)).strip(" [',']").replace('\'', '')
     elif re.search(r'(\S+)  ', response):
-        return str(re.findall(r'(\S+)  ', response)).strip(" [',']").replace('\'',
-                                                                             '')
+        return str(re.findall(r'(\S+)  ', response)).strip(" [',']").replace('\'', '')
     else:
         return str(re.findall(r'出演者: ([^<]+)  ', response)).strip(" [',']").replace('\'', '')
@@ -83,8 +82,7 @@ def getRelease(response):


 def getCover(detail_page):
-    cover_url = str(detail_page.xpath(
-        "/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 " "col-md-12'][1]/p/a/img[@class='img-responsive']/@src")).strip(
+    cover_url = str(detail_page.xpath("/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 " "col-md-12'][1]/p/a/img[@class='img-responsive']/@src")).strip(
         " ['']")
     if cover_url == '':
         cover_url = str(detail_page.xpath("//*[@id='vjs_sample_player']/@poster")).strip(" ['']")
@@ -92,8 +90,7 @@ def getCover(detail_page):


 def getExtraFanart(htmlcode):
-    extrafanart_list = htmlcode.xpath(
-        "/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 col-md-12']/p/a/img[@class='img-responsive']/@src")
+    extrafanart_list = htmlcode.xpath("/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 col-md-12']/p/a/img[@class='img-responsive']/@src")
     return extrafanart_list
@@ -169,15 +166,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         series = getSeries(detail_page)
         extrafanart = getExtraFanart(detail_page)
         # 判断无码
-        uncensorted_list = ['一本道', 'HEYZO', 'サムライポルノ', 'キャットウォーク', 'サイクロン', 'ルチャリブレ',
-                            'スーパーモデルメディア', 'スタジオテリヤキ',
-                            'レッドホットコレクション', 'スカイハイエンターテインメント', '小天狗',
-                            'オリエンタルドリーム', 'Climax Zipang', 'CATCHEYE',
-                            'ファイブスター', 'アジアンアイズ', 'ゴリラ', 'ラフォーレ ガール', 'MIKADO',
-                            'ムゲンエンターテインメント', 'ツバキハウス', 'ザーメン二郎',
-                            'トラトラトラ', 'メルシーボークー', '神風', 'Queen 8', 'SASUKE', 'ファンタドリーム',
-                            'マツエンターテインメント', 'ピンクパンチャー',
-                            'ワンピース', 'ゴールデンドラゴン', 'Tokyo Hot', 'Caribbean']
+        uncensorted_list = ['一本道', 'HEYZO', 'サムライポルノ', 'キャットウォーク', 'サイクロン', 'ルチャリブレ', 'スーパーモデルメディア', 'スタジオテリヤキ',
+                            'レッドホットコレクション', 'スカイハイエンターテインメント', '小天狗', 'オリエンタルドリーム', 'Climax Zipang', 'CATCHEYE', 'ファイブスター',
+                            'アジアンアイズ', 'ゴリラ', 'ラフォーレ ガール', 'MIKADO', 'ムゲンエンターテインメント', 'ツバキハウス', 'ザーメン二郎', 'トラトラトラ',
+                            'メルシーボークー', '神風', 'Queen 8', 'SASUKE', 'ファンタドリーム', 'マツエンターテインメント', 'ピンクパンチャー', 'ワンピース',
+                            'ゴールデンドラゴン', 'Tokyo Hot', 'Caribbean']
     for each in uncensorted_list:
         if each == studio:
             mosaic = '无码'
@@ -233,13 +226,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js
diff --git a/src/models/crawlers/javbus.py b/src/models/crawlers/javbus.py
index 7239fb8..f53188e 100644
--- a/src/models/crawlers/javbus.py
+++ b/src/models/crawlers/javbus.py
@@ -32,8 +32,7 @@ def getWebNumber(html, number):

 def getActor(html):
     try:
-        result = str(html.xpath('//div[@class="star-name"]/a/text()')).strip(" ['']").replace("'", '').replace(', ',
-                                                                                                               ',')
+        result = str(html.xpath('//div[@class="star-name"]/a/text()')).strip(" ['']").replace("'", '').replace(', ', ',')
     except:
         result = ''
     return result
@@ -222,10 +221,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', mosaic=
     javbus_url = getattr(config, 'javbus_website', 'https://www.javbus.com')
     headers = config.headers
     cookie = config.javbus
-    headers_o = {
-        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6',
-        'cookie': cookie,
-    }
+    headers_o = {'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,ja;q=0.6', 'cookie': cookie, }
     headers.update(headers_o)

     title = ''
@@ -380,43 +376,11 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', mosaic=
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # yapf: disable
     # print(main('LAFBD-034'))  # cwp,cwpbd 数字为2位时不带0
-    print(main('PMAXVR-008'))
-    # print(main('cwpbd-034'))  # cwp,cwpbd 数字为2位时不带0
-    # print(main('FC2-1262472'))  # 无结果
-    # print(main('STARS-199'))  # 禁止
-    # print(main('EDVR-043'))  # 无结果
-    # print(main('SSIS-243'))
-    # print(main('ABW-015'))
-    # print(main('DASD-972'))
-    # print(main('ss-036'))  # 无结果
-    # print(main('KMHRS-050'))
-    # print(main('KV-115'))  # 无结果
-    # print(main('070621_001'))
-    # print(main('heyzo-1031'))
-    # print(main('heyzo-0811'))
-    # print(main('heyzo-1673'))
-    # print(main('dv-1175'))  # 无结果,通过搜索有结果
-    # print(main('dv1175'))
-    # print(main('ssni-644'))
-    # print(main('010115-001'))
-    # print(main('ssni644'))
-    # print(main('BigTitsatWork-17-09-26'))
-    # print(main('BrazzersExxtra.21.02.01'))
-    # print(main('KA-001'))  # 无结果
-    # print(main('012715-793'))
-    # print(main('ssni-644', "https://www.javbus.com/SSNI-644"))
-    # print(main('ssni-802', ""))
-    # print(main('DirtyMasseur.20.07.26', "https://www.javbus.hair/DirtyMasseur-21-01-31"))
+    print(main('PMAXVR-008'))  # print(main('cwpbd-034')) # cwp,cwpbd 数字为2位时不带0 # print(main('FC2-1262472')) # 无结果 # print(main('STARS-199')) # 禁止 # print(main('EDVR-043')) # 无结果 # print(main('SSIS-243')) # print(main('ABW-015')) # print(main('DASD-972')) # print(main('ss-036')) # 无结果 # print(main('KMHRS-050')) # print(main('KV-115')) # 无结果 # print(main('070621_001')) # print(main('heyzo-1031')) # print(main('heyzo-0811')) # print(main('heyzo-1673')) # print(main('dv-1175')) # 无结果,通过搜索有结果 # print(main('dv1175')) # print(main('ssni-644')) # print(main('010115-001')) # print(main('ssni644')) # print(main('BigTitsatWork-17-09-26')) # print(main('BrazzersExxtra.21.02.01')) # print(main('KA-001')) # 无结果 # print(main('012715-793')) # print(main('ssni-644', "https://www.javbus.com/SSNI-644")) # print(main('ssni-802', "")) # print(main('DirtyMasseur.20.07.26', "https://www.javbus.hair/DirtyMasseur-21-01-31"))
diff --git a/src/models/crawlers/javday.py b/src/models/crawlers/javday.py
index f97cd82..58b6b50 100644
--- a/src/models/crawlers/javday.py
+++ b/src/models/crawlers/javday.py
@@ -239,8 +239,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         release = ''
         year = ''
         studio = get_studio(series, tag, lable_list)
-        number, title = get_real_number_title(number, title, number_list, appoint_number, appoint_url, lable_list,
-                                              tag, actor, series)
+        number, title = get_real_number_title(number, title, number_list, appoint_number, appoint_url, lable_list, tag, actor, series)

     try:
         dic = {
@@ -295,13 +294,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
@@ -358,18 +351,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
     # print(main('', file_path='WTB-075 酒店妹包养软饭男 为了让他振作只好以身相许 乌托邦.ts'))  # 标题里有\t
     # print(main('', file_path='杏吧八戒1 - 3000约操18岁大一新生,苗条身材白嫩紧致.ts'))  # 分词匹配,带标点或者整个标题去匹配
     # print(main('', file_path='萝莉社 女大学生找模特兼职 被要求裸露拍摄 被套路内射.ts'))  # 分词匹配,带标点或者整个标题去匹配
-    print(main('', file_path='/sp/sp6/国产测试/MD-0240 周處除三嗨.mp4'))
-    # print(main('MDM-002'))  # 去掉标题最后的发行商
-    # print(main('MDS-0007'))  # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS
-    # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS'))  # 数字要四位才能搜索到,即 MDJ-0001.EP1
-    # print(main('91CM-090'))  # 带横线才能搜到
-    # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿'))  # 带空格才能搜到
-    # print(main('淫欲游戏王EP2'))  # 不带空格才能搜到
-    # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿'))
-    # print(main('MD-0020'))
-    # print(main('mds009'))
-    # print(main('女王的SM调教'))
-    # print(main('91CM202'))
-    # print(main('必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts', file_path='必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts'))
-    # print(main('', file_path='素人自制舒舒 富婆偷情被偷拍 亏大了!50W买个视频还被操.ts'))
-    # print(main('', file_path='/sp/sp3/国产/2021年10月份 國產原創原版合集/20211003 91CM-191 你好同学ep5 MSD011/[c0e0.com]实战现场 .TS'))
+    print(main('',
+               file_path='/sp/sp6/国产测试/MD-0240 周處除三嗨.mp4'))  # print(main('MDM-002')) # 去掉标题最后的发行商 # print(main('MDS-0007')) # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')) # 数字要四位才能搜索到,即 MDJ-0001.EP1 # print(main('91CM-090')) # 带横线才能搜到 # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')) # 带空格才能搜到 # print(main('淫欲游戏王EP2')) # 不带空格才能搜到 # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')) # print(main('MD-0020')) # print(main('mds009')) # print(main('女王的SM调教')) # print(main('91CM202')) # print(main('必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts', file_path='必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts')) # print(main('', file_path='素人自制舒舒 富婆偷情被偷拍 亏大了!50W买个视频还被操.ts')) # print(main('', file_path='/sp/sp3/国产/2021年10月份 國產原創原版合集/20211003 91CM-191 你好同学ep5 MSD011/[c0e0.com]实战现场 .TS'))
diff --git a/src/models/crawlers/javdb.py b/src/models/crawlers/javdb.py
index 91d2b01..88e1880 100644
--- a/src/models/crawlers/javdb.py
+++ b/src/models/crawlers/javdb.py
@@ -39,10 +39,8 @@ def get_title(html, org_language):


 def get_actor(html):
-    actor_result = html.xpath(
-        '//div[@class="panel-block"]/strong[contains(text(), "演員:") or contains(text(), "Actor(s):")]/../span[@class="value"]/a/text()')
-    gender_result = html.xpath(
-        '//div[@class="panel-block"]/strong[contains(text(), "演員:") or contains(text(), "Actor(s):")]/../span[@class="value"]/strong/@class')
+    actor_result = html.xpath('//div[@class="panel-block"]/strong[contains(text(), "演員:") or contains(text(), "Actor(s):")]/../span[@class="value"]/a/text()')
+    gender_result = html.xpath('//div[@class="panel-block"]/strong[contains(text(), "演員:") or contains(text(), "Actor(s):")]/../span[@class="value"]/strong/@class')
     i = 0
     actor_list = []
     for gender in gender_result:
@@ -102,9 +100,7 @@ def get_year(get_release):
 def get_tag(html):
     result1 = str(html.xpath('//strong[contains(text(),"類別:")]/../span/a/text()')).strip(" ['']")
     result2 = str(html.xpath('//strong[contains(text(),"Tags:")]/../span/a/text()')).strip(" ['']")
-    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,',
-                                                                                                             '').lstrip(
-        ',')
+    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')


 def get_cover(html):
@@ -173,9 +169,7 @@ def get_real_url(html, number):  # 获取详情页链接
         if number.upper() in each[1].upper():
             return each[0]
     for each in info_list:  # 再从所有结果模糊匹配
-        if number.upper().replace('.', '').replace('-', '').replace(' ', '') in (each[1] + each[2]).upper().replace('-',
-                                                                                                                    '').replace(
-            '.', '').replace(' ', ''):
+        if number.upper().replace('.', '').replace('-', '').replace(' ', '') in (each[1] + each[2]).upper().replace('-', '').replace('.', '').replace(' ', ''):
             return each[0]
     return False
@@ -318,11 +312,12 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', org_lan
         actor_photo = get_actor_photo(actor)
         all_actor_photo = get_actor_photo(all_actor)
         number = get_number(html_detail, number)
-        title = title.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_', '-').replace(
-            number.upper(), '').replace(number, '').replace('--', '-').strip()
-        originaltitle = originaltitle.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_',
-                                                                                                             '-').replace(
-            number.upper(), '').replace(number, '').replace('--', '-').strip()
+        title = title.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_', '-').replace(number.upper(), '').replace(number, '').replace('--',
+                                                                                                                                                             '-').strip()
+        originaltitle = originaltitle.replace('中文字幕', '').replace('無碼', '').replace("\\n", '').replace('_', '-').replace(number.upper(), '').replace(number,
+                                                                                                                                                          '').replace(
+            '--',
+            '-').strip()
         cover_url = get_cover(html_detail)  # 获取cover
         poster_url = cover_url.replace('/covers/', '/thumbs/')
         outline = ''
@@ -398,13 +393,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', org_lan
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


@@ -431,47 +420,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', org_lan
     # print(main('', 'https://javdb.com/v/GAO75'))
     # print(main('SIRO-4770 '))
     # print(main('4030-2405'))  # 需要登录
-    print(main('FC2-1262472'))  # 需要登录
-    # print(main('HUNTB-107'))  # 预告片返回url错误,只有https
-    # print(main('FC2-2392657'))  # 需要登录
-    # print(main('GS-067'))  # 两个同名番号
-    # print(main('MIDE-022'))
-    # print(main('KRAY-001'))
-    # print(main('ssis-243'))
-    # print(main('MIDE-900', 'https://javdb.com/v/MZp24?locale=en'))
-    # print(main('TD-011'))
-    # print(main('stars-011'))  # 发行商SOD star,下载封面
-    # print(main('stars-198'))  # 发行商SOD star,下载封面
-    # print(main('mium-748'))
-    # print(main('KMHRS-050'))  # 剧照第一张作为poster
-    # print(main('SIRO-4042'))
-    # print(main('snis-035'))
-    # print(main('snis-036'))
-    # print(main('vixen.18.07.18', ''))
-    # print(main('vixen.16.08.02', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('bangbros18.19.09.17'))
-    # print(main('x-art.19.11.03'))
-    # print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('n1403'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
-    # print(main('SSIS-001', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('DANDY-520', ''))
-    # print(main('teenslovehugecocks.22.09.14'))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main_us('x-art.19.11.03', ''))
+    print(main('FC2-1262472'))  # 需要登录 # print(main('HUNTB-107')) # 预告片返回url错误,只有https # print(main('FC2-2392657')) # 需要登录 # print(main('GS-067')) # 两个同名番号 # print(main('MIDE-022')) # print(main('KRAY-001')) # print(main('ssis-243')) # print(main('MIDE-900', 'https://javdb.com/v/MZp24?locale=en')) # print(main('TD-011')) # print(main('stars-011')) # 发行商SOD star,下载封面 # print(main('stars-198')) # 发行商SOD star,下载封面 # print(main('mium-748')) # print(main('KMHRS-050')) # 剧照第一张作为poster # print(main('SIRO-4042')) # print(main('snis-035')) # print(main('snis-036')) # print(main('vixen.18.07.18', '')) # print(main('vixen.16.08.02', '')) # print(main('SNIS-016', '')) # print(main('bangbros18.19.09.17')) # print(main('x-art.19.11.03')) # print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('SSIS-001', '')) # print(main('SSIS-090', '')) # print(main('DANDY-520', '')) # print(main('teenslovehugecocks.22.09.14')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/javlibrary.py b/src/models/crawlers/javlibrary.py
index 428e40a..8526699 100644
--- a/src/models/crawlers/javlibrary.py
+++ b/src/models/crawlers/javlibrary.py
@@ -304,13 +304,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {language: dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


@@ -321,29 +315,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
     # print(main('SSNI-994'))
     # print(main('SSNI-795'))
     # print(main(' IPX-071'))
-    print(main('SNIS-003'))
-    # print(main('SSIS-118'))
-    # print(main('AA-007'))
-    # print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('n1403'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
-    # print(main('SSIS-001', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main_us('x-art.19.11.03', ''))
+    print(main('SNIS-003'))  # print(main('SSIS-118')) # print(main('AA-007')) # print(main('abs-141')) # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main_us('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('SSIS-001', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/javlibrary_new.py b/src/models/crawlers/javlibrary_new.py
index 3c02d4d..917632e 100644
--- a/src/models/crawlers/javlibrary_new.py
+++ b/src/models/crawlers/javlibrary_new.py
@@ -12,21 +12,13 @@


 def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
-    all_language = config.title_language + config.outline_language + \
-                   config.actor_language + config.tag_language + config.series_language + \
-                   config.studio_language
+    all_language = config.title_language + config.outline_language + config.actor_language + config.tag_language + config.series_language + config.studio_language
     appoint_url = appoint_url.replace('/cn/', '/ja/').replace('/tw/', '/ja/')
     json_data = json.loads(javlibrary.main(number, appoint_url, log_info, req_web, 'jp'))
     if not json_data['javlibrary']['jp']['title']:
         json_data['javlibrary']['zh_cn'] = json_data['javlibrary']['jp']
         json_data['javlibrary']['zh_tw'] = json_data['javlibrary']['jp']
-        return json.dumps(
-            json_data,
-            ensure_ascii=False,
-            sort_keys=False,
-            indent=4,
-            separators=(',', ': '),
-        )
+        return json.dumps(json_data, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )

     log_info = json_data['javlibrary']['jp']['log_info']
     req_web = json_data['javlibrary']['jp']['req_web']
@@ -44,42 +36,10 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
         dic['originalplot'] = json_data['javlibrary']['jp']['originalplot']
     json_data['javlibrary'].update({'zh_cn': dic, 'zh_tw': dic})

-    js = json.dumps(
-        json_data,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(json_data, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # print(main('SSIS-118', 'https://www.javlibrary.com/cn/?v=javme5ly4e'))
-    print(main('abs-141'))
-    # print(main('HYSD-00083'))
-    # print(main('IESP-660'))
-    # print(main('n1403'))
-    # print(main('GANA-1910'))
-    # print(main('heyzo-1031'))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001'))
-    # print(main('S2M-055'))
-    # print(main('LUXU-1217'))
-    # print(main('1101132', ''))
-    # print(main('OFJE-318'))
-    # print(main('110119-001'))
-    # print(main('abs-001'))
-    # print(main('SSIS-090', ''))
-    # print(main('SSIS-090', ''))
-    # print(main('SNIS-016', ''))
-    # print(main('HYSD-00083', ''))
-    # print(main('IESP-660', ''))
-    # print(main('n1403', ''))
-    # print(main('GANA-1910', ''))
-    # print(main('heyzo-1031', ''))
-    # print(main_us('x-art.19.11.03'))
-    # print(main('032020-001', ''))
-    # print(main('S2M-055', ''))
-    # print(main('LUXU-1217', ''))
-    # print(main_us('x-art.19.11.03', ''))
+    print(main('abs-141'))  # print(main('HYSD-00083')) # print(main('IESP-660')) # print(main('n1403')) # print(main('GANA-1910')) # print(main('heyzo-1031')) # print(main_us('x-art.19.11.03')) # print(main('032020-001')) # print(main('S2M-055')) # print(main('LUXU-1217')) # print(main('1101132', '')) # print(main('OFJE-318')) # print(main('110119-001')) # print(main('abs-001')) # print(main('SSIS-090', '')) # print(main('SSIS-090', '')) # print(main('SNIS-016', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) # print(main_us('x-art.19.11.03')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) # print(main_us('x-art.19.11.03', ''))
diff --git a/src/models/crawlers/kin8.py b/src/models/crawlers/kin8.py
index 4f51bba..d63065e 100644
--- a/src/models/crawlers/kin8.py
+++ b/src/models/crawlers/kin8.py
@@ -44,8 +44,7 @@ def get_actor(html):


 def get_tag(html):
-    result = html.xpath(
-        '//td[@class="movie_table_td" and contains(text(), "カテゴリー")]/following-sibling::td/div/a/text()')
+    result = html.xpath('//td[@class="movie_table_td" and contains(text(), "カテゴリー")]/following-sibling::td/div/a/text()')
     return ','.join(result)
@@ -77,8 +76,7 @@ def get_extrafanart(html):
         if i:
             if 'http' not in i:
                 i = f"https:{i}"
-            new_result.append(
-                i.replace('/2.jpg', '/2_lg.jpg').replace('/3.jpg', '/3_lg.jpg').replace('/4.jpg', '/4_lg.jpg'))
+            new_result.append(i.replace('/2.jpg', '/2_lg.jpg').replace('/3.jpg', '/3_lg.jpg').replace('/4.jpg', '/4_lg.jpg'))
     return new_result
@@ -192,13 +190,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
diff --git a/src/models/crawlers/love6.py b/src/models/crawlers/love6.py
index 2f9422e..fc58810 100644
--- a/src/models/crawlers/love6.py
+++ b/src/models/crawlers/love6.py
@@ -251,13 +251,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js
diff --git a/src/models/crawlers/lulubar.py b/src/models/crawlers/lulubar.py
index 92b8eed..1703287 100644
--- a/src/models/crawlers/lulubar.py
+++ b/src/models/crawlers/lulubar.py
@@ -89,8 +89,7 @@ def get_mosaic(html):


 def get_tag(html):
-    result = html.xpath(
-        '//div[@class="tag_box d-flex flex-wrap p-1 col-12 mb-1"]/a[@class="tag" and contains(@href,"bytagdetail")]/text()')
+    result = html.xpath('//div[@class="tag_box d-flex flex-wrap p-1 col-12 mb-1"]/a[@class="tag" and contains(@href,"bytagdetail")]/text()')
     new_list = []
     for a in result:
         new_list.append(a.strip())
@@ -241,13 +240,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )  # .encode('UTF-8')
     return js
@@ -255,12 +248,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
     # yapf: disable
     # print(main('TRE-82'))  # 没有背景图,封面图查找路径变了
     # print(main('gsad-18'))  # 没有结果
-    print(main('SSIS-463'))
-    # print(main('ebod-900'))  # 双人
-    # print(main('MDHT-0009'))  # 国产
-    # print(main('GHOV-21'))
-    # print(main('GHOV-28'))
-    # print(main('MIAE-346'))
-    # print(main('STARS-1919'))  # poster图片
-    # print(main('abw-157'))
-    # print(main('abs-141'))
+    print(main('SSIS-463'))  # print(main('ebod-900')) # 双人 # print(main('MDHT-0009')) # 国产 # print(main('GHOV-21')) # print(main('GHOV-28')) # print(main('MIAE-346')) # print(main('STARS-1919')) # poster图片 # print(main('abw-157')) # print(main('abs-141'))
diff --git a/src/models/crawlers/madouqu.py b/src/models/crawlers/madouqu.py
index a3ce862..6c5f61e 100644
--- a/src/models/crawlers/madouqu.py
+++ b/src/models/crawlers/madouqu.py
@@ -180,13 +180,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    )
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), )
     return js
@@ -203,63 +197,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
     # print(main('mini06', file_path='mini06.全裸家政.只為弟弟的學費打工.被玩弄的淫亂家政小妹.mini傳媒'))
     # print(main('mini06.全裸家政.只为弟弟的学费打工.被玩弄的淫乱家政小妹.mini传媒', file_path='mini06.全裸家政.只为弟弟的学费打工.被玩弄的淫乱家政小妹.mini传媒'))
     # print(main('XSJ138', file_path='XSJ138.养子的秘密教学EP6.薇安姐内射教学.性视界出品'))
-    print(main('DW-006.AV帝王作品.Roxie出演.地方妈妈的性解放.双穴双屌', file_path='DW-006.AV帝王作品.Roxie出演.地方妈妈的性解放.双穴双屌'))
-    # print(main('MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作', file_path='MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作'))
-    # print(main('MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列', file_path='MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列'))
-    # print(main('MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样', file_path='MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样'))
-    # print(main('MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版', file_path='MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版'))
-    # print(main('MSD-023', file_path='MSD023.袁子仪.杨柳.可爱女孩非亲妹.渴望已久的(非)近亲性爱.麻豆传媒映画.Model.Seeding系列.mp4'))
-    # print(main('', file_path='夏日回忆 贰'))
-    # print(main('MDX-0016'))
-    # print(main('MDSJ-0004'))
-    # print(main('RS-020'))
-    # print(main('PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒', file_path='PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒'))
-    # print(main('老公在外出差家里的娇妻被入室小偷强迫性交 - 美酱'))
-    # print(main('', file_path='夏日回忆 贰 HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4'))
-    # print(main('', file_path='HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4'))
-    # print(main('', file_path="【HongKongDoll玩偶姐姐.短篇集.情人节特辑.Valentine's Day Special-cd2"))
-    # print(main('', file_path='PMC-062 唐茜.綠帽丈夫連同新弟怒操出軌老婆.強拍淫蕩老婆被操 唐茜.ts'))
-    # print(main('', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画'))
-    # print(main('淫欲游戏王.EP6', appoint_number='淫欲游戏王.EP5', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts'))  # EP不带.才能搜到
-    # print(main('', file_path='PMS-003.职场冰与火.EP3设局.宁静.苏文文.设局我要女人都臣服在我胯下.蜜桃影像传媒'))
-    # print(main('', file_path='PMS-001 性爱公寓EP04 仨人.蜜桃影像传媒.ts'))
-    # print(main('', file_path='PMS-001.性爱公寓EP03.ts'))
-    # print(main('', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.ts'))
-    # print(main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts'))
-    # main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱'))  # 简体搜不到
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木结爱.TS'))
-    # '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛', '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-', ' 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛']
-    # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱.TS'))
-    # print(main('', file_path='PMS-001 性爱公寓EP02 女王 蜜桃影像传媒 -莉娜乔安.TS'))
-    # print(main('91CM-081', file_path='91CM-081.田恬.李琼.继母与女儿.三.爸爸不在家先上妹妹再玩弄母亲.果冻传媒.mp4'))
-    # print(main('91CM-081', file_path='MDJ-0001.EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.麻豆传媒映画.mp4'))
-    # print(main('91CM-081', file_path='MDJ0001 EP2 AV 淫兽鬼父 陈美惠 .TS'))
-    # print(main('91CM-081', file_path='MXJ-0005.EP1.弥生美月.小恶魔高校生.与老师共度的放浪补课.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='PH-US-002.色控.音乐老师全裸诱惑.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.TS'))
-    # print(main('91CM-081', file_path='MD-0140-2.蜜苏.家有性事EP2.爱在身边.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RUCK ME LIKE A SEX DOLL.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS'))
-    # print(main('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS'))
-    # print(main('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS'))
-    # print(main('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS'))
-    # print(main('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS'))
-    # print(main('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS'))
-    # print(main('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS'))
-    # print(main('MDM-002'))  # 去掉标题最后的发行商
-    # print(main('MDS-007'))  # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS
-    # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS'))  # 数字要四位才能搜索到,即 MDJ-0001.EP1
-    # print(main('91CM-090'))  # 带横线才能搜到
-    # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿'))  # 带空格才能搜到
-    # print(main('淫欲游戏王EP2'))  # 不带空格才能搜到
-    # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿'))
-    # print(main('MD-0020'))
-    # print(main('mds009'))
-    # print(main('mds02209'))
-    # print(main('女王的SM调教'))
-    # print(main('91CM202'))
-    # print(main('91CM-202'))
+    print(main('DW-006.AV帝王作品.Roxie出演.地方妈妈的性解放.双穴双屌',
+               file_path='DW-006.AV帝王作品.Roxie出演.地方妈妈的性解放.双穴双屌'))  # print(main('MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作', file_path='MDJ001-EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.2021麻豆最强跨国合作')) # print(main('MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列', file_path='MKY-TN-003.周宁.乱伦黑料流出.最喜欢爸爸的鸡巴了.麻豆传媒MKY系列')) # print(main('MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样', file_path='MAN麻豆女性向系列.MAN-0011.岚湘庭.当男人恋爱时.我可以带你去流浪.也知道下场不怎么样')) # print(main('MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版', file_path='MDL-0009-2.楚梦舒.苏语棠.致八零年代的我们.年少的性欲和冲动.麻豆传媒映画原创中文收藏版')) # print(main('MSD-023', file_path='MSD023.袁子仪.杨柳.可爱女孩非亲妹.渴望已久的(非)近亲性爱.麻豆传媒映画.Model.Seeding系列.mp4')) # print(main('', file_path='夏日回忆 贰')) # print(main('MDX-0016')) # print(main('MDSJ-0004')) # print(main('RS-020')) # print(main('PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒', file_path='PME-018.雪霏.禽兽小叔迷奸大嫂.性感身材任我玩弄.蜜桃影像传媒')) # print(main('老公在外出差家里的娇妻被入室小偷强迫性交 - 美酱')) # print(main('', file_path='夏日回忆 贰 HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4')) # print(main('', file_path='HongKongDoll玩偶姐姐.短篇集.夏日回忆 贰.Summer Memories.Part 2.mp4')) # print(main('', file_path="【HongKongDoll玩偶姐姐.短篇集.情人节特辑.Valentine's Day Special-cd2")) # print(main('', file_path='PMC-062 唐茜.綠帽丈夫連同新弟怒操出軌老婆.強拍淫蕩老婆被操 唐茜.ts')) # print(main('', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画')) # print(main('淫欲游戏王.EP6', appoint_number='淫欲游戏王.EP5', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')) # EP不带.才能搜到 # print(main('', file_path='PMS-003.职场冰与火.EP3设局.宁静.苏文文.设局我要女人都臣服在我胯下.蜜桃影像传媒')) # print(main('', file_path='PMS-001 性爱公寓EP04 仨人.蜜桃影像传媒.ts')) # print(main('', file_path='PMS-001.性爱公寓EP03.ts')) # print(main('', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.ts')) # print(main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts')) # main('', file_path='淫欲游戏王.EP6.情欲射龙门.性爱篇.郭童童.李娜.双英战龙根3P混战.麻豆传媒映画.ts') # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱')) # 简体搜不到 # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木结爱.TS')) # '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛', '麻豆傳媒映畫原版 兔子先生 拉麵店搭訕超可愛少女下-', ' 兔子先生 拉麵店搭訕超可愛少女下-柚木結愛'] # print(main('', file_path='麻豆傳媒映畫原版 兔子先生 我的女友是女優 女友是AV女優是怎樣的體驗-美雪樱.TS')) # print(main('', file_path='PMS-001 性爱公寓EP02 女王 蜜桃影像传媒 -莉娜乔安.TS')) # print(main('91CM-081', file_path='91CM-081.田恬.李琼.继母与女儿.三.爸爸不在家先上妹妹再玩弄母亲.果冻传媒.mp4')) # print(main('91CM-081', file_path='MDJ-0001.EP3.陈美惠.淫兽寄宿家庭.我和日本父子淫乱的一天.麻豆传媒映画.mp4')) # print(main('91CM-081', file_path='MDJ0001 EP2 AV 淫兽鬼父 陈美惠 .TS')) # print(main('91CM-081', file_path='MXJ-0005.EP1.弥生美月.小恶魔高校生.与老师共度的放浪补课.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MKY-HS-004.周寗.催情民宿.偷下春药3P干爆夫妇.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='PH-US-002.色控.音乐老师全裸诱惑.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDX-0236-02.沈娜娜.青梅竹马淫乱3P.麻豆传媒映画x逼哩逼哩blibli.TS')) # print(main('91CM-081', file_path='MD-0140-2.蜜苏.家有性事EP2.爱在身边.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDUS系列[中文字幕].LAX0025.性感尤物渴望激情猛操.RUCK ME LIKE A SEX DOLL.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='REAL野性派001-朋友的女友讓我最上火.TS')) # print(main('91CM-081', file_path='MDS-009.张芸熙.巨乳旗袍诱惑.搔首弄姿色气满点.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDS005 被雇主强上的熟女家政妇 大声呻吟被操到高潮 杜冰若.mp4.TS')) # print(main('91CM-081', file_path='TT-005.孟若羽.F罩杯性感巨乳DJ.麻豆出品x宫美娱乐.TS')) # print(main('91CM-081', file_path='台湾第一女优吴梦梦.OL误上痴汉地铁.惨遭多人轮番奸玩.麻豆传媒映画代理出品.TS')) # print(main('91CM-081', file_path='PsychoPorn色控.找来大奶姐姐帮我乳交.麻豆传媒映画.TS')) # print(main('91CM-081', file_path='鲍鱼游戏SquirtGame.吸舔碰糖.失败者屈辱凌辱.TS')) # print(main('91CM-081', file_path='导演系列 外卖员的色情体验 麻豆传媒映画.TS')) # print(main('91CM-081', file_path='MDS007 骚逼女友在作妖-硬上男友当玩具 叶一涵.TS')) # print(main('MDM-002')) # 去掉标题最后的发行商 # print(main('MDS-007')) # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')) # 数字要四位才能搜索到,即 MDJ-0001.EP1 # print(main('91CM-090')) # 带横线才能搜到 # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')) # 带空格才能搜到 # print(main('淫欲游戏王EP2')) # 不带空格才能搜到 # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')) # print(main('MD-0020')) # print(main('mds009')) # print(main('mds02209')) # print(main('女王的SM调教')) # print(main('91CM202')) # print(main('91CM-202'))
diff --git a/src/models/crawlers/mdtv.py b/src/models/crawlers/mdtv.py
index 0f1b166..5c56ecf 100644
--- a/src/models/crawlers/mdtv.py
+++ b/src/models/crawlers/mdtv.py
@@ -253,8 +253,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         release = get_release(cover_url)
         year = get_year(release)
         studio = get_studio(series, tag, lable_list)
-        number, title = get_real_number_title(number, title, number_list, appoint_number, appoint_url, lable_list,
-                                              tag, actor, series)
+        number, title = get_real_number_title(number, title, number_list, appoint_number, appoint_url, lable_list, tag,
actor, series) try: dic = { @@ -309,13 +308,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) return js @@ -372,18 +365,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file # print(main('', file_path='WTB-075 酒店妹包养软饭男 为了让他振作只好以身相许 乌托邦.ts')) # 标题里有\t # print(main('', file_path='杏吧八戒1 - 3000约操18岁大一新生,苗条身材白嫩紧致.ts')) # 分词匹配,带标点或者整个标题去匹配 # print(main('', file_path='萝莉社 女大学生找模特兼职 被要求裸露拍摄 被套路内射.ts')) # 分词匹配,带标点或者整个标题去匹配 - print(main('', file_path='/sp/sp6/国产测试/RS-023 AV1.mp4')) - # print(main('MDM-002')) # 去掉标题最后的发行商 - # print(main('MDS-0007')) # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS - # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')) # 数字要四位才能搜索到,即 MDJ-0001.EP1 - # print(main('91CM-090')) # 带横线才能搜到 - # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')) # 带空格才能搜到 - # print(main('淫欲游戏王EP2')) # 不带空格才能搜到 - # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')) - # print(main('MD-0020')) - # print(main('mds009')) - # print(main('女王的SM调教')) - # print(main('91CM202')) - # print(main('必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts', file_path='必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts')) - # print(main('', file_path='素人自制舒舒 富婆偷情被偷拍 亏大了!50W买个视频还被操.ts')) - # print(main('', file_path='/sp/sp3/国产/2021年10月份 國產原創原版合集/20211003 91CM-191 你好同学ep5 MSD011/[c0e0.com]实战现场 .TS')) + print(main('', + file_path='/sp/sp6/国产测试/RS-023 AV1.mp4')) # print(main('MDM-002')) # 去掉标题最后的发行商 # print(main('MDS-0007')) # 数字要四位才能搜索到,即 MDS-0007 MDJ001 EP1 我的女优物语陈美惠.TS # print(main('MDS-007', file_path='MDJ001 EP1 我的女优物语陈美惠.TS')) # 数字要四位才能搜索到,即 MDJ-0001.EP1 # print(main('91CM-090')) # 带横线才能搜到 # print(main('台湾SWAG chloebabe 剩蛋特辑 干爆小鹿')) # 带空格才能搜到 # print(main('淫欲游戏王EP2')) # 不带空格才能搜到 # print(main('台湾SWAG-chloebabe-剩蛋特輯-幹爆小鹿')) # print(main('MD-0020')) # print(main('mds009')) # print(main('女王的SM调教')) # print(main('91CM202')) # print(main('必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts', file_path='必射客 没钱买披萨只好帮外送员解决问题 大象传媒.ts')) # print(main('', file_path='素人自制舒舒 富婆偷情被偷拍 亏大了!50W买个视频还被操.ts')) # print(main('', file_path='/sp/sp3/国产/2021年10月份 國產原創原版合集/20211003 91CM-191 你好同学ep5 MSD011/[c0e0.com]实战现场 .TS')) diff --git a/src/models/crawlers/mgstage.py b/src/models/crawlers/mgstage.py index a0c5396..65225f2 100644 --- a/src/models/crawlers/mgstage.py +++ b/src/models/crawlers/mgstage.py @@ -25,11 +25,9 @@ def getTitle(html): def getActor(html): - result = str(html.xpath('//th[contains(text(),"出演")]/../td/a/text()')).replace('\\n', '').strip(" ['']").replace( - '/', ',').replace('\'', '').replace(' ', '') + result = str(html.xpath('//th[contains(text(),"出演")]/../td/a/text()')).replace('\\n', '').strip(" ['']").replace('/', ',').replace('\'', '').replace(' ', '') if not result: - result = str(html.xpath('//th[contains(text(),"出演")]/../td/text()')).replace('\\n', '').strip( - " ['']").replace('/', ',').replace('\'', '').replace(' ', '') + result = str(html.xpath('//th[contains(text(),"出演")]/../td/text()')).replace('\\n', '').strip(" ['']").replace('/', ',').replace('\'', '').replace(' ', '') return result @@ -247,13 +245,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp', short_n 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: 
{'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js diff --git a/src/models/crawlers/mmtv.py b/src/models/crawlers/mmtv.py index 10628a0..0c695ae 100644 --- a/src/models/crawlers/mmtv.py +++ b/src/models/crawlers/mmtv.py @@ -297,13 +297,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -318,12 +312,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file # print(main('c0930-ki221218')) # print(main('c0930-hitozuma1407')) # print(main('h0930-ori1665')) - print(main('h0930-ori1665', appoint_url='https://7mm002.com/zh/amateur_content/107108/content.html')) - # print(main('RBD-293')) - # print(main('LUXU-728')) # 无结果 - # print(main('fc2-1050737')) # 标题中有/ - # print(main('fc2-2724807')) - # print(main('luxu-1257')) - # print(main('heyzo-1031')) - # print(main('ABP-905')) - # print(main('heyzo-1031', '')) + print(main('h0930-ori1665', + appoint_url='https://7mm002.com/zh/amateur_content/107108/content.html')) # print(main('RBD-293')) # print(main('LUXU-728')) # 无结果 # print(main('fc2-1050737')) # 标题中有/ # print(main('fc2-2724807')) # print(main('luxu-1257')) # print(main('heyzo-1031')) # print(main('ABP-905')) # print(main('heyzo-1031', '')) diff --git a/src/models/crawlers/mywife.py b/src/models/crawlers/mywife.py index 41ab130..9c1c471 100644 --- a/src/models/crawlers/mywife.py +++ b/src/models/crawlers/mywife.py @@ -33,8 +33,7 @@ def get_first_url(html, key): def get_second_url(html): - result = html.xpath( - '//a[@class="wp-block-button__link has-luminous-vivid-amber-to-luminous-vivid-orange-gradient-background has-background"]/@href') + result = html.xpath('//a[@class="wp-block-button__link has-luminous-vivid-amber-to-luminous-vivid-orange-gradient-background has-background"]/@href') return result[0] if result else '' @@ -96,12 +95,7 @@ def get_wiki_data(): actor = each.xpath("div[@class='wiki-section-body-3']/a[@rel='nofollow']/text()") if actor: actor = actor[0] - mywife_dic[number_id] = { - 'number': number_id, - 'actor': actor, - 'poster': poster, - 'website': website, - } + mywife_dic[number_id] = {'number': number_id, 'actor': actor, 'poster': poster, 'website': website, } return mywife_dic except: # print(traceback.format_exc()) @@ -305,13 +299,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'): 'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )), } dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}} - js = json.dumps( - dic, - ensure_ascii=False, - sort_keys=False, - indent=4, - separators=(',', ': '), - ) # .encode('UTF-8') + js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8') return js @@ -323,10 +311,4 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'): # print(main('mywife-1525')) # 无 No. # print(main('mywife-1578')) # 无 No. 
     # print(main('mywife-1370')) # 无 No.
-    print(main('mywife-1307')) # 无 No.
-    # print(main('mywife-1161')) # 无 No. 其实是 No
-    # print(main('mywife No.1161'))
-    # print(main('mywife No.1164'))
-    # print(main('mywife No.1167'))
-    # print(main('mywife No.1171'))
-    # print(main('mywife No.1229'))
+    print(main('mywife-1307')) # 无 No. # print(main('mywife-1161')) # 无 No. 其实是 No # print(main('mywife No.1161')) # print(main('mywife No.1164')) # print(main('mywife No.1167')) # print(main('mywife No.1171')) # print(main('mywife No.1229'))
diff --git a/src/models/crawlers/official.py b/src/models/crawlers/official.py
index 7b7efb4..ef3fed2 100644
--- a/src/models/crawlers/official.py
+++ b/src/models/crawlers/official.py
@@ -24,8 +24,7 @@ def get_title(html):


 def get_actor(html):
-    actor_list = html.xpath(
-        '//a[@class="c-tag c-main-bg-hover c-main-font c-main-bd" and contains(@href, "/actress/")]/text()')
+    actor_list = html.xpath('//a[@class="c-tag c-main-bg-hover c-main-font c-main-bd" and contains(@href, "/actress/")]/text()')
     new_list = [each.strip() for each in actor_list]
     return ','.join(new_list)

@@ -225,33 +224,9 @@ def main(number, appoint_url='', log_info='', req_web='', language=''):
     except Exception as e:
         # print(traceback.format_exc())
         debug_info = str(e)
-        dic = {
-            'title': '',
-            'cover': '',
-            'website': '',
-            'log_info': log_info,
-            'error_info': debug_info,
-            'req_web': req_web,
-        }
-        dic = {
-            'official': {
-                'zh_cn': dic,
-                'zh_tw': dic,
-                'jp': dic
-            },
-            website_name: {
-                'zh_cn': dic,
-                'zh_tw': dic,
-                'jp': dic
-            },
-        }
-        js = json.dumps(
-            dic,
-            ensure_ascii=False,
-            sort_keys=False,
-            indent=4,
-            separators=(',', ': '),
-        ) # .encode('UTF-8')
+        dic = {'title': '', 'cover': '', 'website': '', 'log_info': log_info, 'error_info': debug_info, 'req_web': req_web, }
+        dic = {'official': {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}, website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}, }
+        js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8')
     return js


@@ -283,6 +258,4 @@ def main(number, appoint_url='', log_info='', req_web='', language=''):
     # print(main('SSIS-623', ''))
     # print(main('MIDV-002', ''))
     # print(main('MIDV256', ''))
-    print(main('SSNI-531'))
-    # print(main('SSIS-090', ''))
-    # print(main('SNIS-016', ''))
+    print(main('SSNI-531')) # print(main('SSIS-090', '')) # print(main('SNIS-016', ''))
diff --git a/src/models/crawlers/prestige.py b/src/models/crawlers/prestige.py
index 4a4e83f..4ca134c 100644
--- a/src/models/crawlers/prestige.py
+++ b/src/models/crawlers/prestige.py
@@ -209,31 +209,12 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):
             'error_info': debug_info,
             'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
         }
-        dic = {
-            'official': {
-                'zh_cn': dic,
-                'zh_tw': dic,
-                'jp': dic
-            },
-            website_name: {
-                'zh_cn': dic,
-                'zh_tw': dic,
-                'jp': dic
-            },
-        }
-        js = json.dumps(
-            dic,
-            ensure_ascii=False,
-            sort_keys=False,
-            indent=4,
-            separators=(',', ': '),
-        ) # .encode('UTF-8')
+        dic = {'official': {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}, website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}, }
+        js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8')
     return js


 if __name__ == '__main__':
     # yapf: disable
     # print(main('abw-130'))
-    print(main('FCP-150'))
-    # print(main('fakwm-064', appoint_url='https://www.prestige-av.com/goods/dcb86b74-195b-46c4-8ced-71f5f3ce5c3c?skuId=ABW-344')) # 有导演
-    # print(main('ABW-343')) # 无图
+    print(main('FCP-150')) # print(main('fakwm-064', appoint_url='https://www.prestige-av.com/goods/dcb86b74-195b-46c4-8ced-71f5f3ce5c3c?skuId=ABW-344')) # 有导演 # print(main('ABW-343')) # 无图
diff --git a/src/models/crawlers/theporndb.py b/src/models/crawlers/theporndb.py
index b773a48..1da21d7 100644
--- a/src/models/crawlers/theporndb.py
+++ b/src/models/crawlers/theporndb.py
@@ -392,17 +392,16 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
     except:
         # print(traceback.format_exc())
         req_web = req_web + '(%ss) ' % (round((time.time() - start_time), ))
-        return theporndb_movies.main(number, appoint_url=appoint_url, log_info=log_info, req_web=req_web,
-                                     language='zh_cn', file_path=file_path, appoint_number=appoint_number)
+        return theporndb_movies.main(number,
+                                     appoint_url=appoint_url,
+                                     log_info=log_info,
+                                     req_web=req_web,
+                                     language='zh_cn',
+                                     file_path=file_path,
+                                     appoint_number=appoint_number)

     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    ) # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8')
     return js


@@ -428,26 +427,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
     # print(main('', file_path='AdultTime.20.02.17.Angela.White.Full.Body.Physical.Exam.XXX.1080p.MP4-KTR.mp4')) # 无命中演员,视为失败
     # print(main('', file_path='SexArt_12.04.13-Elle Alexandra & Lexi Bloom & Malena Morgan-Stepping-Out_SexArt-1080p.mp4')) # 多个,按相似度命中
     # print(main('', file_path='SexArt.12.04.13 Sex Art.mp4')) # 多个,按相似度命中
-    print(main('nubilefilms-all-work-and-no-play', file_path=''))
-    # print(main('', file_path='SexArt_12.04.13-Elle Alexandra & Malena Morgan-Under-The-Elle-Tree_SexArt-1080p.mp4')) # 多个,按相似度命中
-    # print(main('', file_path='SexArt_12.04.13-Elle Alexandra & Rilee Marks-Whispers_SexArt-1080p.mp4')) # 多个,按相似度命中
-    # print(main('', file_path='SexArt_12.04.13-Hayden Hawkens & Malena Morgan-Golden_SexArt-1080p.mp4')) # 多个,按相似度命中
-    # print(main('', file_path='SexArt_12.04.13-Hayden Hawkens-Butterfly-Blue_SexArt-1080p.mp4')) # 多个,按相似度命中
-    # print(main('', file_path='SexArt_12.04.13-Lexi Bloom & Logan Pierce-My-First_SexArt-1080p.mp4')) # 多个,按相似度命中
-    # print(main('', file_path='LittleCaprice-Dreams.23.02.18.sky.pierce.and.little.caprice.nasstyx.4k.mp4')) # 日期不对,缺失演员,标题名顺序不匹配,待调研方案
-    # print(main('', file_path='LittleCaprice-Dreams.23.02.18.nasstyx.little.caprice.sky.pierce.max.4k.mp4')) # 缺失演员
-    # print(main('', file_path='ClubSeventeen.18.09.24.Alecia.Fox.Hardcore.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换
-    # print(main('', file_path='ClubSeventeen.18.06.11.Alecia.Fox.And.Gia.Mulino.Lesbian.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换
-    # print(main('', file_path='ClubSeventeen.18.07.23.Alecia.Fox.And.Angela.Allison.Lesbian.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换
-    # print(main('', file_path='ClubSeventeen.18.10.09.Alecia.Fox.Solo.XXX.2160p.MP4-KTR[rarbg].mp4')) # 多个,按相似度命中
-    # print(main('', file_path='WhiteTeensBlackCocks.17.07.09.Alecia.Fox.XXX.2160p.MP4-KTR[rarbg].mp4')) # 缺失资源
-    # print(main('', file_path='Z:\\分类\\A-日本系列-1080P\\working2\\问题\\blacked.23.02.04.agatha.vega.lika.star.and.jazlyn.ray.mp4')) # 缺失资源
-    # print(main('', file_path='brazzersexxtra.23.02.09.aria.lee.and.lulu.chu.pervy.practices.part.1.mp4'))
-    # print(main('', file_path='brazzersexxtra.23.02.09.lulu.chu.pervy.practices.part.2..mp4'))
-    # print(main('blacked-2015-03-22-karla-kush', file_path='blacked-2015-03-22-karla-kush.ts'))
-    # print(main('', file_path='tft-2019-01-14-rachael-cavalli-my-teachers-secrets.ts'))
-    # print(main('', file_path='hussie-pass-bts-new-boobies-a-brand-new-girl.ts')) # 演员没有性别
-    # print(main('SWhores.23.02.14', file_path='SWhores.23.02.14..Anal Girl with No Extras.1080P.ts')) # 未获取到演员
-    # print(main('', file_path='/test/work/CzechStreets.2019-01-01.18 Y O Virtuoso with Ddd Tits.Nada.mp4')) # 未获取到演员
-    # print(main('Evolvedfights.20.10.30',
-    #            file_path='AARM-018 - 2021-09-28 - 未知演员 - アロマ企画,アロマ企画/evolvedfights.20.10.30.kay.carter.vs.nathan.bronson.mp4'))
+    print(main('nubilefilms-all-work-and-no-play',
+               file_path='')) # print(main('', file_path='SexArt_12.04.13-Elle Alexandra & Malena Morgan-Under-The-Elle-Tree_SexArt-1080p.mp4')) # 多个,按相似度命中 # print(main('', file_path='SexArt_12.04.13-Elle Alexandra & Rilee Marks-Whispers_SexArt-1080p.mp4')) # 多个,按相似度命中 # print(main('', file_path='SexArt_12.04.13-Hayden Hawkens & Malena Morgan-Golden_SexArt-1080p.mp4')) # 多个,按相似度命中 # print(main('', file_path='SexArt_12.04.13-Hayden Hawkens-Butterfly-Blue_SexArt-1080p.mp4')) # 多个,按相似度命中 # print(main('', file_path='SexArt_12.04.13-Lexi Bloom & Logan Pierce-My-First_SexArt-1080p.mp4')) # 多个,按相似度命中 # print(main('', file_path='LittleCaprice-Dreams.23.02.18.sky.pierce.and.little.caprice.nasstyx.4k.mp4')) # 日期不对,缺失演员,标题名顺序不匹配,待调研方案 # print(main('', file_path='LittleCaprice-Dreams.23.02.18.nasstyx.little.caprice.sky.pierce.max.4k.mp4')) # 缺失演员 # print(main('', file_path='ClubSeventeen.18.09.24.Alecia.Fox.Hardcore.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换 # print(main('', file_path='ClubSeventeen.18.06.11.Alecia.Fox.And.Gia.Mulino.Lesbian.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换 # print(main('', file_path='ClubSeventeen.18.07.23.Alecia.Fox.And.Angela.Allison.Lesbian.XXX.2160p.MP4-KTR[rarbg].mp4')) # 系列转换 # print(main('', file_path='ClubSeventeen.18.10.09.Alecia.Fox.Solo.XXX.2160p.MP4-KTR[rarbg].mp4')) # 多个,按相似度命中 # print(main('', file_path='WhiteTeensBlackCocks.17.07.09.Alecia.Fox.XXX.2160p.MP4-KTR[rarbg].mp4')) # 缺失资源 # print(main('', file_path='Z:\\分类\\A-日本系列-1080P\\working2\\问题\\blacked.23.02.04.agatha.vega.lika.star.and.jazlyn.ray.mp4')) # 缺失资源 # print(main('', file_path='brazzersexxtra.23.02.09.aria.lee.and.lulu.chu.pervy.practices.part.1.mp4')) # print(main('', file_path='brazzersexxtra.23.02.09.lulu.chu.pervy.practices.part.2..mp4')) # print(main('blacked-2015-03-22-karla-kush', file_path='blacked-2015-03-22-karla-kush.ts')) # print(main('', file_path='tft-2019-01-14-rachael-cavalli-my-teachers-secrets.ts')) # print(main('', file_path='hussie-pass-bts-new-boobies-a-brand-new-girl.ts')) # 演员没有性别 # print(main('SWhores.23.02.14', file_path='SWhores.23.02.14..Anal Girl with No Extras.1080P.ts')) # 未获取到演员 # print(main('', file_path='/test/work/CzechStreets.2019-01-01.18 Y O Virtuoso with Ddd Tits.Nada.mp4')) # 未获取到演员 # print(main('Evolvedfights.20.10.30', # file_path='AARM-018 - 2021-09-28 - 未知演员 - アロマ企画,アロマ企画/evolvedfights.20.10.30.kay.carter.vs.nathan.bronson.mp4'))
diff --git a/src/models/crawlers/theporndb_movies.py b/src/models/crawlers/theporndb_movies.py
index 9277163..c06384e 100644
--- a/src/models/crawlers/theporndb_movies.py
+++ b/src/models/crawlers/theporndb_movies.py
@@ -400,13 +400,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn', file
         'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
     }
     dic = {website_name: {'zh_cn': dic, 'zh_tw': dic, 'jp': dic}}
-    js = json.dumps(
-        dic,
-        ensure_ascii=False,
-        sort_keys=False,
-        indent=4,
-        separators=(',', ': '),
-    ) # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=False, indent=4, separators=(',', ': '), ) # .encode('UTF-8')
     return js
diff --git a/src/models/crawlers/xcity.py b/src/models/crawlers/xcity.py
index 477918a..81e7fb3 100644
--- a/src/models/crawlers/xcity.py
+++ b/src/models/crawlers/xcity.py
@@ -130,9 +130,7 @@ def getDirector(html):
 def getExtrafanart(html):
     result = html.xpath('//a[contains(@class, "thumb")]/@href')
     if result:
-        result = str(result).replace('//faws.xcity.jp/scene/small/', 'https://faws.xcity.jp/').strip(' []').replace("'",
-                                                                                                                    '').replace(
-            ', ', ',')
+        result = str(result).replace('//faws.xcity.jp/scene/small/', 'https://faws.xcity.jp/').strip(' []').replace("'", '').replace(', ', ',')
         result = result.split(',')
     else:
         result = ''
@@ -175,8 +173,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='jp'):

     try:
         if not real_url:
-            url_search = 'https://xcity.jp/result_published/?q=' + \
-                         number.replace('-', '')
+            url_search = 'https://xcity.jp/result_published/?q=' + number.replace('-', '')

             debug_info = '搜索地址: %s ' % url_search
             log_info += web_info + debug_info
@@ -287,13 +284,4 @@


 if __name__ == '__main__':
-    print(main('STVF010'))
-    # print(main('MXGS563'))
-    # print(main('xc-1280'))
-    # print(main('xv-163'))
-    # print(main('sea-081'))
-    # print(main('IA-28'))
-    # print(main('xc-1298'))
-    # print(main('DMOW185'))
-    # print(main('EMOT007'))
-    # print(main('EMOT007', "https://xcity.jp/avod/detail/?id=147036"))
+    print(main('STVF010')) # print(main('MXGS563')) # print(main('xc-1280')) # print(main('xv-163')) # print(main('sea-081')) # print(main('IA-28')) # print(main('xc-1298')) # print(main('DMOW185')) # print(main('EMOT007')) # print(main('EMOT007', "https://xcity.jp/avod/detail/?id=147036"))
diff --git a/src/models/tools/actress_db.py b/src/models/tools/actress_db.py
index 0c1a4b6..1478da0 100644
--- a/src/models/tools/actress_db.py
+++ b/src/models/tools/actress_db.py
@@ -43,8 +43,7 @@ def update_actor_info_from_db(cls, actor_info: EMbyActressInfo) -> bool:
                 return False
             name, alias = s
             show_log_text(f" ✅ 数据库中存在姓名: {alias}")
-            res = cur.execute(
-                f"select Href,Cup,Height,Bust,Waist,Hip,Birthday,Birthplace,Account,CareerPeriod from Info where Name = '{name}'")
+            res = cur.execute(f"select Href,Cup,Height,Bust,Waist,Hip,Birthday,Birthplace,Account,CareerPeriod from Info where Name = '{name}'")
             href, cup, height, bust, waist, hip, birthday, birthplace, account, career_period = res.fetchone()
             cur.close()
             # 添加标签
diff --git a/src/models/tools/emby_actor_image.py b/src/models/tools/emby_actor_image.py
index 4f49114..37ea73e 100644
--- a/src/models/tools/emby_actor_image.py
+++ b/src/models/tools/emby_actor_image.py
@@ -57,8 +57,7 @@ def _get_emby_actor_list():

     result, response = get_html(url, proxies=False, json_data=True)
     if not result:
-        signal.show_log_text(
-            f'🔴 {server_name} 连接失败!请检查 {server_name} 地址 和 API 密钥是否正确填写! {response}')
+        signal.show_log_text(f'🔴 {server_name} 连接失败!请检查 {server_name} 地址 和 API 密钥是否正确填写! {response}')
         signal.show_log_text(traceback.format_exc())

     actor_list = response['Items']
@@ -74,13 +73,9 @@ def _upload_actor_photo(url, pic_path):
             b6_pic = base64.b64encode(f.read()) # 读取文件内容, 转换为base64编码

         if pic_path.endswith('jpg'):
-            header = {
-                "Content-Type": 'image/jpeg',
-            }
+            header = {"Content-Type": 'image/jpeg', }
         else:
-            header = {
-                "Content-Type": 'image/png',
-            }
+            header = {"Content-Type": 'image/png', }
         requests.post(url=url, data=b6_pic, headers=header)
         return True
     except:
@@ -191,14 +186,7 @@ def _get_gfriends_actor_data():
                     actor_url = f'{raw_url}/master/Content/{each_key}/{value}'
                     new_gfriends_actor_data[key] = actor_url
        with open(gfriends_json_path, 'w', encoding='utf-8') as f:
-            json.dump(
-                new_gfriends_actor_data,
-                f,
-                ensure_ascii=False,
-                sort_keys=True,
-                indent=4,
-                separators=(',', ': '),
-            )
+            json.dump(new_gfriends_actor_data, f, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '), )
         return new_gfriends_actor_data
     else:
         return _get_local_actor_photo()
@@ -226,8 +214,7 @@ def _get_graphis_pic(actor_name):
        backdrop_path = big_old
    if 'graphis_backgrop' not in emby_on:
        backdrop_path = fix_old
-    url = f'https://graphis.ne.jp/monthly/?S=1&K={actor_name}'
-    # https://graphis.ne.jp/monthly/?S=1&K=夢乃あいか
+    url = f'https://graphis.ne.jp/monthly/?S=1&K={actor_name}' # https://graphis.ne.jp/monthly/?S=1&K=夢乃あいか

    # 查看本地有没有缓存
    logs = ''
@@ -330,12 +317,10 @@ def _update_emby_actor_photo_execute(actor_list, gfriends_actor_data):
             pic_path = gfriends_actor_data.get(f'{jp_name}.png')
         if not pic_path:
             if actor_imagetages:
-                signal.show_log_text(
-                    f'\n{deal_percent} ✅ {i}/{count_all} 没有找到头像!继续使用原有头像! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
+                signal.show_log_text(f'\n{deal_percent} ✅ {i}/{count_all} 没有找到头像!继续使用原有头像! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
                 succ += 1
                 continue
-            signal.show_log_text(
-                f'\n{deal_percent} 🔴 {i}/{count_all} 没有找到头像! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
+            signal.show_log_text(f'\n{deal_percent} 🔴 {i}/{count_all} 没有找到头像! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
             fail += 1
             continue
         else:
@@ -348,8 +333,7 @@ def _update_emby_actor_photo_execute(actor_list, gfriends_actor_data):
                 file_path = os.path.join(actor_folder, file_name)
                 if not os.path.isfile(file_path):
                     if not download_file_with_filepath({'logs': ''}, pic_path, file_path, actor_folder):
-                        signal.show_log_text(
-                            f'\n{deal_percent} 🔴 {i}/{count_all} 头像下载失败! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
+                        signal.show_log_text(f'\n{deal_percent} 🔴 {i}/{count_all} 头像下载失败! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
                         fail += 1
                         continue
                 pic_path = file_path
@@ -375,15 +359,12 @@ def _update_emby_actor_photo_execute(actor_list, gfriends_actor_data):
                 logs += ' ✅ 使用 Gfriends 头像和背景!'
             else:
                 logs += ' ✅ 使用本地头像库头像和背景!'
-            signal.show_log_text(
-                f'\n{deal_percent} ✅ {i}/{count_all} 头像更新成功! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
+            signal.show_log_text(f'\n{deal_percent} ✅ {i}/{count_all} 头像更新成功! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
             succ += 1
         else:
-            signal.show_log_text(
-                f'\n{deal_percent} 🔴 {i}/{count_all} 头像上传失败! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
+            signal.show_log_text(f'\n{deal_percent} 🔴 {i}/{count_all} 头像上传失败! 👩🏻 {actor_name} {logs}\n{actor_homepage}')
             fail += 1
-    signal.show_log_text(
-        f'\n\n 🎉🎉🎉 演员头像补全完成!用时: {get_used_time(start_time)}秒 成功: {succ} 失败: {fail} 跳过: {skip}\n')
+    signal.show_log_text(f'\n\n 🎉🎉🎉 演员头像补全完成!用时: {get_used_time(start_time)}秒 成功: {succ} 失败: {fail} 跳过: {skip}\n')


 def _get_local_actor_photo():
@@ -408,5 +389,6 @@ def _get_local_actor_photo():
             return False
     return local_actor_photo_dic

+
 if __name__ == '__main__':
-    _get_gfriends_actor_data()
\ No newline at end of file
+    _get_gfriends_actor_data()
diff --git a/src/models/tools/emby_actor_info.py b/src/models/tools/emby_actor_info.py
index a690351..5c0f51a 100644
--- a/src/models/tools/emby_actor_info.py
+++ b/src/models/tools/emby_actor_info.py
@@ -26,8 +26,7 @@
 from models.data_models import EMbyActressInfo
 from models.signals import signal
 from models.tools.actress_db import ActressDB
-from models.tools.emby_actor_image import _generate_server_url, _get_emby_actor_list, _get_gfriends_actor_data, \
-    update_emby_actor_photo
+from models.tools.emby_actor_image import _generate_server_url, _get_emby_actor_list, _get_gfriends_actor_data, update_emby_actor_photo


 def creat_kodi_actors(add: bool):
@@ -73,12 +72,10 @@ def update_emby_actor_info():

         # 已有资料时跳过
         # http://192.168.5.191:8096/emby/Persons/梦乃爱华?api_key=ee9a2f2419704257b1dd60b975f2d64e
-        actor_homepage, actor_person, pic_url, backdrop_url, backdrop_url_0, update_url = _generate_server_url(
-            actor)
+        actor_homepage, actor_person, pic_url, backdrop_url, backdrop_url_0, update_url = _generate_server_url(actor)
         result, res = get_html(actor_person, proxies=False, json_data=True)
         if not result:
-            signal.show_log_text(
-                f"🔴 {i}/{total} {actor_name}: {server_name} 获取演员信息错误!\n 错误信息: {res}")
+            signal.show_log_text(f"🔴 {i}/{total} {actor_name}: {server_name} 获取演员信息错误!\n 错误信息: {res}")
             continue
         if res.get('Overview') and 'actor_info_miss' in emby_on:
             signal.show_log_text(f"✅ {i}/{total} {actor_name}: {server_name} 已有演员信息!跳过!")
@@ -168,8 +165,7 @@ def show_emby_actor_list(mode):
     for actor_js in actor_list:
         actor_name = actor_js['Name']
         actor_imagetages = actor_js["ImageTags"]
-        actor_homepage, actor_person, pic_url, backdrop_url, backdrop_url_0, update_url = _generate_server_url(
-            actor_js)
+        actor_homepage, actor_person, pic_url, backdrop_url, backdrop_url_0, update_url = _generate_server_url(actor_js)

         # http://192.168.5.191:8096/web/index.html#!/item?id=2146&serverId=57cdfb2560294a359d7778e7587cdc98
         if actor_imagetages:
@@ -195,8 +191,7 @@ def show_emby_actor_list(mode):
             # http://192.168.5.191:8096/emby/Persons/梦乃爱华?api_key=ee9a2f2419704257b1dd60b975f2d64e
             result, res = get_html(actor_person, proxies=False, json_data=True)
             if not result:
-                signal.show_log_text(
-                    f"\n🔴 {count}/{total} Emby 获取演员信息错误!👩🏻 {actor_name} \n 错误信息: {res}")
+                signal.show_log_text(f"\n🔴 {count}/{total} Emby 获取演员信息错误!👩🏻 {actor_name} \n 错误信息: {res}")
                 continue

             overview = res.get('Overview')
@@ -207,40 +202,32 @@ def show_emby_actor_list(mode):

         if mode == 1:
             if actor_imagetages and overview:
-                signal.show_log_text(
-                    f"\n✅ {count}/{total} 已有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+                signal.show_log_text(f"\n✅ {count}/{total} 已有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
                 succ += 1
             elif actor_imagetages:
-                signal.show_log_text(
-                    f"\n🔴 {count}/{total} 没有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+                signal.show_log_text(f"\n🔴 {count}/{total} 没有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
                 fail_noinfo += 1
             elif overview:
-                signal.show_log_text(
-                    f"\n🔴 {count}/{total} 已有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+                signal.show_log_text(f"\n🔴 {count}/{total} 已有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
                 fail_nopic += 1
             else:
-                signal.show_log_text(
-                    f"\n🔴 {count}/{total} 没有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+                signal.show_log_text(f"\n🔴 {count}/{total} 没有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
                 fail += 1
             count += 1
         elif mode == 2 and actor_imagetages and overview:
-            signal.show_log_text(
-                f"\n✅ {count}/{total} 已有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+            signal.show_log_text(f"\n✅ {count}/{total} 已有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
             count += 1
             succ += 1
         elif mode == 3 and not actor_imagetages and overview:
-            signal.show_log_text(
-                f"\n🔴 {count}/{total} 已有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+            signal.show_log_text(f"\n🔴 {count}/{total} 已有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
             count += 1
             fail_nopic += 1
         elif mode == 4 and actor_imagetages and not overview:
-            signal.show_log_text(
-                f"\n🔴 {count}/{total} 没有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+            signal.show_log_text(f"\n🔴 {count}/{total} 没有信息!已有头像! 👩🏻 {actor_name} \n{actor_homepage}")
             count += 1
             fail_noinfo += 1
         elif mode == 5 and not actor_imagetages and not overview:
-            signal.show_log_text(
-                f"\n🔴 {count}/{total} 没有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
+            signal.show_log_text(f"\n🔴 {count}/{total} 没有信息!没有头像! 👩🏻 {actor_name} \n{actor_homepage}")
             count += 1
             fail += 1
         elif mode == 6 and overview:
@@ -252,8 +239,7 @@ def show_emby_actor_list(mode):

     signal.show_log_text(f'\n\n🎉🎉🎉 查询完成! 用时: {get_used_time(start_time)}秒')
     if mode == 1:
-        signal.show_log_text(
-            f'👩🏻 演员数量: {total} ✅ 有头像有信息: {succ} 🔴 有头像没信息: {fail_noinfo} 🔴 没头像有信息: {fail_nopic} 🔴 没头像没信息: {fail}\n')
+        signal.show_log_text(f'👩🏻 演员数量: {total} ✅ 有头像有信息: {succ} 🔴 有头像没信息: {fail_noinfo} 🔴 没头像有信息: {fail_nopic} 🔴 没头像没信息: {fail}\n')
     elif mode == 2:
         other = total - succ
         signal.show_log_text(f'👩🏻 演员数量: {total} ✅ 有头像有信息: {succ} 🔴 其他: {other}\n')
@@ -282,8 +268,8 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):
         signal.show_log_text(" 🔴 维基百科演员页没有该词条!")
         return False

-    av_key = ['女优', '女優', '男优', '男優', '(AV)导演', 'AV导演', 'AV監督', '成人电影', '成人影片', '映画監督',
-              'アダルトビデオ監督', '电影导演', '配音員', '配音员', '声優', '声优', 'グラビアアイドル', 'モデル']
+    av_key = ['女优', '女優', '男优', '男優', '(AV)导演', 'AV导演', 'AV監督', '成人电影', '成人影片', '映画監督', 'アダルトビデオ監督', '电影导演', '配音員', '配音员',
+              '声優', '声优', 'グラビアアイドル', 'モデル']
     for key in av_key:
         if key in res:
             signal.show_log_text(f" 🎉 页面内容命中关键词: {key},识别为女优或写真偶像或导演!\n")
@@ -324,8 +310,7 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):
                 result = re.findall(r'(\d+)年(\d+)月(\d+)日', info_right)
                 if result:
                     result = result[0]
-                    year = str(result[0]) if len(result[0]) == 4 else '19' + str(result[0]) if len(
-                        result[0]) == 2 else '1970'
+                    year = str(result[0]) if len(result[0]) == 4 else '19' + str(result[0]) if len(result[0]) == 2 else '1970'
                     month = str(result[1]) if len(result[1]) == 2 else '0' + str(result[1])
                     day = str(result[2]) if len(result[2]) == 2 else '0' + str(result[2])
                     brithday = f"{year}-{month}-{day}"
@@ -363,8 +348,8 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):
     # 简历
     try:
         s = actor_introduce_0.find(class_='toctext',
-                                   text=['简历', '簡歷', '个人简历', '個人簡歷', '略歴', '経歴', '来歴', '生平',
-                                         '生平与职业生涯', '略歴・人物']).find_previous_sibling().string
+                                   text=['简历', '簡歷', '个人简历', '個人簡歷', '略歴', '経歴', '来歴', '生平', '生平与职业生涯',
+                                         '略歴・人物']).find_previous_sibling().string
         if s:
             ff = actor_output.find(id=f'mf-section-{s}')
             if ff:
@@ -392,8 +377,7 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):
             # 为英文时要单独进行翻译
             if tag_req and langid.classify(tag_req)[0] == 'en' and translate_by_list:
                 for each in translate_by_list:
-                    signal.show_log_text(
-                        f" 🐙 识别到演员描述信息为英文({tag_req}),请求 {each.capitalize()} 进行翻译...")
+                    signal.show_log_text(f" 🐙 识别到演员描述信息为英文({tag_req}),请求 {each.capitalize()} 进行翻译...")
                     if each == 'youdao': # 使用有道翻译
                         t, o, r = youdao_translate(tag_req, '')
                     elif each == 'google': # 使用 google 翻译
@@ -425,11 +409,9 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):
                     actor_info.taglines = [t]
                 if overview_req:
                     overview = o
-                    overview = overview.replace('\n= = = = = = = = = =个人资料\n',
-                                                '\n===== 个人资料 =====\n')
+                    overview = overview.replace('\n= = = = = = = = = =个人资料\n', '\n===== 个人资料 =====\n')
                     overview = overview.replace('\n=====人物介绍\n', '\n===== 人物介绍 =====\n')
-                    overview = overview.replace('\n= = = = =个人鉴定= = = = =\n',
-                                                '\n===== 个人经历 =====\n')
+                    overview = overview.replace('\n= = = = =个人鉴定= = = = =\n', '\n===== 个人经历 =====\n')
                     overview = overview.replace('\n=====个人日历=====\n', '\n===== 个人经历 =====\n')
                     overview = overview.replace('\n=====个人费用=====\n', '\n===== 个人资料 =====\n')
                     overview = overview.replace('\n===== 个人协助 =====\n', '\n===== 人物介绍 =====\n')
@@ -447,8 +429,7 @@ def _get_wiki_detail(url, url_log, actor_info: EMbyActressInfo):

                 # 外部链接
                 overview += f'\n===== 外部链接 =====\n{url_log}'
-        overview = overview.replace('\n', ' ').replace('这篇报道有多个问题。请协助改善和在笔记页上的讨论。',
-                                                        '').strip()
+        overview = overview.replace('\n', ' ').replace('这篇报道有多个问题。请协助改善和在笔记页上的讨论。', '').strip()

         # 语言替换和转换
         taglines = actor_info.taglines
@@ -783,11 +764,8 @@ def _deal_kodi_actors(gfriends_actor_data, add):
                             net_file_name = re.findall(r'^[^?]+', net_file_name)[0]
                             local_file_path = os.path.join(actor_folder, net_file_name)
                             if not os.path.isfile(local_file_path):
-                                if not download_file_with_filepath({'logs': ''}, net_pic_path,
-                                                                   local_file_path,
-                                                                   actor_folder):
-                                    signal.show_log_text(
-                                        f'🔴 {actor_name} 头像下载失败!{net_pic_path}')
+                                if not download_file_with_filepath({'logs': ''}, net_pic_path, local_file_path, actor_folder):
+                                    signal.show_log_text(f'🔴 {actor_name} 头像下载失败!{net_pic_path}')
                                     failed.add(each)
                                     download_failed.add(each)
                                     continue
@@ -806,8 +784,7 @@ def _deal_kodi_actors(gfriends_actor_data, add):
     except:
         signal.show_traceback_log(traceback.format_exc())
     if add:
-        signal.show_log_text(
-            f'\n🎉 操作已完成! 共有演员: {len(all_actor)}, 已有头像: {len(success)}, 没有头像: {len(failed)}, 下载失败: {len(download_failed)}, 没有资源: {len(no_pic)}')
+        signal.show_log_text(f'\n🎉 操作已完成! 共有演员: {len(all_actor)}, 已有头像: {len(success)}, 没有头像: {len(failed)}, 下载失败: {len(download_failed)}, 没有资源: {len(no_pic)}')
     else:
         signal.show_log_text(f'\n🎉 操作已完成! 共清理了 {len(actor_clear)} 个 .actors 文件夹!')
     return
diff --git a/src/models/tools/missing.py b/src/models/tools/missing.py
index 92f78b8..7aaffb4 100644
--- a/src/models/tools/missing.py
+++ b/src/models/tools/missing.py
@@ -95,8 +95,7 @@ def _get_actor_numbers(actor_url, actor_single_url):
         else:
             video_date = '%s/%s/%s' % (time_list[0], time_list[1], time_list[2])
         # self.show_log_text('{} {:<10}{:\u3000>5} {}'.format(video_date, video_number, download_info, video_url))
-        Flags.actor_numbers_dic[actor_url].update(
-            {video_number: [video_number, video_date, video_url, download_info, video_title, single_info]})
+        Flags.actor_numbers_dic[actor_url].update({video_number: [video_number, video_date, video_url, download_info, video_title, single_info]})
         i += 1


@@ -116,8 +115,7 @@ def _get_actor_missing_numbers(actor_name, actor_url, actor_flag):
     # 演员信息排版和显示
     actor_info = Flags.actor_numbers_dic.get(actor_url)
     len_single = len(Flags.actor_numbers_dic.get(actor_single_url))
-    signal.show_log_text('🎉 获取完毕!共找到 [ %s ] 番号数量(%s)单体数量(%s)(%ss)' % (
-        actor_name, len(actor_info), len_single, get_used_time(start_time)))
+    signal.show_log_text('🎉 获取完毕!共找到 [ %s ] 番号数量(%s)单体数量(%s)(%ss)' % (actor_name, len(actor_info), len_single, get_used_time(start_time)))
     if actor_info:
         actor_numbers = actor_info.keys()
         all_list = set()
@@ -125,13 +123,10 @@ def _get_actor_missing_numbers(actor_name, actor_url, actor_flag):
         not_download_magnet_list = set()
         not_download_cnword_list = set()
         for actor_number in actor_numbers:
-            video_number, video_date, video_url, download_info, video_title, single_info = actor_info.get(
-                actor_number)
+            video_number, video_date, video_url, download_info, video_title, single_info = actor_info.get(actor_number)
             if actor_flag:
                 video_url = video_title[:30]
-            number_str = (
-                '{:>13} {:<10} {} {:\u3000>5} {}'.format(video_date, video_number, single_info, download_info,
-                                                         video_url))
+            number_str = ('{:>13} {:<10} {} {:\u3000>5} {}'.format(video_date, video_number, single_info, download_info, video_url))
             all_list.add(number_str)
             if actor_number not in Flags.local_number_set:
                 not_download_list.add(number_str)
@@ -148,31 +143,28 @@ def _get_actor_missing_numbers(actor_name, actor_url, actor_flag):
         not_download_magnet_list = sorted(not_download_magnet_list, reverse=True)
         not_download_cnword_list = sorted(not_download_cnword_list, reverse=True)

-        signal.show_log_text('\n👩 [ %s ] 的全部网络番号(%s)...\n%s' % (actor_name, len(all_list), ('=' * 97)))
+        signal.show_log_text(f'\n👩 [ {actor_name} ] 的全部网络番号({len(all_list)})...\n{("=" * 97)}')
         if all_list:
             for each in all_list:
                 signal.show_log_text(each)
         else:
             signal.show_log_text('🎉 没有缺少的番号...\n')

-        signal.show_log_text(
-            '\n👩 [ %s ] 本地缺失的番号(%s)...\n%s' % (actor_name, len(not_download_list), ('=' * 97)))
+        signal.show_log_text(f"\n👩 [ {actor_name} ] 本地缺失的番号({len(not_download_list)})...\n{('=' * 97)}")
         if not_download_list:
             for each in not_download_list:
                 signal.show_log_text(each)
         else:
             signal.show_log_text('🎉 没有缺少的番号...\n')

-        signal.show_log_text('\n👩 [ %s ] 本地缺失的有磁力的番号(%s)...\n%s' % (
-            actor_name, len(not_download_magnet_list), ('=' * 97)))
+        signal.show_log_text(f'\n👩 [ {actor_name} ] 本地缺失的有磁力的番号({len(not_download_magnet_list)})...\n{("=" * 97)}')
         if not_download_magnet_list:
             for each in not_download_magnet_list:
                 signal.show_log_text(each)
         else:
             signal.show_log_text('🎉 没有缺少的番号...\n')

-        signal.show_log_text('\n👩 [ %s ] 本地缺失的有字幕的番号(%s)...\n%s' % (
-            actor_name, len(not_download_cnword_list), ('=' * 97)))
+        signal.show_log_text(f'\n👩 [ {actor_name} ] 本地缺失的有字幕的番号({len(not_download_cnword_list)})...\n{("=" * 97)}')
         if not_download_cnword_list:
             for each in not_download_cnword_list:
                 signal.show_log_text(each)
@@ -209,17 +201,14 @@ def check_missing_number(actor_flag):
         for i in new_movie_path_list:
             movie_list = movie_lists('', movie_type, i) # 获取所有需要刮削的影片列表
             all_movie_list.extend(movie_list)
-    signal.show_log_text(
-        '🎉 获取完毕!共找到视频数量(%s)(%ss)' % (len(all_movie_list), get_used_time(start_time)))
+    signal.show_log_text(f'🎉 获取完毕!共找到视频数量({len(all_movie_list)})({get_used_time(start_time)}s)')

     # 获取本地番号
     start_time_local = time.time()
-    signal.show_log_text(
-        '\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n⏳ 开始获取本地视频的番号信息...')
+    signal.show_log_text('\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n⏳ 开始获取本地视频的番号信息...')
     local_number_list = resources.userdata_path('number_list.json')
     if not os.path.exists(local_number_list):
-        signal.show_log_text(
-            ' 提示:正在生成本地视频的番号信息数据...(第一次较慢,请耐心等待,以后只需要查找新视频,速度很快)')
+        signal.show_log_text(' 提示:正在生成本地视频的番号信息数据...(第一次较慢,请耐心等待,以后只需要查找新视频,速度很快)')
         with open(local_number_list, 'w', encoding='utf-8') as f:
             f.write('{}')
         with open(local_number_list, 'r', encoding='utf-8') as data:
@@ -244,8 +233,7 @@ def check_missing_number(actor_flag):
             else:
                 has_sub = False
             if not number:
-                json_data_temp, number, folder_old_path, file_name, file_ex, sub_list, file_show_name, file_show_path = get_file_info(
-                    movie_path, copy_sub=False)
+                json_data_temp, number, folder_old_path, file_name, file_ex, sub_list, file_show_name, file_show_path = get_file_info(movie_path, copy_sub=False)
                 has_sub = json_data_temp['has_sub'] # 视频中文字幕标识
             cn_word_icon = '🀄️' if has_sub else ''
             signal.show_log_text(' 发现新番号:{:<10} {}'.format(number, cn_word_icon))
@@ -257,24 +245,14 @@ def check_missing_number(actor_flag):
                 Flags.local_number_cnword_set.add(number) # 添加到本地有字幕的番号集合

         with open(local_number_list, 'w', encoding='utf-8') as f:
-            json.dump(
-                json_data_new,
-                f,
-                ensure_ascii=False,
-                sort_keys=True,
-                indent=4,
-                separators=(',', ': '),
-            )
+            json.dump(json_data_new, f, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ': '), )
     Flags.local_number_flag = new_movie_path_list
-    signal.show_log_text(
-        '🎉 获取完毕!共获取番号数量(%s)(%ss)' % (len(json_data_new), get_used_time(start_time_local)))
+    signal.show_log_text(f'🎉 获取完毕!共获取番号数量({len(json_data_new)})({get_used_time(start_time_local)}s)')

     # 查询演员番号
     if config.actors_name:
         actor_list = re.split(r'[,,]', config.actors_name)
-        signal.show_log_text(
-            '\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n🔍 需要查询的演员:\n %s' % (
-                ', '.join(actor_list)))
+        signal.show_log_text(f'\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n🔍 需要查询的演员:\n {", ".join(actor_list)}')
         for actor_name in actor_list:
             if not actor_name:
                 continue
@@ -283,14 +261,12 @@ def check_missing_number(actor_flag):
             else:
                 actor_url = resources.get_actor_data(actor_name).get('href')
             if actor_url:
-                signal.show_log_text(
-                    '\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n⏳ 从 JAVDB 获取 [ %s ] 的所有番号列表...' % actor_name)
+                signal.show_log_text(f'\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n⏳ 从 JAVDB 获取 [ {actor_name} ] 的所有番号列表...')
                 _get_actor_missing_numbers(actor_name, actor_url, actor_flag)
             else:
-                signal.show_log_text(
-                    '\n🔴 未找到 [ %s ] 的主页地址,你可以填写演员的 JAVDB 主页地址替换演员名称...' % actor_name)
+                signal.show_log_text(f'\n🔴 未找到 [ {actor_name} ] 的主页地址,你可以填写演员的 JAVDB 主页地址替换演员名称...')
     else:
         signal.show_log_text('\n🔴 没有要查询的演员!')

-    signal.show_log_text('\n🎉 查询完毕!共用时(%ss)' % (get_used_time(start_time)))
+    signal.show_log_text(f'\n🎉 查询完毕!共用时({get_used_time(start_time)}s)')
     signal.reset_buttons_status.emit()