From 6fdfdb754b4212d88804c77e9707ced329dd59fb Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 04:32:04 +0800 Subject: [PATCH 1/7] add mgstage film actor's thumb from seeaawiki_av_neme --- Function/getHtml.py | 6 ++--- Getter/jav321.py | 5 ++++ Getter/mgstage.py | 9 +++++-- Getter/seesaawiki_av_neme.py | 47 ++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 Getter/seesaawiki_av_neme.py diff --git a/Function/getHtml.py b/Function/getHtml.py index 1671ad5d9..6506b1f43 100644 --- a/Function/getHtml.py +++ b/Function/getHtml.py @@ -16,7 +16,7 @@ def get_config(): # ========================================================================网页请求 -def get_html(url, cookies=None): +def get_html(url, cookies=None,encode='utf-8'): config = get_config() retry_count = 0 proxy = '' @@ -37,14 +37,14 @@ def get_html(url, cookies=None): 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/60.0.3100.0 Safari/537.36'} getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies) - getweb.encoding = 'utf-8' + getweb.encoding = encode return getweb.text else: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/68.0.3440.106 Safari/537.36'} getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies) - getweb.encoding = 'utf-8' + getweb.encoding = encode return getweb.text except Exception as error_info: i += 1 diff --git a/Getter/jav321.py b/Getter/jav321.py index 674701df5..6551485cd 100644 --- a/Getter/jav321.py +++ b/Getter/jav321.py @@ -132,6 +132,11 @@ def main(number, isuncensored=False): 'website': getWebsite(detail_page), 'source': 'jav321.py', } + wikiActor = getActorFromSeesaawiki(number, dic) + acotrList = dic['actor'].split(',') + acotrList.extend(wikiActor['actor']) + dic['actor'] = ','.join(acotrList) + dic['actor_photo'] ={**dic['actor_photo'],**wikiActor['actor_photo']} except TimeoutError: dic = { 'title': '', diff --git a/Getter/mgstage.py b/Getter/mgstage.py index 4b748ca06..ff4cfce9c 100644 --- a/Getter/mgstage.py +++ b/Getter/mgstage.py @@ -2,7 +2,7 @@ from lxml import etree import json from Function.getHtml import get_html - +from Getter.seesaawiki_av_neme import getActorFromSeesaawiki def getTitle(htmlcode): try: @@ -128,7 +128,7 @@ def main(number): 'release': getRelease(htmlcode).strip(','), 'number': getNum(htmlcode).strip(','), 'cover': getCover(htmlcode).strip(','), - 'extrafanart': getExtraFanart(htmlcode).strip(','), + 'extrafanart': getExtraFanart(htmlcode), 'imagecut': 0, 'tag': getTag(htmlcode).strip(','), 'series': getSeries(htmlcode).strip(','), @@ -138,6 +138,11 @@ def main(number): 'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/', 'source': 'mgstage.py', } + wikiActor = getActorFromSeesaawiki(number, dic) + acotrList = dic['actor'].split(',') + acotrList.extend(wikiActor['actor']) + dic['actor'] = ','.join(acotrList) + dic['actor_photo'] ={**dic['actor_photo'],**wikiActor['actor_photo']} except TimeoutError: dic = { 'title': '', diff --git a/Getter/seesaawiki_av_neme.py b/Getter/seesaawiki_av_neme.py new file mode 100644 index 000000000..5da91c1a2 --- /dev/null +++ b/Getter/seesaawiki_av_neme.py @@ -0,0 +1,47 @@ +import re +from lxml import etree +import json +from Function.getHtml import get_html + +def getActorFromSeesaawiki(number, dic): + global localDic + localDic=dic.copy() + return getActor(number) + +def getActor(number): + wiki_html = get_html('https://seesaawiki.jp/av_neme/search?keywords=' + str(number),"","EUC-JP") + html = etree.fromstring(wiki_html, etree.HTMLParser()) + result = html.xpath('//*[@id="page-body-inner"]//h3//a') + hrefs = html.xpath('//*[@id="page-body-inner"]//h3//a/@href') + result = list(filter(filterActor, result)) + actors = [] + actorPhotoDic = {} + for actor in result: + actors.append(actor.text) + photo = getActorPhoto(actor.text, actor.attrib['href']) + p2 = {actor.text: photo} + actorPhotoDic.update(p2) + dic = { + 'actor': actors, + 'actor_photo':actorPhotoDic + } + return dic + +def getActorPhoto(actor, href): + print(href) + actor_html=get_html(href,"","EUC-JP") + html = etree.fromstring(actor_html, etree.HTMLParser()) + p = str(html.xpath('//*[@id="content_block_1-body"]/a/img/@src')).strip(" ['']") + return p + +def stripActor(actor): + actor = actor.strip().strip("'") + return actor +def filterActor(actor): + print(localDic) + if "年" in actor.text: + return False + if actor.text ==localDic['series']: + return False + return True + From 83c1d4422e2678133d59cba9d0f0a25601dd9e6f Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 04:38:30 +0800 Subject: [PATCH 2/7] fix jav321 import error --- Getter/jav321.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Getter/jav321.py b/Getter/jav321.py index 6551485cd..2a8a2d414 100644 --- a/Getter/jav321.py +++ b/Getter/jav321.py @@ -2,7 +2,7 @@ from lxml import etree import json from Function.getHtml import post_html - +from Getter.seesaawiki_av_neme import getActorFromSeesaawiki def getActorPhoto(actor): data = {} From 05b744cec1af4450a35a02652fe27ea26cbf6a30 Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 06:28:33 +0800 Subject: [PATCH 3/7] fix mgstage title and outline have emby illegal character '&' --- Function/Function.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Function/Function.py b/Function/Function.py index 8db00eb38..06890fd00 100644 --- a/Function/Function.py +++ b/Function/Function.py @@ -227,6 +227,7 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据 number = json_data['number'] actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 release = json_data['release'] + outline = json_data['outline'] try: cover_small = json_data['cover_small'] except: @@ -249,6 +250,8 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据 title = title.replace(' ', '.') title = title.replace('【', '') title = title.replace('】', '') + title = title.replace('&', '') + outline = outline.replace('&', '') release = release.replace('/', '-') tmpArr = cover_small.split(',') if len(tmpArr) > 0: @@ -272,6 +275,7 @@ def getDataFromJSON(file_number, config, mode): # 从JSON返回元数据 json_data['naming_media'] = naming_media json_data['naming_file'] = naming_file json_data['folder_name'] = folder_name + json_data['outline'] = outline return json_data From 33a50f1370cd08f8785ba23522876a9415c40a85 Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 07:28:27 +0800 Subject: [PATCH 4/7] fix download large thumb from mgstage --- AVDC_Main.py | 5 ++++- Getter/mgstage.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/AVDC_Main.py b/AVDC_Main.py index 23feedf02..bbb8920f7 100644 --- a/AVDC_Main.py +++ b/AVDC_Main.py @@ -1013,7 +1013,10 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f return i = 1 while i <= int(Config['proxy']['retry']): - self.DownloadFileWithFilename(json_data['cover'], thumb_name, path, Config, filepath, + download_url = json_data['cover'] + if len(json_data['largeImage']) > 0: + download_url = json_data['largeImage'] # mgstage 封面下载大图 + self.DownloadFileWithFilename(download_url, thumb_name, path, Config, filepath, failed_folder) if not check_pic(path + '/' + thumb_name): print('[!]Image Download Failed! Trying again. ' + str(i) + '/' + Config['proxy']['retry']) diff --git a/Getter/mgstage.py b/Getter/mgstage.py index ff4cfce9c..5ef6495eb 100644 --- a/Getter/mgstage.py +++ b/Getter/mgstage.py @@ -107,6 +107,11 @@ def getOutline(htmlcode): def getScore(htmlcode): return str(re.findall(r'5点満点中 (\S+)点', htmlcode)).strip(" ['']") +def getLargeImage(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']") + return result + def main(number): try: @@ -137,6 +142,7 @@ def main(number): 'director': '', 'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/', 'source': 'mgstage.py', + 'largeImage':getLargeImage(htmlcode).strip(',') } wikiActor = getActorFromSeesaawiki(number, dic) acotrList = dic['actor'].split(',') From 150bfa08987bd10a4141216cd722ed596d3d09d4 Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 09:36:09 +0800 Subject: [PATCH 5/7] fix null largeImage error --- AVDC_Main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AVDC_Main.py b/AVDC_Main.py index bbb8920f7..305a9d718 100644 --- a/AVDC_Main.py +++ b/AVDC_Main.py @@ -1014,7 +1014,7 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f i = 1 while i <= int(Config['proxy']['retry']): download_url = json_data['cover'] - if len(json_data['largeImage']) > 0: + if "largeImage" in json_data.keys() && len(json_data['largeImage']) > 0: download_url = json_data['largeImage'] # mgstage 封面下载大图 self.DownloadFileWithFilename(download_url, thumb_name, path, Config, filepath, failed_folder) From c0359d958bddf4c029e723869e4899d8d9a3ebf7 Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 09:36:34 +0800 Subject: [PATCH 6/7] fix null largeImage error --- AVDC_Main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AVDC_Main.py b/AVDC_Main.py index 305a9d718..1a9f0846e 100644 --- a/AVDC_Main.py +++ b/AVDC_Main.py @@ -1014,7 +1014,7 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f i = 1 while i <= int(Config['proxy']['retry']): download_url = json_data['cover'] - if "largeImage" in json_data.keys() && len(json_data['largeImage']) > 0: + if "largeImage" in json_data.keys() and len(json_data['largeImage']) > 0: download_url = json_data['largeImage'] # mgstage 封面下载大图 self.DownloadFileWithFilename(download_url, thumb_name, path, Config, filepath, failed_folder) From b06828c4c4d688ab7dede679d33d80e215407b52 Mon Sep 17 00:00:00 2001 From: xsxiaosa Date: Mon, 27 Jul 2020 11:58:59 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E6=9C=89largeImage=E5=AD=98=E5=9C=A8?= =?UTF-8?q?=E4=BD=9C=E4=B8=BAthumb=E6=97=B6=EF=BC=8C=E7=9B=B4=E6=8E=A5?= =?UTF-8?q?=E4=B8=8B=E8=BD=BDcover=E4=BD=9C=E4=B8=BAposter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AVDC_Main.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/AVDC_Main.py b/AVDC_Main.py index 1a9f0846e..edfc51fc5 100644 --- a/AVDC_Main.py +++ b/AVDC_Main.py @@ -1027,7 +1027,28 @@ def thumbDownload(self, json_data, path, naming_rule, Config, filepath, failed_f self.add_text_main('[+]Thumb Downloaded! ' + thumb_name) else: os.remove(path + '/' + thumb_name) - raise Exception("The Size of Thumb is Error! Deleted " + thumb_name + '!') + raise Exception("The p of Thumb is Error! Deleted " + thumb_name + '!') + # ========================================================================下载poster缩略图 + def posterDownload(self, json_data, path, naming_rule, Config, filepath, failed_folder): + poster_name = naming_rule + '-poster.jpg' + if os.path.exists(path + '/' + poster_name): + self.add_text_main('[+]Poseter Existed! ' + poster_name) + return + i = 1 + while i <= int(Config['proxy']['retry']): + download_url = json_data['cover'] + self.DownloadFileWithFilename(download_url, poster_name, path, Config, filepath, + failed_folder) + if not check_pic(path + '/' + poster_name): + print('[!]Image Download Failed! Trying again. ' + str(i) + '/' + Config['proxy']['retry']) + i = i + 1 + else: + break + if check_pic(path + '/' + poster_name): + self.add_text_main('[+]Poster Downloaded! ' + poster_name) + else: + os.remove(path + '/' + poster_name) + raise Exception("The Size of Poster is Error! Deleted " + poster_name + '!') def deletethumb(self,path, naming_rule): try: @@ -1542,6 +1563,8 @@ def Core_Main(self, filepath, number, mode, count): # imagecut 0 判断人脸位置裁剪缩略图为封面,1 裁剪右半面,3 下载小封面 self.thumbDownload(json_data, path, naming_rule, Config, filepath, failed_folder) if self.Ui.checkBox_download_poster.isChecked(): + if "largeImage" in json_data.keys() and len(json_data['largeImage']) > 0: ## 如果自带大封面了,直接下载小封面 + self.posterDownload(json_data, path, naming_rule, Config, filepath, failed_folder) if self.smallCoverDownload(path, naming_rule, json_data, Config, filepath, failed_folder) == 'small_cover_error': # 下载小封面 json_data['imagecut'] = 0