add wechat book api

wnma3mz · Feb 10, 2021 · 85a679d · 85a679d
1 parent 0a75499
commit 85a679d
Show file tree

Hide file tree

Showing 11 changed files with 365 additions and 323 deletions.
diff --git a/setup.py b/setup.py
@@ -6,17 +6,15 @@
 
 setuptools.setup(
     name="wechatarticles",
-    version="0.5.7",
+    version="0.5.8",
     author="wnma3mz",
     author_email="[email protected]",
     description="wechat articles scrapy",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/wnma3mz/wechat_articles_spider",
     packages=setuptools.find_packages(),
-    install_requires=[
-        'requests>=2.20.0', 'beautifulsoup4>=4.7.1'
-    ],
+    install_requires=["requests>=2.20.0", "beautifulsoup4>=4.7.1"],
     classifiers=(
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: Apache Software License",

diff --git a/wechatarticles/ArticlesAPI.py b/wechatarticles/ArticlesAPI.py
@@ -9,14 +9,16 @@ class ArticlesAPI(object):
     整合ArticlesInfo和ArticlesInfo, 方便调用
     """
 
-    def __init__(self,
-                 username=None,
-                 password=None,
-                 official_cookie=None,
-                 token=None,
-                 appmsg_token=None,
-                 wechat_cookie=None,
-                 outfile=None):
+    def __init__(
+        self,
+        username=None,
+        password=None,
+        official_cookie=None,
+        token=None,
+        appmsg_token=None,
+        wechat_cookie=None,
+        outfile=None,
+    ):
         """
         初始化参数
         Parameters
@@ -47,9 +49,9 @@ def __init__(self,
             raise SystemError("please check your paramse")
 
         # 支持两种方式， mitmproxy自动获取参数和手动获取参数
-        if (appmsg_token == None) and (wechat_cookie == None) and (outfile !=
-                                                                   None):
+        if (appmsg_token == None) and (wechat_cookie == None) and (outfile != None):
             from .ReadOutfile import Reader
+
             reader = Reader()
             reader.contral(outfile)
             self.appmsg_token, self.cookie = reader.request(outfile)
@@ -83,38 +85,38 @@ def complete_info(self, nickname, begin=0, count=5):
                 'comments': 文章评论信息
                     {
                         "base_resp": {
-                            "errmsg": "ok", 
+                            "errmsg": "ok",
                             "ret": 0
-                        }, 
+                        },
                         "elected_comment": [
                             {
-                                "content": 用户评论文字, 
-                                "content_id": "6846263421277569047", 
-                                "create_time": 1520098511, 
-                                "id": 3, 
-                                "is_from_friend": 0, 
-                                "is_from_me": 0, 
+                                "content": 用户评论文字,
+                                "content_id": "6846263421277569047",
+                                "create_time": 1520098511,
+                                "id": 3,
+                                "is_from_friend": 0,
+                                "is_from_me": 0,
                                 "is_top": 0, 是否被置顶
-                                "like_id": 10001, 
-                                "like_num": 3, 
-                                "like_status": 0, 
-                                "logo_url": "http://wx.qlogo.cn/mmhead/OibRNdtlJdkFLMHYLMR92Lvq0PicDpJpbnaicP3Z6kVcCicLPVjCWbAA9w/132", 
-                                "my_id": 23, 
-                                "nick_name": 评论用户的名字, 
+                                "like_id": 10001,
+                                "like_num": 3,
+                                "like_status": 0,
+                                "logo_url": "http://wx.qlogo.cn/mmhead/OibRNdtlJdkFLMHYLMR92Lvq0PicDpJpbnaicP3Z6kVcCicLPVjCWbAA9w/132",
+                                "my_id": 23,
+                                "nick_name": 评论用户的名字,
                                 "reply": {
                                     "reply_list": [ ]
                                 }
                             }
-                        ], 
+                        ],
                         "elected_comment_total_cnt": 3, 评论总数
-                        "enabled": 1, 
-                        "friend_comment": [ ], 
-                        "is_fans": 1, 
-                        "logo_url": "http://wx.qlogo.cn/mmhead/Q3auHgzwzM6GAic0FAHOu9Gtv5lEu5kUqO6y6EjEFjAhuhUNIS7Y2AQ/132", 
-                        "my_comment": [ ], 
-                        "nick_name": 当前用户名, 
+                        "enabled": 1,
+                        "friend_comment": [ ],
+                        "is_fans": 1,
+                        "logo_url": "http://wx.qlogo.cn/mmhead/Q3auHgzwzM6GAic0FAHOu9Gtv5lEu5kUqO6y6EjEFjAhuhUNIS7Y2AQ/132",
+                        "my_comment": [ ],
+                        "nick_name": 当前用户名,
                         "only_fans_can_comment": false
-                    }, 
+                    },
                 'cover': 封面的url, 'digest': 文章摘要,
                 'itemidx': 1,
                 'like_num': 18, 文章点赞数
@@ -128,15 +130,16 @@ def complete_info(self, nickname, begin=0, count=5):
         """
         # 获取文章数据
         artiacle_data = self.officical.articles(
-            nickname, begin=str(begin), count=str(count))
+            nickname, begin=str(begin), count=str(count)
+        )
 
         # 提取每个文章的url，获取文章的点赞、阅读、评论信息，并加入到原来的json中
         for data in artiacle_data:
             article_url = data["link"]
             comments = self.wechat.comments(article_url)
             read_like_nums = self.wechat.read_like_nums(article_url)
             data["comments"] = comments
-            data["read_num"], data["like_num"], data['old_like_num'] = read_like_nums
+            data["read_num"], data["like_num"], data["old_like_num"] = read_like_nums
 
         return artiacle_data
 
@@ -147,7 +150,7 @@ def __extract_info(self, articles_data):
             comments = self.wechat.comments(article_url)
             read_like_nums = self.wechat.read_like_nums(article_url)
             data["comments"] = comments
-            data["read_num"], data["like_num"], data['old_like_num'] = read_like_nums
+            data["read_num"], data["like_num"], data["old_like_num"] = read_like_nums
 
         return articles_data
 
@@ -172,38 +175,38 @@ def continue_info(self, nickname, begin=0):
                 'comments': 文章评论信息
                     {
                         "base_resp": {
-                            "errmsg": "ok", 
+                            "errmsg": "ok",
                             "ret": 0
-                        }, 
+                        },
                         "elected_comment": [
                             {
-                                "content": 用户评论文字, 
-                                "content_id": "6846263421277569047", 
-                                "create_time": 1520098511, 
-                                "id": 3, 
-                                "is_from_friend": 0, 
-                                "is_from_me": 0, 
+                                "content": 用户评论文字,
+                                "content_id": "6846263421277569047",
+                                "create_time": 1520098511,
+                                "id": 3,
+                                "is_from_friend": 0,
+                                "is_from_me": 0,
                                 "is_top": 0, 是否被置顶
-                                "like_id": 10001, 
-                                "like_num": 3, 
-                                "like_status": 0, 
-                                "logo_url": "http://wx.qlogo.cn/mmhead/OibRNdtlJdkFLMHYLMR92Lvq0PicDpJpbnaicP3Z6kVcCicLPVjCWbAA9w/132", 
-                                "my_id": 23, 
-                                "nick_name": 评论用户的名字, 
+                                "like_id": 10001,
+                                "like_num": 3,
+                                "like_status": 0,
+                                "logo_url": "http://wx.qlogo.cn/mmhead/OibRNdtlJdkFLMHYLMR92Lvq0PicDpJpbnaicP3Z6kVcCicLPVjCWbAA9w/132",
+                                "my_id": 23,
+                                "nick_name": 评论用户的名字,
                                 "reply": {
                                     "reply_list": [ ]
                                 }
                             }
-                        ], 
+                        ],
                         "elected_comment_total_cnt": 3, 评论总数
-                        "enabled": 1, 
-                        "friend_comment": [ ], 
-                        "is_fans": 1, 
-                        "logo_url": "http://wx.qlogo.cn/mmhead/Q3auHgzwzM6GAic0FAHOu9Gtv5lEu5kUqO6y6EjEFjAhuhUNIS7Y2AQ/132", 
-                        "my_comment": [ ], 
-                        "nick_name": 当前用户名, 
+                        "enabled": 1,
+                        "friend_comment": [ ],
+                        "is_fans": 1,
+                        "logo_url": "http://wx.qlogo.cn/mmhead/Q3auHgzwzM6GAic0FAHOu9Gtv5lEu5kUqO6y6EjEFjAhuhUNIS7Y2AQ/132",
+                        "my_comment": [ ],
+                        "nick_name": 当前用户名,
                         "only_fans_can_comment": false
-                    }, 
+                    },
                 'cover': 封面的url, 'digest': 文章摘要,
                 'itemidx': 1,
                 'like_num': 18, 文章点赞数
@@ -222,7 +225,9 @@ def continue_info(self, nickname, begin=0):
                 # 获取文章数据
                 artiacle_datas.append(
                     self.officical.articles(
-                        nickname, begin=str(begin), count=str(count)))
+                        nickname, begin=str(begin), count=str(count)
+                    )
+                )
             except Exception as e:
                 print(e)
                 break
@@ -232,6 +237,7 @@ def continue_info(self, nickname, begin=0):
 
         def flatten(x):
             return [y for l in x for y in flatten(l)] if type(x) is list else [x]
+
         # flatten = lambda x: [y for l in x for y in flatten(l)] if type(x) is list else [x]
         print("第{}篇文章爬取失败，请过段时间再次尝试或换个帐号继续爬取".format(begin))
         return self.__extract_info(flatten(artiacle_datas))
diff --git a/wechatarticles/ArticlesInfo.py b/wechatarticles/ArticlesInfo.py
@@ -8,13 +8,8 @@ class ArticlesInfo(object):
     """
     登录WeChat，获取更加详细的推文信息。如点赞数、阅读数、评论等
     """
-    def __init__(self,
-                 appmsg_token,
-                 cookie,
-                 proxies={
-                     'http': None,
-                     'https': None
-                 }):
+
+    def __init__(self, appmsg_token, cookie, proxies={"http": None, "https": None}):
         """
         初始化参数
         Parameters
@@ -32,9 +27,8 @@ def __init__(self,
         self.s.trust_env = False
         self.appmsg_token = appmsg_token
         self.headers = {
-            "User-Agent":
-            "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0Chrome/57.0.2987.132 MQQBrowser/6.2 Mobile",
-            "Cookie": cookie
+            "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0Chrome/57.0.2987.132 MQQBrowser/6.2 Mobile",
+            "Cookie": cookie,
         }
         self.data = {
             "is_only_read": "1",
@@ -58,8 +52,7 @@ def __verify_url(self, article_url):
         verify_lst = ["mp.weixin.qq.com", "__biz", "mid", "sn", "idx"]
         for string in verify_lst:
             if string not in article_url:
-                raise Exception(
-                    "params is error, please check your article_url")
+                raise Exception("params is error, please check your article_url")
 
     def read_like_nums(self, article_url):
         """
@@ -76,8 +69,11 @@ def read_like_nums(self, article_url):
         """
         try:
             appmsgstat = self.__get_appmsgext(article_url)["appmsgstat"]
-            return appmsgstat["read_num"], appmsgstat["like_num"], appmsgstat[
-                "old_like_num"]
+            return (
+                appmsgstat["read_num"],
+                appmsgstat["like_num"],
+                appmsgstat["old_like_num"],
+            )
         except Exception:
             raise Exception("params is error, please check your article_url")
 
@@ -130,11 +126,10 @@ def comments(self, article_url):
         __biz, _, idx, _ = self.__get_params(article_url)
         getcomment_url = "https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz={}&idx={}&comment_id={}&limit=100"
         try:
-            url = getcomment_url.format(__biz, idx,
-                                        self.__get_comment_id(article_url))
-            comment_json = self.s.get(url,
-                                      headers=self.headers,
-                                      proxies=self.proxies).json()
+            url = getcomment_url.format(__biz, idx, self.__get_comment_id(article_url))
+            comment_json = self.s.get(
+                url, headers=self.headers, proxies=self.proxies
+            ).json()
         except Exception as e:
             print(e)
             comment_json = {}
@@ -155,8 +150,7 @@ def __get_comment_id(self, article_url):
         """
         res = self.s.get(article_url, data=self.data, proxies=self.proxies)
         # 使用正则提取comment_id
-        comment_id = re.findall(r'comment_id = "\d+"',
-                                res.text)[0].split(" ")[-1][1:-1]
+        comment_id = re.findall(r'comment_id = "\d+"', res.text)[0].split(" ")[-1][1:-1]
         return comment_id
 
     def __get_params(self, article_url):
@@ -177,7 +171,7 @@ def __get_params(self, article_url):
 
         # 切分url, 提取相应的参数
         string_lst = article_url.split("?")[1].split("&")
-        dict_value = [string[string.index("=") + 1:] for string in string_lst]
+        dict_value = [string[string.index("=") + 1 :] for string in string_lst]
         __biz, mid, idx, sn, *_ = dict_value
         sn = sn[:-3] if sn[-3] == "#" else sn
 
@@ -213,21 +207,18 @@ def __get_appmsgext(self, article_url):
 
         # 将params参数换到data中请求。这一步貌似不换也行
         origin_url = "https://mp.weixin.qq.com/mp/getappmsgext?"
-        appmsgext_url = origin_url + "appmsg_token={}&x5=0".format(
-            self.appmsg_token)
+        appmsgext_url = origin_url + "appmsg_token={}&x5=0".format(self.appmsg_token)
         self.data["__biz"] = __biz
         self.data["mid"] = mid
         self.data["sn"] = sn
         self.data["idx"] = idx
 
         # appmsgext_url = origin_url + "__biz={}&mid={}&sn={}&idx={}&appmsg_token={}&x5=1".format(
         #     __biz, mid, sn, idx, self.appmsg_token)
-        appmsgext_json = requests.post(appmsgext_url,
-                                       headers=self.headers,
-                                       data=self.data,
-                                       proxies=self.proxies).json()
+        appmsgext_json = requests.post(
+            appmsgext_url, headers=self.headers, data=self.data, proxies=self.proxies
+        ).json()
 
         if "appmsgstat" not in appmsgext_json.keys():
-            raise Exception(
-                "get info error, please check your cookie and appmsg_token")
+            raise Exception("get info error, please check your cookie and appmsg_token")
         return appmsgext_json