Skip to content

Commit

Permalink
skip 404 content
Browse files: browse the repository at this point in the history
  • Loading branch information
yzqzss committed Jul 26, 2024
1 parent 5884e89 commit bb5e8b8
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/googlrot/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@
import os

from github import Github, Auth
from github.GithubException import UnknownObjectException as GithubUnknownObjectException
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCollection

from pymongo.server_api import ServerApi
Expand Down Expand Up @@ -101,7 +102,14 @@ async def code_mode(g: Github, googl_perfix_queue_collection: AsyncIOMotorCollec
for result in g.search_code(f"goo.gl/{prefix} AND NOT is:fork"):
logger.info(f"Processing {result.repository.full_name} ==")

content = result.decoded_content.decode("utf-8")
try:
content = result.decoded_content.decode("utf-8")
except GithubUnknownObjectException as e:
if e.message and "404" in e.message:
logger.error(f"404: {e}, skip")
continue
else:
raise e
print("conetnt: ", content[:256])
for url in extractor.gen_urls(content):
assert isinstance(url, str)
Expand Down

0 comments on commit bb5e8b8

Please sign in to comment.