Skip to content

Commit

Permalink
Merge pull request #709 from Serene-Arc/black_formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
Serene-Arc authored Dec 3, 2022
2 parents 0a3b3d7 + 0873a4a commit 60ce138
Show file tree
Hide file tree
Showing 63 changed files with 2,182 additions and 1,793 deletions.
112 changes: 57 additions & 55 deletions bdfr/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,53 +13,54 @@
logger = logging.getLogger()

# click argument/option decorators shared by every subcommand: they are applied
# to `download`, `archive` and `clone` through `_add_options(_common_options)`.
# Every option is declared with default=None (presumably so an option that was
# not given on the command line can be told apart from an explicit value --
# confirm against the configuration handling, which is not visible here).
# NOTE(review): this text is a rendered diff. The single-quoted entries appear
# to be the pre-change lines and the double-quoted entries their reformatted
# replacements; the two runs declare the same options with the same settings.
_common_options = [
click.argument('directory', type=str),
click.option('--authenticate', is_flag=True, default=None),
click.option('--config', type=str, default=None),
click.option('--opts', type=str, default=None),
click.option('--disable-module', multiple=True, default=None, type=str),
click.option('--exclude-id', default=None, multiple=True),
click.option('--exclude-id-file', default=None, multiple=True),
click.option('--file-scheme', default=None, type=str),
click.option('--folder-scheme', default=None, type=str),
click.option('--ignore-user', type=str, multiple=True, default=None),
click.option('--include-id-file', multiple=True, default=None),
click.option('--log', type=str, default=None),
click.option('--saved', is_flag=True, default=None),
click.option('--search', default=None, type=str),
click.option('--submitted', is_flag=True, default=None),
click.option('--subscribed', is_flag=True, default=None),
click.option('--time-format', type=str, default=None),
click.option('--upvoted', is_flag=True, default=None),
click.option('-L', '--limit', default=None, type=int),
click.option('-l', '--link', multiple=True, default=None, type=str),
click.option('-m', '--multireddit', multiple=True, default=None, type=str),
click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
default=None),
click.option('-s', '--subreddit', multiple=True, default=None, type=str),
click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
click.option('-u', '--user', type=str, multiple=True, default=None),
click.option('-v', '--verbose', default=None, count=True),
click.argument("directory", type=str),
click.option("--authenticate", is_flag=True, default=None),
click.option("--config", type=str, default=None),
click.option("--opts", type=str, default=None),
click.option("--disable-module", multiple=True, default=None, type=str),
click.option("--exclude-id", default=None, multiple=True),
click.option("--exclude-id-file", default=None, multiple=True),
click.option("--file-scheme", default=None, type=str),
click.option("--folder-scheme", default=None, type=str),
click.option("--ignore-user", type=str, multiple=True, default=None),
click.option("--include-id-file", multiple=True, default=None),
click.option("--log", type=str, default=None),
click.option("--saved", is_flag=True, default=None),
click.option("--search", default=None, type=str),
click.option("--submitted", is_flag=True, default=None),
click.option("--subscribed", is_flag=True, default=None),
click.option("--time-format", type=str, default=None),
click.option("--upvoted", is_flag=True, default=None),
click.option("-L", "--limit", default=None, type=int),
click.option("-l", "--link", multiple=True, default=None, type=str),
click.option("-m", "--multireddit", multiple=True, default=None, type=str),
click.option(
"-S", "--sort", type=click.Choice(("hot", "top", "new", "controversial", "rising", "relevance")), default=None
),
click.option("-s", "--subreddit", multiple=True, default=None, type=str),
click.option("-t", "--time", type=click.Choice(("all", "hour", "day", "week", "month", "year")), default=None),
click.option("-u", "--user", type=str, multiple=True, default=None),
click.option("-v", "--verbose", default=None, count=True),
]

# click option decorators specific to downloading; they are applied to the
# `download` and `clone` subcommands through `_add_options(_downloader_options)`.
# All options default to None; the score bounds are ints and the score-ratio
# bounds are floats.
# NOTE(review): rendered diff -- the single-quoted entries appear to be the
# pre-change lines, the double-quoted entries their reformatted replacements.
_downloader_options = [
click.option('--make-hard-links', is_flag=True, default=None),
click.option('--max-wait-time', type=int, default=None),
click.option('--no-dupes', is_flag=True, default=None),
click.option('--search-existing', is_flag=True, default=None),
click.option('--skip', default=None, multiple=True),
click.option('--skip-domain', default=None, multiple=True),
click.option('--skip-subreddit', default=None, multiple=True),
click.option('--min-score', type=int, default=None),
click.option('--max-score', type=int, default=None),
click.option('--min-score-ratio', type=float, default=None),
click.option('--max-score-ratio', type=float, default=None),
click.option("--make-hard-links", is_flag=True, default=None),
click.option("--max-wait-time", type=int, default=None),
click.option("--no-dupes", is_flag=True, default=None),
click.option("--search-existing", is_flag=True, default=None),
click.option("--skip", default=None, multiple=True),
click.option("--skip-domain", default=None, multiple=True),
click.option("--skip-subreddit", default=None, multiple=True),
click.option("--min-score", type=int, default=None),
click.option("--max-score", type=int, default=None),
click.option("--min-score-ratio", type=float, default=None),
click.option("--max-score-ratio", type=float, default=None),
]

# click option decorators specific to archiving; they are applied to the
# `archive` and `clone` subcommands through `_add_options(_archiver_options)`.
# `--format` is restricted to xml, json or yaml -- the same three values that
# Archiver.write_entry dispatches on.
# NOTE(review): rendered diff -- the single-quoted entries appear to be the
# pre-change lines, the double-quoted entries their reformatted replacements.
_archiver_options = [
click.option('--all-comments', is_flag=True, default=None),
click.option('--comment-context', is_flag=True, default=None),
click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None),
click.option("--all-comments", is_flag=True, default=None),
click.option("--comment-context", is_flag=True, default=None),
click.option("-f", "--format", type=click.Choice(("xml", "json", "yaml")), default=None),
]


Expand All @@ -68,6 +69,7 @@ def wrap(func):
for opt in opts:
func = opt(func)
return func

return wrap


Expand All @@ -76,7 +78,7 @@ def cli():
pass


@cli.command('download')
@cli.command("download")
@_add_options(_common_options)
@_add_options(_downloader_options)
@click.pass_context
Expand All @@ -88,13 +90,13 @@ def cli_download(context: click.Context, **_):
reddit_downloader = RedditDownloader(config)
reddit_downloader.download()
except Exception:
logger.exception('Downloader exited unexpectedly')
logger.exception("Downloader exited unexpectedly")
raise
else:
logger.info('Program complete')
logger.info("Program complete")


@cli.command('archive')
@cli.command("archive")
@_add_options(_common_options)
@_add_options(_archiver_options)
@click.pass_context
Expand All @@ -106,13 +108,13 @@ def cli_archive(context: click.Context, **_):
reddit_archiver = Archiver(config)
reddit_archiver.download()
except Exception:
logger.exception('Archiver exited unexpectedly')
logger.exception("Archiver exited unexpectedly")
raise
else:
logger.info('Program complete')
logger.info("Program complete")


@cli.command('clone')
@cli.command("clone")
@_add_options(_common_options)
@_add_options(_archiver_options)
@_add_options(_downloader_options)
Expand All @@ -125,10 +127,10 @@ def cli_clone(context: click.Context, **_):
reddit_scraper = RedditCloner(config)
reddit_scraper.download()
except Exception:
logger.exception('Scraper exited unexpectedly')
logger.exception("Scraper exited unexpectedly")
raise
else:
logger.info('Program complete')
logger.info("Program complete")


def setup_logging(verbosity: int):
Expand All @@ -141,7 +143,7 @@ def filter(self, record: logging.LogRecord) -> bool:
stream = logging.StreamHandler(sys.stdout)
stream.addFilter(StreamExceptionFilter())

formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s")
stream.setFormatter(formatter)

logger.addHandler(stream)
Expand All @@ -151,10 +153,10 @@ def filter(self, record: logging.LogRecord) -> bool:
stream.setLevel(logging.DEBUG)
else:
stream.setLevel(9)
logging.getLogger('praw').setLevel(logging.CRITICAL)
logging.getLogger('prawcore').setLevel(logging.CRITICAL)
logging.getLogger('urllib3').setLevel(logging.CRITICAL)
logging.getLogger("praw").setLevel(logging.CRITICAL)
logging.getLogger("prawcore").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").setLevel(logging.CRITICAL)


# Script entry point: when the module is executed directly, invoke `cli`, the
# object on which the download/archive/clone commands are registered.
# (Diff view: the two `if` lines are the old and new spelling of one statement.)
if __name__ == '__main__':
if __name__ == "__main__":
cli()
28 changes: 14 additions & 14 deletions bdfr/archive_entry/base_archive_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ def compile(self) -> dict:
# Convert a comment into a plain dict and recurse into its replies, so the
# returned dict carries the whole reply tree under the "replies" key.
#
# in_comment: the comment to convert (annotated as Comment).
# Returns: dict with the keys listed below; "replies" is a list of dicts of
#          the same shape, one per direct reply.
#
# (Diff view: every single-quoted line is followed further down by its
# double-quoted replacement; both spell the same key and value.)
@staticmethod
def _convert_comment_to_dict(in_comment: Comment) -> dict:
out_dict = {
'author': in_comment.author.name if in_comment.author else 'DELETED',
'id': in_comment.id,
'score': in_comment.score,
'subreddit': in_comment.subreddit.display_name,
'author_flair': in_comment.author_flair_text,
'submission': in_comment.submission.id,
'stickied': in_comment.stickied,
'body': in_comment.body,
'is_submitter': in_comment.is_submitter,
'distinguished': in_comment.distinguished,
'created_utc': in_comment.created_utc,
'parent_id': in_comment.parent_id,
'replies': [],
# A falsy author is recorded under the placeholder name "DELETED".
"author": in_comment.author.name if in_comment.author else "DELETED",
"id": in_comment.id,
"score": in_comment.score,
"subreddit": in_comment.subreddit.display_name,
"author_flair": in_comment.author_flair_text,
"submission": in_comment.submission.id,
"stickied": in_comment.stickied,
"body": in_comment.body,
"is_submitter": in_comment.is_submitter,
"distinguished": in_comment.distinguished,
"created_utc": in_comment.created_utc,
"parent_id": in_comment.parent_id,
"replies": [],
}
# NOTE(review): presumably expands every "load more comments" placeholder
# before iterating (PRAW replace_more with limit=None) -- confirm against
# the PRAW docs; this may cost many network requests on large threads.
in_comment.replies.replace_more(limit=None)
# Recursion: each direct reply is converted with this same function.
for reply in in_comment.replies:
out_dict['replies'].append(BaseArchiveEntry._convert_comment_to_dict(reply))
out_dict["replies"].append(BaseArchiveEntry._convert_comment_to_dict(reply))
return out_dict
2 changes: 1 addition & 1 deletion bdfr/archive_entry/comment_archive_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ def __init__(self, comment: praw.models.Comment):
# Build and return the archive dict for the wrapped comment.
# Steps: refresh self.source, convert it (and its replies) with
# _convert_comment_to_dict, then add the title of the parent submission under
# "submission_title". The result is also stored on self.post_details.
# (Diff view: the two "submission_title" lines are old/new spellings of one
# assignment.)
def compile(self) -> dict:
self.source.refresh()
self.post_details = self._convert_comment_to_dict(self.source)
self.post_details['submission_title'] = self.source.submission.title
self.post_details["submission_title"] = self.source.submission.title
return self.post_details
38 changes: 19 additions & 19 deletions bdfr/archive_entry/submission_archive_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,32 @@ def compile(self) -> dict:
comments = self._get_comments()
self._get_post_details()
out = self.post_details
out['comments'] = comments
out["comments"] = comments
return out

# Populate self.post_details with a fixed set of attributes copied from
# self.source. Nothing is returned; callers read self.post_details afterwards.
# (Diff view: the single-quoted entries are followed by their double-quoted
# replacements; both spell the same key and value.)
def _get_post_details(self):
self.post_details = {
'title': self.source.title,
'name': self.source.name,
'url': self.source.url,
'selftext': self.source.selftext,
'score': self.source.score,
'upvote_ratio': self.source.upvote_ratio,
'permalink': self.source.permalink,
'id': self.source.id,
'author': self.source.author.name if self.source.author else 'DELETED',
'link_flair_text': self.source.link_flair_text,
'num_comments': self.source.num_comments,
'over_18': self.source.over_18,
'spoiler': self.source.spoiler,
'pinned': self.source.pinned,
'locked': self.source.locked,
'distinguished': self.source.distinguished,
'created_utc': self.source.created_utc,
"title": self.source.title,
"name": self.source.name,
"url": self.source.url,
"selftext": self.source.selftext,
"score": self.source.score,
"upvote_ratio": self.source.upvote_ratio,
"permalink": self.source.permalink,
"id": self.source.id,
# A falsy author is recorded under the placeholder name "DELETED".
"author": self.source.author.name if self.source.author else "DELETED",
"link_flair_text": self.source.link_flair_text,
"num_comments": self.source.num_comments,
"over_18": self.source.over_18,
"spoiler": self.source.spoiler,
"pinned": self.source.pinned,
"locked": self.source.locked,
"distinguished": self.source.distinguished,
"created_utc": self.source.created_utc,
}

def _get_comments(self) -> list[dict]:
logger.debug(f'Retrieving full comment tree for submission {self.source.id}')
logger.debug(f"Retrieving full comment tree for submission {self.source.id}")
comments = []
self.source.comments.replace_more(limit=None)
for top_level_comment in self.source.comments:
Expand Down
49 changes: 26 additions & 23 deletions bdfr/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,28 @@ def download(self):
for generator in self.reddit_lists:
for submission in generator:
try:
if (submission.author and submission.author.name in self.args.ignore_user) or \
(submission.author is None and 'DELETED' in self.args.ignore_user):
if (submission.author and submission.author.name in self.args.ignore_user) or (
submission.author is None and "DELETED" in self.args.ignore_user
):
logger.debug(
f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user')
f"Submission {submission.id} in {submission.subreddit.display_name} skipped"
f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user'
)
continue
if submission.id in self.excluded_submission_ids:
logger.debug(f'Object {submission.id} in exclusion list, skipping')
logger.debug(f"Object {submission.id} in exclusion list, skipping")
continue
logger.debug(f'Attempting to archive submission {submission.id}')
logger.debug(f"Attempting to archive submission {submission.id}")
self.write_entry(submission)
except prawcore.PrawcoreException as e:
logger.error(f'Submission {submission.id} failed to be archived due to a PRAW exception: {e}')
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")

def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
supplied_submissions = []
for sub_id in self.args.link:
if len(sub_id) == 6:
supplied_submissions.append(self.reddit_instance.submission(id=sub_id))
elif re.match(r'^\w{7}$', sub_id):
elif re.match(r"^\w{7}$", sub_id):
supplied_submissions.append(self.reddit_instance.comment(id=sub_id))
else:
supplied_submissions.append(self.reddit_instance.submission(url=sub_id))
Expand All @@ -60,7 +62,7 @@ def get_user_data(self) -> list[Iterator]:
if self.args.user and self.args.all_comments:
sort = self.determine_sort_function()
for user in self.args.user:
logger.debug(f'Retrieving comments of user {user}')
logger.debug(f"Retrieving comments of user {user}")
results.append(sort(self.reddit_instance.redditor(user).comments, limit=self.args.limit))
return results

Expand All @@ -71,43 +73,44 @@ def _pull_lever_entry_factory(praw_item: Union[praw.models.Submission, praw.mode
elif isinstance(praw_item, praw.models.Comment):
return CommentArchiveEntry(praw_item)
else:
raise ArchiverError(f'Factory failed to classify item of type {type(praw_item).__name__}')
raise ArchiverError(f"Factory failed to classify item of type {type(praw_item).__name__}")

# Archive a single submission or comment in the format named by
# self.args.format ("json", "xml" or "yaml").
#
# praw_item: the submission or comment to archive.
# Raises: ArchiverError when self.args.format is none of the three known
#         formats (the entry factory raises the same error type for an item
#         it cannot classify).
#
# (Diff view: each single-quoted line is immediately followed by its
# double-quoted replacement; they are one statement, not two.)
def write_entry(self, praw_item: Union[praw.models.Submission, praw.models.Comment]):
# With --comment-context, a comment is swapped for its parent submission so
# the whole submission is archived instead of the lone comment.
if self.args.comment_context and isinstance(praw_item, praw.models.Comment):
logger.debug(f'Converting comment {praw_item.id} to submission {praw_item.submission.id}')
logger.debug(f"Converting comment {praw_item.id} to submission {praw_item.submission.id}")
praw_item = praw_item.submission
archive_entry = self._pull_lever_entry_factory(praw_item)
if self.args.format == 'json':
if self.args.format == "json":
self._write_entry_json(archive_entry)
elif self.args.format == 'xml':
elif self.args.format == "xml":
self._write_entry_xml(archive_entry)
elif self.args.format == 'yaml':
elif self.args.format == "yaml":
self._write_entry_yaml(archive_entry)
else:
raise ArchiverError(f'Unknown format {self.args.format} given')
logger.info(f'Record for entry item {praw_item.id} written to disk')
raise ArchiverError(f"Unknown format {self.args.format} given")
logger.info(f"Record for entry item {praw_item.id} written to disk")

# Serialise the compiled entry with json.dumps and write it to disk.
# The Resource is built only to carry the source item and the ".json"
# extension to _write_content_to_disk; its second and third arguments are
# filled with an empty string and a no-op callable (their meaning is defined by
# Resource, which is not visible here).
# (Diff view: the two `resource = ...` lines are old/new spellings of one line.)
def _write_entry_json(self, entry: BaseArchiveEntry):
resource = Resource(entry.source, '', lambda: None, '.json')
resource = Resource(entry.source, "", lambda: None, ".json")
content = json.dumps(entry.compile())
self._write_content_to_disk(resource, content)

# Serialise the compiled entry with dict2xml and write it to disk.
# The Resource is built only to carry the source item and the ".xml"
# extension to _write_content_to_disk; its second and third arguments are
# filled with an empty string and a no-op callable.
# NOTE(review): wrap="root" is presumably the name of the enclosing root
# element -- confirm against the dict2xml documentation.
# (Diff view: each single-quoted line is followed by its double-quoted
# replacement.)
def _write_entry_xml(self, entry: BaseArchiveEntry):
resource = Resource(entry.source, '', lambda: None, '.xml')
content = dict2xml.dict2xml(entry.compile(), wrap='root')
resource = Resource(entry.source, "", lambda: None, ".xml")
content = dict2xml.dict2xml(entry.compile(), wrap="root")
self._write_content_to_disk(resource, content)

# Serialise the compiled entry with yaml.dump and write it to disk.
# The Resource is built only to carry the source item and the ".yaml"
# extension to _write_content_to_disk; its second and third arguments are
# filled with an empty string and a no-op callable.
# (Diff view: the two `resource = ...` lines are old/new spellings of one line.)
def _write_entry_yaml(self, entry: BaseArchiveEntry):
resource = Resource(entry.source, '', lambda: None, '.yaml')
resource = Resource(entry.source, "", lambda: None, ".yaml")
content = yaml.dump(entry.compile())
self._write_content_to_disk(resource, content)

# Write already-serialised text to the path derived from `resource`.
#
# resource: supplies the source item and file extension used to build the path
#           and the log message.
# content:  the text to write.
#
# The target path comes from self.file_name_formatter.format_path(...) rooted
# at self.download_directory; missing parent directories are created first.
# The file is opened in "w" mode with UTF-8 encoding.
# (Diff view: each single-quoted line is followed by its double-quoted
# replacement.)
def _write_content_to_disk(self, resource: Resource, content: str):
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with open(file_path, 'w', encoding="utf-8") as file:
with open(file_path, "w", encoding="utf-8") as file:
logger.debug(
f'Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}'
f' format at {file_path}')
# extension[1:] drops the leading "." so the message reads e.g. "JSON".
f"Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}"
f" format at {file_path}"
)
file.write(content)
2 changes: 1 addition & 1 deletion bdfr/cloner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ def download(self):
self._download_submission(submission)
self.write_entry(submission)
except prawcore.PrawcoreException as e:
logger.error(f'Submission {submission.id} failed to be cloned due to a PRAW exception: {e}')
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
Loading

0 comments on commit 60ce138

Please sign in to comment.