From d33a9b64e60d2bab03857e2ddab4d926f7790714 Mon Sep 17 00:00:00 2001
From: Erik Johnson
Date: Sun, 24 Nov 2024 14:22:03 +0100
Subject: [PATCH] Retry the current action 5x every 60s pause instead of skipping to the next

---
 bdfr/archiver.py   | 46 ++++++++++++++++++++++--------------------
 bdfr/cloner.py     | 24 ++++++++++++----------
 bdfr/connector.py  | 50 ++++++++++++++++++++++++----------------------
 bdfr/downloader.py | 22 ++++++++++----------
 4 files changed, 75 insertions(+), 67 deletions(-)

diff --git a/bdfr/archiver.py b/bdfr/archiver.py
index 52b4649a..c563f58a 100644
--- a/bdfr/archiver.py
+++ b/bdfr/archiver.py
@@ -31,28 +31,30 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl
 
     def download(self):
         for generator in self.reddit_lists:
-            try:
-                for submission in generator:
-                    try:
-                        if (submission.author and submission.author.name in self.args.ignore_user) or (
-                            submission.author is None and "DELETED" in self.args.ignore_user
-                        ):
-                            logger.debug(
-                                f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
-                                f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
-                            )
-                            continue
-                        if submission.id in self.excluded_submission_ids:
-                            logger.debug(f"Object {submission.id} in exclusion list, skipping")
-                            continue
-                        logger.debug(f"Attempting to archive submission {submission.id}")
-                        self.write_entry(submission)
-                    except prawcore.PrawcoreException as e:
-                        logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
-            except prawcore.PrawcoreException as e:
-                logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
-                logger.debug("Waiting 60 seconds to continue")
-                sleep(60)
+            for retry in range(5):
+                try:
+                    for submission in generator:
+                        try:
+                            if (submission.author and submission.author.name in self.args.ignore_user) or (
+                                submission.author is None and "DELETED" in self.args.ignore_user
+                            ):
+                                logger.debug(
+                                    f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
+                                    f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
+                                )
+                                continue
+                            if submission.id in self.excluded_submission_ids:
+                                logger.debug(f"Object {submission.id} in exclusion list, skipping")
+                                continue
+                            logger.debug(f"Attempting to archive submission {submission.id}")
+                            self.write_entry(submission)
+                        except prawcore.PrawcoreException as e:
+                            logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
+                    break
+                except prawcore.PrawcoreException as e:
+                    logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
+                    logger.debug("Waiting 60 seconds to continue")
+                    sleep(60)
 
     def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
         supplied_submissions = []
diff --git a/bdfr/cloner.py b/bdfr/cloner.py
index df71c286..5ec5b57f 100644
--- a/bdfr/cloner.py
+++ b/bdfr/cloner.py
@@ -20,14 +20,16 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl
 
     def download(self):
         for generator in self.reddit_lists:
-            try:
-                for submission in generator:
-                    try:
-                        self._download_submission(submission)
-                        self.write_entry(submission)
-                    except prawcore.PrawcoreException as e:
-                        logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
-            except prawcore.PrawcoreException as e:
-                logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
-                logger.debug("Waiting 60 seconds to continue")
-                sleep(60)
+            for retry in range(5):
+                try:
+                    for submission in generator:
+                        try:
+                            self._download_submission(submission)
+                            self.write_entry(submission)
+                        except prawcore.PrawcoreException as e:
+                            logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
+                    break
+                except prawcore.PrawcoreException as e:
+                    logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
+                    logger.debug("Waiting 60 seconds to continue")
+                    sleep(60)
diff --git a/bdfr/connector.py b/bdfr/connector.py
index 77a4a71a..5fbdb73f 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -362,32 +362,34 @@ def get_user_data(self) -> list[Iterator]:
                 return []
             generators = []
             for user in self.args.user:
-                try:
+                for retry in range(5):
                     try:
-                        self.check_user_existence(user)
-                    except errors.BulkDownloaderException as e:
-                        logger.error(e)
-                        continue
-                    if self.args.submitted:
-                        logger.debug(f"Retrieving submitted posts of user {user}")
-                        generators.append(
-                            self.create_filtered_listing_generator(
-                                self.reddit_instance.redditor(user).submissions,
+                        try:
+                            self.check_user_existence(user)
+                        except errors.BulkDownloaderException as e:
+                            logger.error(e)
+                            continue
+                        if self.args.submitted:
+                            logger.debug(f"Retrieving submitted posts of user {user}")
+                            generators.append(
+                                self.create_filtered_listing_generator(
+                                    self.reddit_instance.redditor(user).submissions,
+                                )
                             )
-                        )
-                    if not self.authenticated and any((self.args.upvoted, self.args.saved)):
-                        logger.warning("Accessing user lists requires authentication")
-                    else:
-                        if self.args.upvoted:
-                            logger.debug(f"Retrieving upvoted posts of user {user}")
-                            generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
-                        if self.args.saved:
-                            logger.debug(f"Retrieving saved posts of user {user}")
-                            generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
-                except prawcore.PrawcoreException as e:
-                    logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
-                    logger.debug("Waiting 60 seconds to continue")
-                    sleep(60)
+                        if not self.authenticated and any((self.args.upvoted, self.args.saved)):
+                            logger.warning("Accessing user lists requires authentication")
+                        else:
+                            if self.args.upvoted:
+                                logger.debug(f"Retrieving upvoted posts of user {user}")
+                                generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
+                            if self.args.saved:
+                                logger.debug(f"Retrieving saved posts of user {user}")
+                                generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
+                        break
+                    except prawcore.PrawcoreException as e:
+                        logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
+                        logger.debug("Waiting 60 seconds to continue")
+                        sleep(60)
             return generators
         else:
             return []
diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 20984e69..a9731ed2 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -44,16 +44,18 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl
 
     def download(self):
         for generator in self.reddit_lists:
-            try:
-                for submission in generator:
-                    try:
-                        self._download_submission(submission)
-                    except prawcore.PrawcoreException as e:
-                        logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
-            except prawcore.PrawcoreException as e:
-                logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
-                logger.debug("Waiting 60 seconds to continue")
-                sleep(60)
+            for retry in range(5):
+                try:
+                    for submission in generator:
+                        try:
+                            self._download_submission(submission)
+                        except prawcore.PrawcoreException as e:
+                            logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
+                    break
+                except prawcore.PrawcoreException as e:
+                    logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
+                    logger.debug("Waiting 60 seconds to continue")
+                    sleep(60)
 
     def _download_submission(self, submission: praw.models.Submission):
         if submission.id in self.excluded_submission_ids:
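
As a standalone illustration of the pattern the patch applies to each download() loop (not part of the patch itself): a minimal sketch of the same retry-with-pause idea in isolation. The helper name consume_with_retries and the handle callback are hypothetical names used only for this sketch; the 5 attempts and 60-second pause mirror the values in the patch.

import logging
from time import sleep

import prawcore

logger = logging.getLogger(__name__)


def consume_with_retries(generator, handle, attempts=5, pause=60):
    # Retry the whole listing up to `attempts` times instead of skipping to
    # the next one when a PRAW-level exception escapes the listing itself.
    for retry in range(attempts):
        try:
            for submission in generator:
                try:
                    handle(submission)  # per-submission work (download, archive, clone)
                except prawcore.PrawcoreException as e:
                    logger.error(f"Submission failed due to a PRAW exception: {e}")
            break  # listing fully consumed, stop retrying
        except prawcore.PrawcoreException as e:
            logger.error(f"Listing failed due to a PRAW exception: {e}")
            logger.debug(f"Waiting {pause} seconds to continue")
            sleep(pause)

The break after the inner for loop is what separates a clean pass (stop retrying, move to the next generator) from a PrawcoreException raised by the listing, which triggers the pause and another attempt; after the fifth failed attempt the generator is given up on, as in the patch.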