diff --git a/bdfr/__main__.py b/bdfr/__main__.py
index dadba517..51f1ee86 100644
--- a/bdfr/__main__.py
+++ b/bdfr/__main__.py
@@ -60,6 +60,7 @@
     click.option("--max-score", type=int, default=None),
     click.option("--min-score-ratio", type=float, default=None),
     click.option("--max-score-ratio", type=float, default=None),
+    click.option("--stop-on-exist", is_flag=True, default=None),
 ]
 
 _archiver_options = [
diff --git a/bdfr/configuration.py b/bdfr/configuration.py
index 05fc27e8..16438ce3 100644
--- a/bdfr/configuration.py
+++ b/bdfr/configuration.py
@@ -53,6 +53,7 @@ def __init__(self):
         self.upvoted: bool = False
         self.user: list[str] = []
         self.verbose: int = 0
+        self.stop_on_exist: bool = False  # Exit once enough already-downloaded files are seen
 
         # Archiver-specific options
         self.all_comments = False
diff --git a/bdfr/connector.py b/bdfr/connector.py
index 77a4a71a..c33c82ff 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -61,6 +61,7 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl
         self._apply_logging_handlers(itertools.chain(logging_handlers, [file_log]))
         self.run_time = datetime.now().isoformat()
         self._setup_internal_objects()
+        self.existcount = 0  # Already-existing files seen since the last download attempt
 
         self.reddit_lists = self.retrieve_reddit_lists()
 
@@ -350,6 +351,8 @@ def get_multireddits(self) -> list[Iterator]:
 
     def create_filtered_listing_generator(self, reddit_source) -> Iterator:
         sort_function = self.determine_sort_function()
+        if self.args.stop_on_exist and sort_function != praw.models.Subreddit.new:
+            logger.warning("Stopping downloads when an old duplicate is encountered works best when sorted by new.")
         if self.sort_filter in (RedditTypes.SortType.TOP, RedditTypes.SortType.CONTROVERSIAL):
             return sort_function(reddit_source, limit=self.args.limit, time_filter=self.time_filter.value)
         else:
diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 20984e69..0ccc64fa 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -111,10 +111,17 @@ def _download_submission(self, submission: praw.models.Submission):
         for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
             if destination.exists():
                 logger.debug(f"File {destination} from submission {submission.id} already exists, continuing")
+                # Count existing files from non-stickied submissions; exit after 5 with no download in between.
+                if self.args.stop_on_exist and not submission.stickied:
+                    self.existcount += 1
+                    if self.existcount >= 5:
+                        logger.warning("Previously-downloaded threshold met, exiting")
+                        raise SystemExit(0)  # same effect as exit(0) without relying on the site module
                 continue
             elif not self.download_filter.check_resource(res):
                 logger.debug(f"Download filter removed {submission.id} file with URL {submission.url}")
                 continue
+            self.existcount = 0  # A download is about to be attempted, so restart the count
             try:
                 res.download({"max_wait_time": self.args.max_wait_time})
             except errors.BulkDownloaderException as e: