Skip to content

Commit

Permalink
Stop on existing
Browse files: browse the repository at this point in the history
  • Loading branch information
Erik Johnson committed Nov 24, 2024
1 parent 8c293a4 commit 0b4326e
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 0 deletions.
1 change: 1 addition & 0 deletions bdfr/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
click.option("--max-score", type=int, default=None),
click.option("--min-score-ratio", type=float, default=None),
click.option("--max-score-ratio", type=float, default=None),
click.option("--stop-on-exist", is_flag=True, default=None),
]

_archiver_options = [
Expand Down
1 change: 1 addition & 0 deletions bdfr/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self):
self.upvoted: bool = False
self.user: list[str] = []
self.verbose: int = 0
self.stop_on_exist: bool = False

# Archiver-specific options
self.all_comments = False
Expand Down
3 changes: 3 additions & 0 deletions bdfr/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl
self._apply_logging_handlers(itertools.chain(logging_handlers, [file_log]))
self.run_time = datetime.now().isoformat()
self._setup_internal_objects()
self.existcount=0

self.reddit_lists = self.retrieve_reddit_lists()

Expand Down Expand Up @@ -350,6 +351,8 @@ def get_multireddits(self) -> list[Iterator]:

def create_filtered_listing_generator(self, reddit_source) -> Iterator:
sort_function = self.determine_sort_function()
if self.args.stop_on_exist and sort_function != praw.models.Subreddit.new:
logger.warning("Stopping downloads when an old duplicate is encountered works best when sorted by new.")
if self.sort_filter in (RedditTypes.SortType.TOP, RedditTypes.SortType.CONTROVERSIAL):
return sort_function(reddit_source, limit=self.args.limit, time_filter=self.time_filter.value)
else:
Expand Down
6 changes: 6 additions & 0 deletions bdfr/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,16 @@ def _download_submission(self, submission: praw.models.Submission):
for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
if destination.exists():
logger.debug(f"File {destination} from submission {submission.id} already exists, continuing")
if self.args.stop_on_exist and not submission.stickied:
self.existcount+=1
if self.existcount>=5:
logger.warning(f"Prevously-downloaded threshold met, exiting")
exit(0)
continue
elif not self.download_filter.check_resource(res):
logger.debug(f"Download filter removed {submission.id} file with URL {submission.url}")
continue
self.existcount=0
try:
res.download({"max_wait_time": self.args.max_wait_time})
except errors.BulkDownloaderException as e:
Expand Down

0 comments on commit 0b4326e

Please sign in to comment.