Skip to content

Commit

Permalink
Merge pull request #341 from openzim/empty_playlist
Browse files (browse the repository at this point in the history)
Ignore empty playlists
  • Loading branch information
benoit74 authored Sep 24, 2024
2 parents 0b9bed5 + b8f83e4 commit dfcf7ae
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- Ignore empty playlists (#340)

## [3.1.0] - 2024-09-05

### Added
Expand Down
9 changes: 9 additions & 0 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,9 +589,15 @@ def extract_videos_list(self):
if all_videos is None:
all_videos = {}

empty_playlists = []
# we only return video_ids that we'll use later on. per-playlist JSON stored
for playlist in self.playlists:
videos_json = get_videos_json(playlist.playlist_id)
if len(videos_json) == 0:
logger.warning(
f"Playlist '{playlist.playlist_id}' is empty, will be ignored"
)
empty_playlists.append(playlist)
# filter in videos within date range and filter away deleted videos
skip_outofrange = functools.partial(
skip_outofrange_videos, self.dateafter
Expand All @@ -602,6 +608,9 @@ def extract_videos_list(self):
{v["contentDetails"]["videoId"]: v for v in filter_videos}
)
save_json(self.cache_dir, "videos", all_videos)

for playlist in empty_playlists:
self.playlists.remove(playlist)
self.videos_ids = [*all_videos.keys()] # unpacking so it's subscriptable

def download_video_files(self, max_concurrency):
Expand Down

0 comments on commit dfcf7ae

Please sign in to comment.