Skip to content

Commit

Permalink
Merge pull request #376 from openzim/empty_playlist
Browse files: browse the repository at this point in the history
Check for empty playlists after filtering, and after downloading videos
  • Loading branch information
benoit74 authored Nov 1, 2024
2 parents bc1de9e + 8e351ba commit 7e2dfb5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- Check for empty playlists after filtering, and after downloading videos (#375)

## [3.2.1] - 2024-11-01

### Deprecated
Expand Down
30 changes: 21 additions & 9 deletions scraper/src/youtube2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,18 +565,19 @@ def extract_videos_list(self):
# We only return the video_ids that we'll use later on; the per-playlist JSON is stored.
for playlist in self.playlists:
videos_json = get_videos_json(playlist.playlist_id)
if len(videos_json) == 0:
logger.warning(
f"Playlist '{playlist.playlist_id}' is empty, will be ignored"
)
empty_playlists.append(playlist)
# filter in videos within date range and filter away deleted videos
skip_outofrange = functools.partial(
skip_outofrange_videos, self.dateafter
)
filter_videos = filter(skip_outofrange, videos_json)
filter_videos = filter(skip_deleted_videos, filter_videos)
filter_videos = filter(skip_non_public_videos, filter_videos)
filter_videos = list(filter_videos)
if len(filter_videos) == 0:
logger.warning(
f"Playlist '{playlist.playlist_id}' is empty, will be ignored"
)
empty_playlists.append(playlist)
all_videos.update(
{v["contentDetails"]["videoId"]: v for v in filter_videos}
)
Expand Down Expand Up @@ -1154,10 +1155,21 @@ def get_playlist_slug(playlist) -> str:
home_playlist_list = []

main_playlist_slug = None
if len(self.playlists) > 0:
main_playlist_slug = get_playlist_slug(
self.playlists[0]
) # set first playlist as main playlist
empty_playlists = list(
filter(lambda playlist: len(get_videos_list(playlist)) == 0, self.playlists)
)
for empty_playlist in empty_playlists:
logger.warning(
f"Removing finally empty playlist {empty_playlist.playlist_id}"
)
self.playlists.remove(empty_playlist)

if len(self.playlists) == 0:
raise Exception("No playlist succeeded to download")

main_playlist_slug = get_playlist_slug(
self.playlists[0]
) # set first playlist as main playlist

for playlist in self.playlists:
playlist_slug = get_playlist_slug(playlist)
Expand Down

0 comments on commit 7e2dfb5

Please sign in to comment.