From 0e23dcb8ad64a09c4758801bd070e32fbfe07c32 Mon Sep 17 00:00:00 2001 From: Soulsuck24 <79275800+Soulsuck24@users.noreply.github.com> Date: Mon, 30 Jan 2023 14:52:08 -0500 Subject: [PATCH] Gfycat/Redgifs coverage Coverage for direct gfycat links that redirect to redgifs. The redirects through the sites themselves are broken, but this fixes that. Coverage for o.imgur links and incorrect capitalisation of domains in download_factory. Changed the tests for direct, as gfycat is handled by the gfycat downloader. Fix the pornhub test, as the previous video was removed. --- bdfr/site_downloaders/download_factory.py | 8 ++++---- bdfr/site_downloaders/gfycat.py | 2 +- bdfr/site_downloaders/imgur.py | 2 ++ tests/site_downloaders/test_direct.py | 7 +++++-- tests/site_downloaders/test_download_factory.py | 1 + tests/site_downloaders/test_gfycat.py | 11 ++++++++++- tests/site_downloaders/test_imgur.py | 2 ++ tests/site_downloaders/test_pornhub.py | 2 +- 8 files changed, 26 insertions(+), 9 deletions(-) diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 9006681c..578f4d32 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -24,11 +24,13 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> type[BaseDownloader]: - sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r"(i\.|m\.)?imgur", sanitised_url): + sanitised_url = DownloadFactory.sanitise_url(url).lower() + if re.match(r"(i\.|m\.|o\.)?imgur", sanitised_url): return Imgur elif re.match(r"(i\.|thumbs\d\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url): return Redgifs + elif re.match(r"(thumbs\.|giant\.)?gfycat\.", sanitised_url): + return Gfycat elif re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource( sanitised_url ): @@ -41,8 +43,6 @@ def pull_lever(url: str) -> type[BaseDownloader]: return Gallery elif re.match(r"patreon\.com.*", sanitised_url):
return Gallery - elif re.match(r"gfycat\.", sanitised_url): - return Gfycat elif re.match(r"reddit\.com/r/", sanitised_url): return SelfPost elif re.match(r"(m\.)?youtu\.?be", sanitised_url): diff --git a/bdfr/site_downloaders/gfycat.py b/bdfr/site_downloaders/gfycat.py index d7c60ca6..45246894 100644 --- a/bdfr/site_downloaders/gfycat.py +++ b/bdfr/site_downloaders/gfycat.py @@ -23,7 +23,7 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> l @staticmethod def _get_link(url: str) -> set[str]: - gfycat_id = re.match(r".*/(.*?)/?$", url).group(1) + gfycat_id = re.match(r".*/(.*?)(?:/?|-.*|\..{3,4})$", url).group(1) url = "https://gfycat.com/" + gfycat_id response = Gfycat.retrieve_url(url) diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index bfcecc09..26c76e63 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -38,6 +38,8 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> l @staticmethod def _get_data(link: str) -> dict: try: + if link.endswith("/"): + link = link.removesuffix("/") if re.search(r".*/(.*?)(gallery/|a/)", link): imgur_id = re.match(r".*/(?:gallery/|a/)(.*?)(?:/.*)?$", link).group(1) link = f"https://api.imgur.com/3/album/{imgur_id}" diff --git a/tests/site_downloaders/test_direct.py b/tests/site_downloaders/test_direct.py index 14190eef..c4279cdc 100644 --- a/tests/site_downloaders/test_direct.py +++ b/tests/site_downloaders/test_direct.py @@ -13,8 +13,11 @@ @pytest.mark.parametrize( ("test_url", "expected_hash"), ( - ("https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4", "48f9bd4dbec1556d7838885612b13b39"), - ("https://giant.gfycat.com/DazzlingSilkyIguana.mp4", "808941b48fc1e28713d36dd7ed9dc648"), + ("https://i.redd.it/q6ebualjxzea1.jpg", "6ec154859c777cb401132bb991cb3635"), + ( + "https://file-examples.com/wp-content/uploads/2017/11/file_example_MP3_700KB.mp3", + "3caa342e241ddb7d76fd24a834094101", + ), ), ) def
test_download_resource(test_url: str, expected_hash: str): diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index f95e609b..0706c69d 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -31,6 +31,7 @@ ), ("https://i.redd.it/affyv0axd5k61.png", Direct), ("https://i.imgur.com/bZx1SJQ.jpg", Imgur), + ("https://i.Imgur.com/bZx1SJQ.jpg", Imgur), ("https://imgur.com/BuzvZwb.gifv", Imgur), ("https://imgur.com/a/MkxAzeg", Imgur), ("https://m.imgur.com/a/py3RW0j", Imgur), diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py index 0cfb36f4..2821a7e9 100644 --- a/tests/site_downloaders/test_gfycat.py +++ b/tests/site_downloaders/test_gfycat.py @@ -15,11 +15,17 @@ ( ("https://gfycat.com/definitivecaninecrayfish", "https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4"), ("https://gfycat.com/dazzlingsilkyiguana", "https://giant.gfycat.com/DazzlingSilkyIguana.mp4"), + ("https://gfycat.com/WearyComposedHairstreak", "https://thumbs4.redgifs.com/WearyComposedHairstreak.mp4"), + ( + "https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif", + "https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4", + ), + ("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4"), ), ) def test_get_link(test_url: str, expected_url: str): result = Gfycat._get_link(test_url) - assert result.pop() == expected_url + assert expected_url in result.pop() @pytest.mark.online @@ -28,6 +34,9 @@ def test_get_link(test_url: str, expected_url: str): ( ("https://gfycat.com/definitivecaninecrayfish", "48f9bd4dbec1556d7838885612b13b39"), ("https://gfycat.com/dazzlingsilkyiguana", "808941b48fc1e28713d36dd7ed9dc648"), + ("https://gfycat.com/WearyComposedHairstreak", "5f82ba1ba23cc927c9fbb0c0421953a5"), + ("https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif", 
"5292343665a13b5369d889d911ae284d"), + ("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "5292343665a13b5369d889d911ae284d"), ), ) def test_download_resource(test_url: str, expected_hash: str): diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py index 744488bb..46dd9794 100644 --- a/tests/site_downloaders/test_imgur.py +++ b/tests/site_downloaders/test_imgur.py @@ -15,6 +15,7 @@ ( ("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)), ("https://imgur.com/gallery/IjJJdlC", ("740b006cf9ec9d6f734b6e8f5130bdab",)), + ("https://imgur.com/gallery/IjJJdlC/", ("740b006cf9ec9d6f734b6e8f5130bdab",)), ( "https://imgur.com/a/dcc84Gt", ( @@ -32,6 +33,7 @@ "fb6c913d721c0bbb96aa65d7f560d385", ), ), + ("https://o.imgur.com/jZw9gq2.jpg", ("6d6ea9aa1d98827a05425338afe675bc",)), ("https://i.imgur.com/lFJai6i.gifv", ("01a6e79a30bec0e644e5da12365d5071",)), ("https://i.imgur.com/ywSyILa.gifv?", ("56d4afc32d2966017c38d98568709b45",)), ("https://imgur.com/ubYwpbk.GIFV", ("d4a774aac1667783f9ed3a1bd02fac0c",)), diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py index d9971cb2..894b9f8c 100644 --- a/tests/site_downloaders/test_pornhub.py +++ b/tests/site_downloaders/test_pornhub.py @@ -14,7 +14,7 @@ @pytest.mark.slow @pytest.mark.parametrize( ("test_url", "expected_hash"), - (("https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497", "ad52a0f4fce8f99df0abed17de1d04c7"),), + (("https://www.pornhub.com/view_video.php?viewkey=ph5eafee2d174ff", "d15090cbbaa8ee90500a257c7899ff84"),), ) def test_hash_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock()