From 5139bbda85b67f76983208e09313051bfa059440 Mon Sep 17 00:00:00 2001 From: M Bussonnier Date: Fri, 4 Oct 2024 18:55:08 -0700 Subject: [PATCH] BUG: fetch_string_and_headers compat: raise in and out of pyodide (#129) * fetch_string_and_headers compat: raise in and out of pyodide Currently only the not_in_pyodide will raise on non-success, because this is the default behavior of urllib, the in_pyodide will not, so I added a raise_for_status. It is better to raise, as otherwise the package parser will potentially get proper URL and not manage to parse it, and decide there is no wheels, while we actually just got an error (404, or maybe 500). In addition wraps both case in a custom local HttpStatusError, so that we can actually catch these errors in the right places when we encounter them. Also add handling for PyPI 404 Now that warehouse set cors to 404, (https://github.com/pypi/warehouse/pull/16339) we need to change the checked exceptions as there is no more network errors. * Remove compat with Pyodide < 0.27 * Update test_compat.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Gyeongjae Choi Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- micropip/_compat/__init__.py | 3 +++ micropip/_compat/_compat_in_pyodide.py | 17 ++++++++++-- micropip/_compat/_compat_not_in_pyodide.py | 16 +++++++++++- micropip/_compat/compatibility_layer.py | 8 ++++++ micropip/package_index.py | 13 +++++++--- tests/conftest.py | 5 ++++ tests/test_compat.py | 30 ++++++++++++++++++++++ tests/test_package_index.py | 4 +-- 8 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 tests/test_compat.py diff --git a/micropip/_compat/__init__.py b/micropip/_compat/__init__.py index 3934238..445e63d 100644 --- a/micropip/_compat/__init__.py +++ b/micropip/_compat/__init__.py @@ -34,6 +34,8 @@ to_js = compatibility_layer.to_js +HttpStatusError = compatibility_layer.HttpStatusError + __all__ = [ "REPODATA_INFO", @@ -45,4 +47,5 @@ "loadPackage", "get_dynlibs", "to_js", + "HttpStatusError", ] diff --git a/micropip/_compat/_compat_in_pyodide.py b/micropip/_compat/_compat_in_pyodide.py index 671fc44..be839de 100644 --- a/micropip/_compat/_compat_in_pyodide.py +++ b/micropip/_compat/_compat_in_pyodide.py @@ -7,7 +7,7 @@ from pyodide._package_loader import get_dynlibs from pyodide.ffi import IN_BROWSER, to_js -from pyodide.http import pyfetch +from pyodide.http import HttpStatusError, pyfetch from .compatibility_layer import CompatibilityLayer @@ -28,6 +28,15 @@ class CompatibilityInPyodide(CompatibilityLayer): + class HttpStatusError(Exception): + status_code: int + message: str + + def __init__(self, status_code: int, message: str): + self.status_code = status_code + self.message = message + super().__init__(message) + @staticmethod def repodata_info() -> dict[str, str]: return REPODATA_INFO @@ -50,7 +59,11 @@ async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes: async def fetch_string_and_headers( url: str, kwargs: dict[str, str] ) -> tuple[str, dict[str, str]]: - response = await pyfetch(url, **kwargs) + try: + response = await pyfetch(url, **kwargs) + response.raise_for_status() + except HttpStatusError as e: + raise CompatibilityInPyodide.HttpStatusError(e.status, str(e)) from e content = await response.string() headers: dict[str, str] = response.headers diff --git a/micropip/_compat/_compat_not_in_pyodide.py b/micropip/_compat/_compat_not_in_pyodide.py index 5fb3cec..96cc53b 100644 --- a/micropip/_compat/_compat_not_in_pyodide.py +++ b/micropip/_compat/_compat_not_in_pyodide.py @@ -1,6 +1,7 @@ import re from pathlib import Path from typing import IO, TYPE_CHECKING, Any +from urllib.error import HTTPError from urllib.request import Request, urlopen from urllib.response import addinfourl @@ -15,6 +16,15 @@ class CompatibilityNotInPyodide(CompatibilityLayer): # Vendored from packaging _canonicalize_regex = re.compile(r"[-_.]+") + class HttpStatusError(Exception): + status_code: int + message: str + + def __init__(self, status_code: int, message: str): + self.status_code = status_code + self.message = message + super().__init__(message) + class loadedPackages(CompatibilityLayer.loadedPackages): @staticmethod def to_py(): @@ -40,7 +50,11 @@ async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes: async def fetch_string_and_headers( url: str, kwargs: dict[str, Any] ) -> tuple[str, dict[str, str]]: - response = CompatibilityNotInPyodide._fetch(url, kwargs=kwargs) + try: + response = CompatibilityNotInPyodide._fetch(url, kwargs=kwargs) + except HTTPError as e: + raise CompatibilityNotInPyodide.HttpStatusError(e.code, str(e)) from e + headers = {k.lower(): v for k, v in response.headers.items()} return response.read().decode(), headers diff --git a/micropip/_compat/compatibility_layer.py b/micropip/_compat/compatibility_layer.py index 07a0c37..b6c6c86 100644 --- a/micropip/_compat/compatibility_layer.py +++ b/micropip/_compat/compatibility_layer.py @@ -13,6 +13,14 @@ class CompatibilityLayer(ABC): All of the following methods / properties must be implemented for use both inside and outside of pyodide. """ + class HttpStatusError(ABC, Exception): + status_code: int + message: str + + @abstractmethod + def __init__(self, status_code: int, message: str): + pass + class loadedPackages(ABC): @staticmethod @abstractmethod diff --git a/micropip/package_index.py b/micropip/package_index.py index 2772b7b..dd3fff8 100644 --- a/micropip/package_index.py +++ b/micropip/package_index.py @@ -10,7 +10,7 @@ from packaging.utils import InvalidWheelFilename from packaging.version import InvalidVersion, Version -from ._compat import fetch_string_and_headers +from ._compat import HttpStatusError, fetch_string_and_headers from ._utils import is_package_compatible, parse_version from .externals.mousebender.simple import from_project_details_html from .wheelinfo import WheelInfo @@ -276,11 +276,16 @@ async def query_package( try: metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs) - except OSError: - continue + except HttpStatusError as e: + if e.status_code == 404: + continue + raise content_type = headers.get("content-type", "").lower() - parser = _select_parser(content_type, name) + try: + parser = _select_parser(content_type, name) + except ValueError as e: + raise ValueError(f"Error trying to decode url: {url}") from e return parser(metadata) else: raise ValueError( diff --git a/tests/conftest.py b/tests/conftest.py index c53d723..af55b88 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -340,6 +340,7 @@ def mock_fetch(monkeypatch, mock_importlib): def _mock_package_index_gen( httpserver, pkgs=("black", "pytest", "numpy", "pytz", "snowballstemmer"), + pkgs_not_found=(), content_type="application/json", suffix="_json.json.gz", ): @@ -355,6 +356,10 @@ def _mock_package_index_gen( content_type=content_type, headers={"Access-Control-Allow-Origin": "*"}, ) + for pkg in pkgs_not_found: + httpserver.expect_request(f"/{base}/{pkg}/").respond_with_data( + "Not found", status=404, content_type="text/plain" + ) index_url = httpserver.url_for(base) diff --git a/tests/test_compat.py b/tests/test_compat.py new file mode 100644 index 0000000..6a886de --- /dev/null +++ b/tests/test_compat.py @@ -0,0 +1,30 @@ +""" +test that function in compati behave the same + +""" + +import pytest +from pytest_pyodide import run_in_pyodide + + +@pytest.mark.driver_timeout(10) +def test_404(selenium_standalone_micropip, httpserver, request): + selenium_standalone_micropip.set_script_timeout(11) + + @run_in_pyodide(packages=["micropip", "packaging"]) + async def _inner_test_404_raise(selenium, url): + import pytest + + from micropip._compat import HttpStatusError, fetch_string_and_headers + + with pytest.raises(HttpStatusError): + await fetch_string_and_headers(url, {}) + + httpserver.expect_request("/404").respond_with_data( + "Not found", + status=404, + content_type="text/plain", + headers={"Access-Control-Allow-Origin": "*"}, + ) + url_404 = httpserver.url_for("/404") + _inner_test_404_raise(selenium_standalone_micropip, url_404) diff --git a/tests/test_package_index.py b/tests/test_package_index.py index 9097877..5552f60 100644 --- a/tests/test_package_index.py +++ b/tests/test_package_index.py @@ -135,8 +135,8 @@ def test_contain_placeholder(): ) async def test_query_package(mock_fixture, pkg1, pkg2, request): gen_mock_server = request.getfixturevalue(mock_fixture) - pkg1_index_url = gen_mock_server(pkgs=[pkg1]) - pkg2_index_url = gen_mock_server(pkgs=[pkg2]) + pkg1_index_url = gen_mock_server(pkgs=[pkg1], pkgs_not_found=[pkg2]) + pkg2_index_url = gen_mock_server(pkgs=[pkg2], pkgs_not_found=[pkg1]) for _index_urls in ( pkg1_index_url,