From 853b144e128e9150a30a78169b2a68e82e54b47d Mon Sep 17 00:00:00 2001 From: Jared Deckard Date: Mon, 26 Feb 2024 13:40:05 -0600 Subject: [PATCH] URL User Info Validation (#2244) * Restrict URL userinfo to NWG RFC 3986 * Update changelog --------- Co-authored-by: Steven Loria --- CHANGELOG.rst | 6 ++++++ src/marshmallow/validate.py | 4 ++-- tests/test_validate.py | 9 +++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8a399e82a..5ebce133d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,12 @@ Changelog 3.21.0 (unreleased) ******************* +Bug fixes: + +- Fix validation of ``URL`` fields to allow missing user field, + per NWG RFC 3986 (:issue:`2232`). Thanks :user:`ddennerline3` for reporting + and :user:`deckar01` for the PR. + Other changes: - *Backwards-incompatible*: ``__version__``, ``__parsed_version__``, and ``__version_info__`` diff --git a/src/marshmallow/validate.py b/src/marshmallow/validate.py index 708fed85a..9fb16c86d 100644 --- a/src/marshmallow/validate.py +++ b/src/marshmallow/validate.py @@ -131,9 +131,9 @@ def _regex_generator( # this is validated separately against allowed schemes, so in the regex # we simply want to capture its existence r"(?:[a-z0-9\.\-\+]*)://", - # basic_auth, for URLs encoding a username:password + # userinfo, for URLs encoding authentication # e.g. 'ftp://foo:bar@ftp.example.org/' - r"(?:[^:@]+?(:[^:@]*?)?@|)", + r"(?:(?:[a-z0-9\-._~!$&'()*+,;=:]|%[0-9a-f]{2})*@)?", # netloc, the hostname/domain part of the URL plus the optional port r"(?:", "|".join(hostname_variants), diff --git a/tests/test_validate.py b/tests/test_validate.py index 0d7c56cfa..fcda4e816 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -29,6 +29,9 @@ "http://www.example.com:8000/foo", "http://user@example.com", "http://user:pass@example.com", + "http://:pass@example.com", + "http://@example.com", + "http://AZaz09-._~%2A!$&'()*+,;=:@example.com", ], ) def test_url_absolute_valid(valid_url): @@ -58,6 +61,12 @@ def test_url_absolute_valid(valid_url): " ", "", None, + "http://user@pass@example.com", + "http://@pass@example.com", + "http://@@example.com", + "http://^@example.com", + "http://%0G@example.com", + "http://%@example.com", ], ) def test_url_absolute_invalid(invalid_url):