Skip to content

Commit

Permalink
Unresolver: allow a full URL when un-resolving a domain (#11632)
Browse files Browse the repository at this point in the history
This also checks that the URL has a valid protocol. This is needed for readthedocs/readthedocs-corporate#1887.
  • Loading branch information
stsewd authored Oct 24, 2024
1 parent 647b10b commit c352332
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
12 changes: 11 additions & 1 deletion readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def unresolve_url(self, url, append_indexhtml=True):
parsed_url = urlparse(url)
if parsed_url.scheme not in ["http", "https"]:
raise InvalidSchemeError(parsed_url.scheme)
domain = self.get_domain_from_host(parsed_url.netloc)
domain = parsed_url.hostname
unresolved_domain = self.unresolve_domain(domain)
return self._unresolve(
unresolved_domain=unresolved_domain,
Expand Down Expand Up @@ -551,8 +551,18 @@ def unresolve_domain(self, domain):
Unresolve domain by extracting relevant information from it.
:param str domain: Domain to extract the information from.
It can be a full URL; in that case, only the domain is used.
:returns: An UnresolvedDomain object.
"""
parsed_domain = urlparse(domain)
if parsed_domain.scheme:
if parsed_domain.scheme not in ["http", "https"]:
raise InvalidSchemeError(parsed_domain.scheme)
domain = parsed_domain.hostname

if not domain:
raise InvalidSubdomainError(domain)

public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN)
external_domain = self.get_domain_from_host(
settings.RTD_EXTERNAL_VERSION_DOMAIN
Expand Down
28 changes: 28 additions & 0 deletions readthedocs/rtd_tests/tests/test_unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
InvalidExternalVersionError,
InvalidPathForVersionedProjectError,
InvalidSchemeError,
InvalidSubdomainError,
SuspiciousHostnameError,
TranslationNotFoundError,
TranslationWithoutVersionError,
VersionNotFoundError,
unresolve,
unresolver,
)
from readthedocs.projects.constants import SINGLE_VERSION_WITHOUT_TRANSLATIONS
from readthedocs.projects.models import Domain
Expand Down Expand Up @@ -372,8 +374,34 @@ def test_unresolve_invalid_scheme(self):
"fttp://pip.readthedocs.io/en/latest/",
"fttps://pip.readthedocs.io/en/latest/",
"ssh://pip.readthedocs.io/en/latest/",
"javascript://pip.readthedocs.io/en/latest/",
"://pip.readthedocs.io/en/latest/",
]
for url in invalid_urls:
with pytest.raises(InvalidSchemeError):
unresolve(url)

# A triple slash is interpreted as a URL without a domain;
# we don't support that.
with pytest.raises(InvalidSubdomainError):
unresolve("https:///pip.readthedocs.io/en/latest/")

def test_unresolve_domain_with_full_url(self):
    """Check that ``unresolve_domain`` accepts a full URL in place of a bare domain."""
    full_url = "https://pip.readthedocs.io/en/latest/"
    unresolved = unresolver.unresolve_domain(full_url)

    # No custom domain is expected here: the host is a subdomain of the public domain.
    self.assertIsNone(unresolved.domain)
    self.assertEqual(unresolved.project, self.pip)
    self.assertTrue(unresolved.is_from_public_domain)
    # The scheme and path of the URL must not leak into the source domain.
    self.assertEqual(unresolved.source_domain, "pip.readthedocs.io")

def test_unresolve_domain_with_full_url_invalid_protocol(self):
    """Check that ``unresolve_domain`` rejects full URLs with an unsupported scheme."""
    for scheme in ("fttp", "fttps", "ssh", "javascript"):
        url = f"{scheme}://pip.readthedocs.io/en/latest/"
        with pytest.raises(InvalidSchemeError):
            unresolver.unresolve_domain(url)

0 comments on commit c352332

Please sign in to comment.