diff --git a/schema_salad/fetcher.py b/schema_salad/fetcher.py index 23e6bb6b..72613ea0 100644 --- a/schema_salad/fetcher.py +++ b/schema_salad/fetcher.py @@ -101,7 +101,9 @@ def check_exists(self, url: str) -> bool: split = urllib.parse.urlsplit(url) scheme, path = split.scheme, split.path - if scheme in ["http", "https"] and self.session is not None: + if scheme in ["http", "https"]: + if self.session is None: + raise ValidationException(f"Can't check {scheme} URL, session is None") try: resp = self.session.head(url, allow_redirects=True) resp.raise_for_status() @@ -113,7 +115,7 @@ def check_exists(self, url: str) -> bool: return os.path.exists(urllib.request.url2pathname(str(path))) if scheme == "mailto": return True - raise ValidationException(f"Unsupported scheme in url: {url}") + raise ValidationException(f"Unsupported scheme '{scheme}' in url: {url}") def urljoin(self, base_url: str, url: str) -> str: if url.startswith("_:"): diff --git a/schema_salad/tests/test_ref_resolver.py b/schema_salad/tests/test_ref_resolver.py index 15997607..387e84d0 100644 --- a/schema_salad/tests/test_ref_resolver.py +++ b/schema_salad/tests/test_ref_resolver.py @@ -249,3 +249,13 @@ def aa2(item: Union[CommentedMap, CommentedSeq]) -> bool: with open(path) as f: content = f.read() assert {"foo": "bar", "baz": content, "quux": content} == r3 + + +def test_check_exists_follows_redirects() -> None: + fetcher = DefaultFetcher({}, Session()) + # This tests that the redirect from http to https doesn't cause a + # false positive, this URL doesn't exist and it should return + # False because if it follows the redirect it'll get a 404. Tests + # for previous bug where allow_redirects wasn't set and as a + # result it would return True even though the result was 3xx. + assert not fetcher.check_exists("http://commonwl.org/does/not/exist")