From aeb807d588ab93c1de7128d56d1dd60d3ef97e6b Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:32:08 -0800 Subject: [PATCH] =?UTF-8?q?Use=20jsonschema=20=E2=89=A54.18.0=20and=20new?= =?UTF-8?q?=20referencing=20library?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In v4.18.0, jsonschema.RefResolver was deprecated in favor of the new referencing library.¹ The intro² and API³ docs were helpful in determining the necessary changes. I've tested that our new usage is not backwards compatible with v4.17.3 and thus updated the minimum requirement to v4.18.0. I chose v0.29.1 as the minimum supported version of referencing because that was the version released alongside jsonschema v4.18.0. The default behavior no longer tries to access the network, so I've reworded the retrieval function comment and error message. Local reference mismatches are now a "PointerToNowhere" error instead of an "Unresolvable JSON pointer" error. It shows the entire schema JSON in the output which can seem unnecessarily verbose, but I think it's fine since this is only intended to show on internal errors with the schema. ¹ https://github.com/python-jsonschema/jsonschema/blob/93e0caa5752947ec77333da81a634afe41a022ed/CHANGELOG.rst#v4180 ² https://python-jsonschema.readthedocs.io/en/stable/referencing/#introduction-to-the-referencing-api ³ https://referencing.readthedocs.io/en/stable/api/#referencing.Registry.with_contents --- augur/validate.py | 30 +++++++++++++++++------------- setup.py | 3 ++- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/augur/validate.py b/augur/validate.py index 364a5d785..1f6bc4ba2 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -9,6 +9,8 @@ import jsonschema.exceptions import re from itertools import groupby +from referencing import Registry +from referencing.exceptions import NoSuchResource from textwrap import indent from typing import Iterable, Union from augur.data import as_file @@ -48,22 +50,24 @@ def load_json_schema(path, refs=None): for k, v in refs.items(): with as_file(v) as file, open_file(file, "r") as fh: schema_store[k] = json.load(fh) - resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store) - schema_validator = Validator(schema, resolver=resolver) + + # Create a dummy retrieval function to handle URIs not present in + # schema_store. This often indicates a typo (the $ref doesn't match the + # key of the schema_store) or we forgot to add a local mapping for a new + # $ref. + def retrieve(uri): + # Take advantage of the fact that BaseException is not handled by + # Registry.get_or_retrieve. This means the custom error message is + # printed instead of the less helpful default: + # jsonschema.exceptions._WrappedReferencingError: Unresolvable: https://… + raise BaseException(f"The schema used for validation could not resolve a local file for {uri!r}. " + + "Please check the schema used and update the appropriate schema_store as needed." ) + + registry = Registry(retrieve=retrieve).with_contents(schema_store.items()) + schema_validator = Validator(schema, registry=registry) else: schema_validator = Validator(schema) - # By default $ref URLs which we don't define in a schema_store are fetched - # by jsonschema. This often indicates a typo (the $ref doesn't match the key - # of the schema_store) or we forgot to add a local mapping for a new $ref. - # Either way, Augur should not be accessing the network. - def resolve_remote(url): - # The exception type is not important as jsonschema will catch & re-raise as a RefResolutionError - raise Exception(f"The schema used for validation attempted to fetch the remote URL {url!r}. " + - "Augur should resolve schema references to local files, please check the schema used " + - "and update the appropriate schema_store as needed." ) - schema_validator.resolver.resolve_remote = resolve_remote - return schema_validator diff --git a/setup.py b/setup.py index 81aad167d..57b6a5f1c 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "cvxopt >=1.1.9, ==1.*", "importlib_resources >=5.3.0; python_version < '3.11'", "isodate ==0.6.*", - "jsonschema >=3.0.0, ==3.*", + "jsonschema >=4.18.0, ==4.*", "networkx >= 2.5, <4", "numpy ==1.*", "packaging >=19.2", @@ -65,6 +65,7 @@ "phylo-treetime >=0.11.2, <0.12", "pyfastx >=1.0.0, <3.0", "python_calamine >=0.2.0", + "referencing >=0.29.1, <1.0", "scipy ==1.*", "xopen[zstd] >=1.7.0, <3" # TODO: Deprecated, remove v1 support around November 2024 ],