From cecd5e9063003ae95d6c014af9234f50396607d5 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Mon, 10 Jul 2023 16:59:00 -0700 Subject: [PATCH 1/3] Replace pkg_resources.resource_stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several alternatives¹. Only one (importlib.resources.as_file + open) avoids reading the entire file into memory by using a temporary file on disk. ¹ https://stackoverflow.com/a/58941536 --- LICENSE.nextstrain-cli | 27 +++++++++++++++++++++++++ augur/data/__init__.py | 32 ++++++++++++++++++++++++++++++ augur/util_support/color_parser.py | 7 +++---- augur/utils.py | 7 +++---- setup.py | 1 + 5 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 LICENSE.nextstrain-cli create mode 100644 augur/data/__init__.py diff --git a/LICENSE.nextstrain-cli b/LICENSE.nextstrain-cli new file mode 100644 index 000000000..6a5c63890 --- /dev/null +++ b/LICENSE.nextstrain-cli @@ -0,0 +1,27 @@ +This license applies to the original copy of resource functions from the +Nextstrain CLI project into this project, incorporated in +"augur/data/__init__.py". Any subsequent modifications to this project's copy of +those functions are licensed under the license of this project, not of +Nextstrain CLI. + +MIT License + +Copyright (c) 2018–2021 Trevor Bedford and Richard Neher + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/augur/data/__init__.py b/augur/data/__init__.py new file mode 100644 index 000000000..e733a7d56 --- /dev/null +++ b/augur/data/__init__.py @@ -0,0 +1,32 @@ +""" +Resource files. +""" +# Originally copied from nextstrain/cli/resources/__init__.py in the Nextstrain +# CLI project¹. +# +# ¹ https://github.com/nextstrain/cli + +# We gate usage of the stdlib implementation on 3.11 because that's the first +# version with a full adapter for making the new files() / Traversable API +# backwards compatible with importers only providing the original path() / +# ResourceReader API. The PyPI backport, on the other hand, contains the full +# adapter since 5.3.0, which we declare as our minimum version in setup.py, so +# we use that even on 3.9 and 3.10. +# +# We're using the new API at all because the original one is being deprecated +# and we want to avoid warnings both from the stdlib implementation on 3.11 and +# from the PyPI backport implementation on older Python versions. 
+# -trs, 13 Sept 2022 +import sys + +if sys.version_info >= (3, 11): + from importlib.resources import files as _files, as_file as _as_file +else: + from importlib_resources import files as _files, as_file as _as_file + +from pathlib import Path +from typing import ContextManager + + +def as_file(path: str) -> ContextManager[Path]: + return _as_file(_files(__name__) / path) diff --git a/augur/util_support/color_parser.py b/augur/util_support/color_parser.py index 43cf5f6b9..28ac3a1d8 100644 --- a/augur/util_support/color_parser.py +++ b/augur/util_support/color_parser.py @@ -1,8 +1,7 @@ from collections import defaultdict -from io import TextIOWrapper import functools -from pkg_resources import resource_stream +from augur.data import as_file from augur.util_support.color_parser_line import ColorParserLine @@ -17,8 +16,8 @@ def mapping(self): colors = {} if self.use_defaults: - with resource_stream("augur", "data/colors.tsv") as stream: - with TextIOWrapper(stream, "utf-8") as defaults: + with as_file("colors.tsv") as file: + with open(file, encoding="utf-8") as defaults: colors = {**colors, **self.parse_file(defaults)} if self.mapping_filename: diff --git a/augur/utils.py b/augur/utils.py index 3ef91f61c..d687e62a5 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -5,10 +5,9 @@ import os, json, sys import pandas as pd from collections import defaultdict, OrderedDict -from pkg_resources import resource_stream -from io import TextIOWrapper from .__version__ import __version__ +from augur.data import as_file from augur.io.file import open_file from augur.types import ValidationMode @@ -243,8 +242,8 @@ def add_line_to_coordinates(line): else: print("WARNING: geo-coordinate file contains invalid line. 
Please make sure not to mix tabs and spaces as delimiters (use only tabs):",line) if use_defaults: - with resource_stream(__package__, "data/lat_longs.tsv") as stream: - with TextIOWrapper(stream, "utf-8") as defaults: + with as_file("lat_longs.tsv") as file: + with open(file, encoding="utf-8") as defaults: for line in defaults: add_line_to_coordinates(line) if overrides: diff --git a/setup.py b/setup.py index 6674da85a..dde6715bf 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ "bcbio-gff >=0.7.0, ==0.7.*", "biopython >=1.67, !=1.77, !=1.78", "cvxopt >=1.1.9, ==1.*", + "importlib_resources >=5.3.0; python_version < '3.11'", "isodate ==0.6.*", "jsonschema >=3.0.0, ==3.*", "networkx >= 2.5, ==2.*", From 13bad551dba56b5196ea6549c2041c9a45aa3ec6 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:15:16 -0700 Subject: [PATCH 2/3] Replace pkg_resources.resource_string Use an approach similar to the previous commit. --- augur/validate.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/augur/validate.py b/augur/validate.py index e8e316257..aefdf5589 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -3,16 +3,15 @@ """ import sys -import os from collections import defaultdict import json import jsonschema import jsonschema.exceptions import re from itertools import groupby -from pkg_resources import resource_string from textwrap import indent from typing import Iterable, Union +from augur.data import as_file from augur.io.print import print_err from augur.io.json import shorten_as_json from .validate_export import verifyMainJSONIsInternallyConsistent, verifyMetaAndOrTreeJSONsAreInternallyConsistent @@ -31,7 +30,8 @@ def load_json_schema(path, refs=None): (located in augur/data) ''' try: - schema = json.loads(resource_string(__package__, os.path.join("data", path))) + with as_file(path) as file, open(file, "r", encoding = "utf-8") as fh: + schema = json.load(fh) except 
json.JSONDecodeError as err: raise ValidateError("Schema {} is not a valid JSON file. Error: {}".format(path, err)) # check loaded schema is itself valid -- see http://python-jsonschema.readthedocs.io/en/latest/errors/ @@ -43,7 +43,10 @@ def load_json_schema(path, refs=None): if refs: # Make the validator aware of additional schemas - schema_store = {k: json.loads(resource_string(__package__, os.path.join("data", v))) for k,v in refs.items()} + schema_store = dict() + for k, v in refs.items(): + with as_file(v) as file, open(file, "r", encoding = "utf-8") as fh: + schema_store[k] = json.load(fh) resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store) schema_validator = Validator(schema, resolver=resolver) else: From 2b584098bced0aa6d04fc6a059d01255894b01ba Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:16:12 -0700 Subject: [PATCH 3/3] Replace pkg_resources.parse_version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit packaging.version.Version is the official replacement¹. ¹ https://packaging.pypa.io/en/latest/version.html#packaging.version.Version --- devel/release | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devel/release b/devel/release index cf42810c5..0b864b83d 100755 --- a/devel/release +++ b/devel/release @@ -88,9 +88,9 @@ assert-version-is-new() { version-is-gt() { python3 /dev/stdin "$1" "$2" <<<"$(cut -c 9- <<<' from sys import argv, exit - from pkg_resources import parse_version + from packaging.version import Version - version = list(map(parse_version, argv[1:3])) + version = list(map(Version, argv[1:3])) gt = version[1] > version[0]