From 31388b97c8cac7591e0cb15a433096b6408a417e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:20:11 +0200 Subject: [PATCH 1/6] Add strict static typing --- src/pystow/api.py | 12 ++++++-- src/pystow/cli.py | 1 + src/pystow/impl.py | 2 +- src/pystow/utils.py | 71 ++++++++++++++++++++++++++++----------------- tox.ini | 2 +- 5 files changed, 57 insertions(+), 31 deletions(-) diff --git a/src/pystow/api.py b/src/pystow/api.py index fb16837..7445bb5 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -4,7 +4,15 @@ from contextlib import contextmanager from pathlib import Path -from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Union +from typing import ( + TYPE_CHECKING, + Any, + ContextManager, + Mapping, + Optional, + Sequence, + Union, +) from .constants import JSON, Opener, Provider from .impl import Module @@ -1197,7 +1205,7 @@ def ensure_tar_df( force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, read_csv_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> "pd.DataFrame": """Download a tar file and open an inner file as a dataframe with :mod:`pandas`. :param key: The module name diff --git a/src/pystow/cli.py b/src/pystow/cli.py index 55fb618..b2a4021 100644 --- a/src/pystow/cli.py +++ b/src/pystow/cli.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # flake8: noqa +# type: ignore """Command line interface for PyStow.""" diff --git a/src/pystow/impl.py b/src/pystow/impl.py index 32cc5f0..c6cfd53 100644 --- a/src/pystow/impl.py +++ b/src/pystow/impl.py @@ -1127,7 +1127,7 @@ def ensure_tar_xml( force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, parse_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> "lxml.etree.ElementTree": """Download a tar file and open an inner file as an XML with :mod:`lxml`. :param subkeys: diff --git a/src/pystow/utils.py b/src/pystow/utils.py index 801f912..2a9ff92 100644 --- a/src/pystow/utils.py +++ b/src/pystow/utils.py @@ -18,7 +18,16 @@ from io import BytesIO, StringIO from pathlib import Path, PurePosixPath from subprocess import check_output # noqa: S404 -from typing import Any, Collection, Iterable, Iterator, Mapping, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Iterable, + Iterator, + Mapping, + Optional, + Union, +) from urllib.parse import urlparse from urllib.request import urlretrieve from uuid import uuid4 @@ -34,6 +43,12 @@ README_TEXT, ) +if TYPE_CHECKING: + import lxml.etree + import numpy.typing + import pandas + import rdflib + __all__ = [ # Data Structures "HexDigestMismatch", @@ -484,11 +499,12 @@ def n() -> str: return str(uuid4()) -def get_df_io(df, sep: str = "\t", index: bool = False, **kwargs) -> BytesIO: +def get_df_io( + df: "pandas.DataFrame", sep: str = "\t", index: bool = False, **kwargs: Any +) -> BytesIO: """Get the dataframe as bytes. :param df: A dataframe - :type df: pandas.DataFrame :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. :param index: Should the index be output? Overrides the Pandas default to be false. :param kwargs: Additional kwargs to pass to :func:`pandas.DataFrame.to_csv`. @@ -501,7 +517,7 @@ def get_df_io(df, sep: str = "\t", index: bool = False, **kwargs) -> BytesIO: return bio -def get_np_io(arr, **kwargs) -> BytesIO: +def get_np_io(arr, **kwargs: Any) -> BytesIO: """Get the numpy object as bytes. 
:param arr: Array-like @@ -519,7 +535,7 @@ def get_np_io(arr, **kwargs) -> BytesIO: def write_pickle_gz( obj, path: Union[str, Path], - **kwargs, + **kwargs: Any, ) -> None: """Write an object to a gzipped pickle. @@ -533,11 +549,11 @@ def write_pickle_gz( def write_lzma_csv( - df, + df: "pandas.DataFrame", path: Union[str, Path], sep="\t", index: bool = False, - **kwargs, + **kwargs: Any, ): """Write a dataframe as an lzma-compressed file. @@ -556,12 +572,12 @@ def write_lzma_csv( def write_zipfile_csv( - df, + df: "pandas.DataFrame", path: Union[str, Path], inner_path: str, sep="\t", index: bool = False, - **kwargs, + **kwargs: Any, ) -> None: """Write a dataframe to an inner CSV file to a zip archive. @@ -581,7 +597,9 @@ def write_zipfile_csv( file.write(bytes_io.read()) -def read_zipfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs): +def read_zipfile_csv( + path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs: Any +) -> "pandas.DataFrame": """Read an inner CSV file from a zip archive. :param path: The path to the zip archive @@ -589,7 +607,6 @@ def read_zipfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", * :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`. :return: A dataframe - :rtype: pandas.DataFrame """ import pandas as pd @@ -599,7 +616,7 @@ def read_zipfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", * def write_zipfile_xml( - element_tree, + element_tree: "lxml.etree.ElementTree", path: Union[str, Path], inner_path: str, **kwargs, @@ -607,7 +624,6 @@ def write_zipfile_xml( """Write an XML element tree to an inner XML file to a zip archive. :param element_tree: An XML element tree - :type element_tree: lxml.etree.ElementTree :param path: The path to the resulting zip archive :param inner_path: The path inside the zip archive to write the dataframe :param kwargs: Additional kwargs to pass to :func:`tostring` @@ -620,14 +636,15 @@ def write_zipfile_xml( file.write(etree.tostring(element_tree, **kwargs)) -def read_zipfile_xml(path: Union[str, Path], inner_path: str, **kwargs): +def read_zipfile_xml( + path: Union[str, Path], inner_path: str, **kwargs: Any +) -> "lxml.etree.ElementTree": """Read an inner XML file from a zip archive. :param path: The path to the zip archive :param inner_path: The path inside the zip archive to the xml file :param kwargs: Additional kwargs to pass to :func:`lxml.etree.parse` :return: An XML element tree - :rtype: lxml.etree.ElementTree """ from lxml import etree @@ -637,7 +654,7 @@ def read_zipfile_xml(path: Union[str, Path], inner_path: str, **kwargs): def write_zipfile_np( - arr, + arr: "numpy.typing.ArrayLike", path: Union[str, Path], inner_path: str, **kwargs, @@ -657,14 +674,13 @@ def write_zipfile_np( file.write(bytes_io.read()) -def read_zip_np(path: Union[str, Path], inner_path: str, **kwargs): +def read_zip_np(path: Union[str, Path], inner_path: str, **kwargs: Any) -> "numpy.typing.ArrayLike": """Read an inner numpy array-like from a zip archive. :param path: The path to the zip archive :param inner_path: The path inside the zip archive to the dataframe :param kwargs: Additional kwargs to pass to :func:`numpy.load`. 
:return: A numpy array or other object - :rtype: numpy.typing.ArrayLike """ import numpy as np @@ -673,14 +689,13 @@ def read_zip_np(path: Union[str, Path], inner_path: str, **kwargs): return np.load(file, **kwargs) -def read_zipfile_rdf(path: Union[str, Path], inner_path: str, **kwargs): +def read_zipfile_rdf(path: Union[str, Path], inner_path: str, **kwargs: Any) -> "rdflib.Graph": """Read an inner RDF file from a zip archive. :param path: The path to the zip archive :param inner_path: The path inside the zip archive to the dataframe :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`. :return: A dataframe - :rtype: rdflib.Graph """ import rdflib @@ -692,7 +707,7 @@ def read_zipfile_rdf(path: Union[str, Path], inner_path: str, **kwargs): def write_tarfile_csv( - df, + df: "pandas.DataFrame", path: Union[str, Path], inner_path: str, sep: str = "\t", @@ -718,7 +733,9 @@ def write_tarfile_csv( tar_file.addfile(tarinfo, BytesIO(s.encode("utf-8"))) -def read_tarfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs): +def read_tarfile_csv( + path: Union[str, Path], inner_path: str, sep: str = "\t", **kwargs: Any +) -> "pandas.DataFrame": """Read an inner CSV file from a tar archive. :param path: The path to the tar archive @@ -726,7 +743,6 @@ def read_tarfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", * :param sep: The separator in the dataframe. Overrides Pandas default to use a tab. :param kwargs: Additional kwargs to pass to :func:`pandas.read_csv`. :return: A dataframe - :rtype: pandas.DataFrame """ import pandas as pd @@ -735,7 +751,9 @@ def read_tarfile_csv(path: Union[str, Path], inner_path: str, sep: str = "\t", * return pd.read_csv(file, sep=sep, **kwargs) -def read_tarfile_xml(path: Union[str, Path], inner_path: str, **kwargs): +def read_tarfile_xml( + path: Union[str, Path], inner_path: str, **kwargs: Any +) -> "lxml.etree.ElementTree": """Read an inner XML file from a tar archive. :param path: The path to the tar archive @@ -751,13 +769,12 @@ def read_tarfile_xml(path: Union[str, Path], inner_path: str, **kwargs): return etree.parse(file, **kwargs) -def read_rdf(path: Union[str, Path], **kwargs): +def read_rdf(path: Union[str, Path], **kwargs: Any) -> "rdflib.Graph": """Read an RDF file with :mod:`rdflib`. :param path: The path to the RDF file :param kwargs: Additional kwargs to pass to :func:`rdflib.Graph.parse` :return: A parsed RDF graph - :rtype: rdflib.Graph """ import rdflib @@ -773,7 +790,7 @@ def read_rdf(path: Union[str, Path], **kwargs): return graph -def write_sql(df, name: str, path: Union[str, Path], **kwargs) -> None: +def write_sql(df: "pandas.DataFrame", name: str, path: Union[str, Path], **kwargs: Any) -> None: """Write a dataframe as a SQL table. :param df: A dataframe diff --git a/tox.ini b/tox.ini index 5b01431..0ff69bd 100644 --- a/tox.ini +++ b/tox.ini @@ -106,7 +106,7 @@ deps = types-requests skip_install = true commands = - mypy --install-types --non-interactive --ignore-missing-imports src/pystow/ + mypy --install-types --non-interactive --ignore-missing-imports --strict src/pystow/ description = Run the mypy tool to check static typing on the project. 
[testenv:pyroma] From 5da0a28623ca89d586c9291b802c7b85a6338c48 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:28:59 +0200 Subject: [PATCH 2/6] More types --- src/pystow/api.py | 3 ++- src/pystow/constants.py | 4 ++-- src/pystow/impl.py | 41 ++++++++++++++++++++++++----------------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/pystow/api.py b/src/pystow/api.py index 7445bb5..f0b7a9e 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Any, ContextManager, + Generator, Mapping, Optional, Sequence, @@ -1630,7 +1631,7 @@ def ensure_open_sqlite_gz( name: Optional[str] = None, force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> Generator[str, None, None]: """Ensure and connect to a gzipped SQLite database. :param key: diff --git a/src/pystow/constants.py b/src/pystow/constants.py index 719fb58..1f531a7 100644 --- a/src/pystow/constants.py +++ b/src/pystow/constants.py @@ -3,7 +3,7 @@ """PyStow constants.""" from textwrap import dedent -from typing import IO, Any, Callable, Iterator +from typing import IO, Any, Callable, Generator, Iterator __all__ = [ "PYSTOW_NAME_ENVVAR", @@ -68,6 +68,6 @@ """ ) -Opener = Iterator[IO] +Opener = Generator[IO, None, None] JSON = Any Provider = Callable[..., None] diff --git a/src/pystow/impl.py b/src/pystow/impl.py index c6cfd53..61749c9 100644 --- a/src/pystow/impl.py +++ b/src/pystow/impl.py @@ -7,11 +7,21 @@ import json import logging import lzma +import sqlite3 import tarfile import zipfile from contextlib import closing, contextmanager from pathlib import Path -from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generator, + Mapping, + Optional, + Sequence, + Union, +) from . import utils from .constants import JSON, Opener, Provider @@ -35,11 +45,12 @@ try: import pickle5 as pickle except ImportError: - import pickle # type:ignore + import pickle if TYPE_CHECKING: import botocore.client import lxml.etree + import numpy import pandas as pd import rdflib @@ -182,7 +193,7 @@ def ensure_custom( name: str, force: bool = False, provider: Provider, - **kwargs, + **kwargs: Any, ) -> Path: """Ensure a file is present, and run a custom create function otherwise. @@ -659,7 +670,7 @@ def dump_df( name: str, obj: "pd.DataFrame", sep: str = "\t", - index=False, + index: bool = False, to_csv_kwargs: Optional[Mapping[str, Any]] = None, ) -> None: """Dump a dataframe to a TSV file with :mod:`pandas`. @@ -729,7 +740,7 @@ def ensure_json_bz2( download_kwargs: Optional[Mapping[str, Any]] = None, open_kwargs: Optional[Mapping[str, Any]] = None, json_load_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> JSON: """Download BZ2-compressed JSON and open with :mod:`json`. :param subkeys: @@ -1102,7 +1113,7 @@ def dump_xml( obj: "lxml.etree.ElementTree", open_kwargs: Optional[Mapping[str, Any]] = None, write_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> None: """Dump an XML element tree to a file with :mod:`lxml`. :param subkeys: @@ -1200,7 +1211,7 @@ def ensure_zip_np( force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, load_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> "numpy.typing.ArrayLike": """Download a zip file and open an inner file as an array-like with :mod:`numpy`. 
:param subkeys: @@ -1269,11 +1280,11 @@ def ensure_rdf( cache_path = path.with_suffix(path.suffix + ".pickle.gz") if cache_path.exists() and not force: with gzip.open(cache_path, "rb") as file: - return pickle.load(file) # type: ignore + return pickle.load(file) rv = read_rdf(path=path, **(parse_kwargs or {})) with gzip.open(cache_path, "wb") as file: - pickle.dump(rv, file, protocol=pickle.HIGHEST_PROTOCOL) # type: ignore + pickle.dump(rv, file, protocol=pickle.HIGHEST_PROTOCOL) return rv def load_rdf( @@ -1303,7 +1314,7 @@ def dump_rdf( obj: "rdflib.Graph", format: str = "turtle", serialize_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> None: """Dump an RDF graph to a file with :mod:`rdflib`. :param subkeys: @@ -1409,7 +1420,7 @@ def ensure_open_sqlite( name: Optional[str] = None, force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database. :param subkeys: @@ -1435,8 +1446,6 @@ def ensure_open_sqlite( >>> with module.ensure_open_sqlite(url=url) as conn: >>> df = pd.read_sql(sql, conn) """ - import sqlite3 - path = self.ensure( *subkeys, url=url, name=name, force=force, download_kwargs=download_kwargs ) @@ -1451,7 +1460,7 @@ def ensure_open_sqlite_gz( name: Optional[str] = None, force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, - ): + ) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database that's gzipped. Unfortunately, it's a paid feature to directly read gzipped sqlite files, @@ -1480,8 +1489,6 @@ def ensure_open_sqlite_gz( >>> with module.ensure_open_sqlite_gz(url=url) as conn: >>> df = pd.read_sql(sql, conn) """ - import sqlite3 - path = self.ensure_gunzip( *subkeys, url=url, name=name, force=force, download_kwargs=download_kwargs ) @@ -1489,7 +1496,7 @@ def ensure_open_sqlite_gz( yield conn -def _clean_csv_kwargs(read_csv_kwargs): +def _clean_csv_kwargs(read_csv_kwargs: Mapping[str, Any] | None) -> Dict[str, Any]: read_csv_kwargs = {} if read_csv_kwargs is None else dict(read_csv_kwargs) read_csv_kwargs.setdefault("sep", "\t") return read_csv_kwargs From 78f04153a24393026b4b916b6e2f9401bb27a9d7 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:39:54 +0200 Subject: [PATCH 3/6] Finish --- src/pystow/__main__.py | 1 + src/pystow/api.py | 27 ++++++++++++++------------- src/pystow/cache.py | 6 +++--- src/pystow/cli.py | 2 +- src/pystow/config_api.py | 8 ++++---- src/pystow/constants.py | 6 ++++-- src/pystow/impl.py | 6 +++--- src/pystow/utils.py | 31 +++++++++++++++---------------- 8 files changed, 45 insertions(+), 42 deletions(-) diff --git a/src/pystow/__main__.py b/src/pystow/__main__.py index 0405cdf..350e058 100644 --- a/src/pystow/__main__.py +++ b/src/pystow/__main__.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# type: ignore """Command line interface for PyStow.""" diff --git a/src/pystow/api.py b/src/pystow/api.py index f0b7a9e..7ac6d9e 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -2,6 +2,7 @@ """API functions for PyStow.""" +import sqlite3 from contextlib import contextmanager from pathlib import Path from typing import ( @@ -15,7 +16,7 @@ Union, ) -from .constants import JSON, Opener, Provider +from .constants import JSON, BytesOpener, Opener, Provider from .impl import Module if TYPE_CHECKING: @@ -124,7 +125,7 @@ def open( name: str, mode: str = "r", open_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> Opener: """Open a file that exists already. 
:param key: @@ -152,7 +153,7 @@ def open_gz( name: str, mode: str = "rt", open_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> Opener: """Open a gzipped file that exists already. :param key: @@ -214,7 +215,7 @@ def ensure_custom( name: str, force: bool = False, provider: Provider, - **kwargs, + **kwargs: Any, ) -> Path: """Ensure a file is present, and run a custom create function otherwise. @@ -391,7 +392,7 @@ def ensure_open_zip( download_kwargs: Optional[Mapping[str, Any]] = None, mode: str = "r", open_kwargs: Optional[Mapping[str, Any]] = None, -) -> Opener: +) -> BytesOpener: """Ensure a file is downloaded then open it with :mod:`zipfile`. :param key: @@ -489,7 +490,7 @@ def ensure_open_tarfile( download_kwargs: Optional[Mapping[str, Any]] = None, mode: str = "r", open_kwargs: Optional[Mapping[str, Any]] = None, -) -> Opener: +) -> BytesOpener: """Ensure a tar file is downloaded and open a file inside it. :param key: @@ -717,7 +718,7 @@ def dump_df( name: str, obj: "pd.DataFrame", sep: str = "\t", - index=False, + index: bool = False, to_csv_kwargs: Optional[Mapping[str, Any]] = None, ) -> None: """Dump a dataframe to a TSV file with :mod:`pandas`. @@ -1250,7 +1251,7 @@ def ensure_tar_xml( force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, parse_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> "lxml.etree.ElementTree": """Download a tar file and open an inner file as an XML with :mod:`lxml`. :param key: The module name @@ -1462,7 +1463,7 @@ def dump_rdf( obj: "rdflib.Graph", format: str = "turtle", serialize_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> None: """Dump an RDF graph to a file with :mod:`rdflib`. :param key: @@ -1489,7 +1490,7 @@ def ensure_from_s3( s3_key: Union[str, Sequence[str]], name: Optional[str] = None, force: bool = False, - **kwargs, + **kwargs: Any, ) -> Path: """Ensure a file is downloaded. @@ -1521,7 +1522,7 @@ def ensure_from_s3( """ _module = Module.from_key(key, ensure_exists=True) return _module.ensure_from_s3( - *subkeys, s3_bucket=s3_bucket, s3_key=s3_key, name=name, force=force + *subkeys, s3_bucket=s3_bucket, s3_key=s3_key, name=name, force=force, **kwargs ) @@ -1587,7 +1588,7 @@ def ensure_open_sqlite( name: Optional[str] = None, force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, -): +) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a SQLite database. :param key: @@ -1631,7 +1632,7 @@ def ensure_open_sqlite_gz( name: Optional[str] = None, force: bool = False, download_kwargs: Optional[Mapping[str, Any]] = None, -) -> Generator[str, None, None]: +) -> Generator[sqlite3.Connection, None, None]: """Ensure and connect to a gzipped SQLite database. :param key: diff --git a/src/pystow/cache.py b/src/pystow/cache.py index 04bff89..d1aca7b 100644 --- a/src/pystow/cache.py +++ b/src/pystow/cache.py @@ -25,7 +25,7 @@ try: import pickle5 as pickle except ImportError: - import pickle # type:ignore + import pickle if TYPE_CHECKING: import pandas as pd @@ -57,7 +57,7 @@ class Cached(Generic[X], ABC): def __init__( self, - path: Union[str, Path, os.PathLike], + path: Union[str, Path], force: bool = False, ) -> None: """Instantiate the decorator. 
@@ -165,7 +165,7 @@ class CachedDataFrame(Cached["pd.DataFrame"]): def __init__( self, - path: Union[str, Path, os.PathLike], + path: Union[str, Path], force: bool = False, sep: Optional[str] = None, dtype: Optional[Any] = None, diff --git a/src/pystow/cli.py b/src/pystow/cli.py index b2a4021..2de3bfb 100644 --- a/src/pystow/cli.py +++ b/src/pystow/cli.py @@ -11,7 +11,7 @@ @click.group() -def main(): +def main() -> None: """Run the PyStow CLI.""" diff --git a/src/pystow/config_api.py b/src/pystow/config_api.py index 91fa599..34d92b4 100644 --- a/src/pystow/config_api.py +++ b/src/pystow/config_api.py @@ -7,7 +7,7 @@ from functools import lru_cache from pathlib import Path from textwrap import dedent -from typing import Optional, Type, TypeVar +from typing import Any, Callable, Optional, Type, TypeVar, Union from .utils import getenv_path @@ -131,7 +131,7 @@ def get_config( default: Optional[X] = None, dtype: Optional[Type[X]] = None, raise_on_missing: bool = False, -): +) -> Any: """Get a configuration value. :param module: Name of the module (e.g., ``pybel``) to get configuration for @@ -159,13 +159,13 @@ def get_config( return _cast(rv, dtype) -def _cast(rv, dtype): +def _cast(rv: Any, dtype: Union[None, Callable[..., Any]]) -> Any: if not isinstance(rv, str): # if it's not a string, it doesn't need munging return rv if dtype in (None, str): # no munging necessary return rv if dtype in (int, float): - return dtype(rv) # type: ignore + return dtype(rv) if dtype is bool: if rv.lower() in ("t", "true", "yes", "1", 1, True): return True diff --git a/src/pystow/constants.py b/src/pystow/constants.py index 1f531a7..ec08963 100644 --- a/src/pystow/constants.py +++ b/src/pystow/constants.py @@ -2,8 +2,9 @@ """PyStow constants.""" +from io import BytesIO, StringIO from textwrap import dedent -from typing import IO, Any, Callable, Generator, Iterator +from typing import IO, Any, Callable, Generator __all__ = [ "PYSTOW_NAME_ENVVAR", @@ -68,6 +69,7 @@ """ ) -Opener = Generator[IO, None, None] +Opener = Generator[StringIO, None, None] +BytesOpener = Generator[IO[bytes], None, None] JSON = Any Provider = Callable[..., None] diff --git a/src/pystow/impl.py b/src/pystow/impl.py index 61749c9..e439136 100644 --- a/src/pystow/impl.py +++ b/src/pystow/impl.py @@ -24,7 +24,7 @@ ) from . import utils -from .constants import JSON, Opener, Provider +from .constants import JSON, BytesOpener, Opener, Provider from .utils import ( base_from_gzip_name, download_from_google, @@ -459,7 +459,7 @@ def ensure_open_tarfile( download_kwargs: Optional[Mapping[str, Any]] = None, mode: str = "r", open_kwargs: Optional[Mapping[str, Any]] = None, - ) -> Opener: + ) -> BytesOpener: """Ensure a tar file is downloaded and open a file inside it. :param subkeys: @@ -501,7 +501,7 @@ def ensure_open_zip( download_kwargs: Optional[Mapping[str, Any]] = None, mode: str = "r", open_kwargs: Optional[Mapping[str, Any]] = None, - ) -> Opener: + ) -> BytesOpener: """Ensure a file is downloaded then open it with :mod:`zipfile`. 
:param subkeys: diff --git a/src/pystow/utils.py b/src/pystow/utils.py index 2a9ff92..6e2beea 100644 --- a/src/pystow/utils.py +++ b/src/pystow/utils.py @@ -44,6 +44,7 @@ ) if TYPE_CHECKING: + import botocore.client import lxml.etree import numpy.typing import pandas @@ -119,7 +120,7 @@ def __init__(self, offending_hexdigests: Collection[HexDigestMismatch]): """ self.offending_hexdigests = offending_hexdigests - def __str__(self): # noqa:D105 + def __str__(self) -> str: # noqa:D105 return "\n".join( ( "Hexdigest of downloaded file does not match the expected ones!", @@ -243,7 +244,7 @@ def get_hashes( # calculate hash sums of file incrementally buffer = memoryview(bytearray(chunk_size)) with path.open("rb", buffering=0) as file: - for this_chunk_size in iter(lambda: file.readinto(buffer), 0): # type: ignore + for this_chunk_size in iter(lambda: file.readinto(buffer), 0): for alg in algorithms.values(): alg.update(buffer[:this_chunk_size]) @@ -281,7 +282,7 @@ def raise_on_digest_mismatch( raise HexDigestError(offending_hexdigests) -class TqdmReportHook(tqdm): +class TqdmReportHook(tqdm): # type:ignore """A custom progress bar that can be used with urllib. Based on https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5 @@ -387,7 +388,7 @@ def download( # Solution for progres bar from https://stackoverflow.com/a/63831344/5775947 total_size = int(response.headers.get("Content-Length", 0)) # Decompress if needed - response.raw.read = partial(response.raw.read, decode_content=True) + response.raw.read = partial(response.raw.read, decode_content=True) # type:ignore with tqdm.wrapattr(response.raw, "read", total=total_size, **_tqdm_kwargs) as fsrc: shutil.copyfileobj(fsrc, file) else: @@ -517,7 +518,7 @@ def get_df_io( return bio -def get_np_io(arr, **kwargs: Any) -> BytesIO: +def get_np_io(arr: "numpy.typing.ArrayLike", **kwargs: Any) -> BytesIO: """Get the numpy object as bytes. :param arr: Array-like @@ -533,7 +534,7 @@ def get_np_io(arr, **kwargs: Any) -> BytesIO: def write_pickle_gz( - obj, + obj: Any, path: Union[str, Path], **kwargs: Any, ) -> None: @@ -551,10 +552,10 @@ def write_pickle_gz( def write_lzma_csv( df: "pandas.DataFrame", path: Union[str, Path], - sep="\t", + sep: str = "\t", index: bool = False, **kwargs: Any, -): +) -> None: """Write a dataframe as an lzma-compressed file. :param df: A dataframe @@ -575,7 +576,7 @@ def write_zipfile_csv( df: "pandas.DataFrame", path: Union[str, Path], inner_path: str, - sep="\t", + sep: str = "\t", index: bool = False, **kwargs: Any, ) -> None: @@ -619,7 +620,7 @@ def write_zipfile_xml( element_tree: "lxml.etree.ElementTree", path: Union[str, Path], inner_path: str, - **kwargs, + **kwargs: Any, ) -> None: """Write an XML element tree to an inner XML file to a zip archive. @@ -657,7 +658,7 @@ def write_zipfile_np( arr: "numpy.typing.ArrayLike", path: Union[str, Path], inner_path: str, - **kwargs, + **kwargs: Any, ) -> None: """Write a dataframe to an inner CSV file to a zip archive. @@ -712,7 +713,7 @@ def write_tarfile_csv( inner_path: str, sep: str = "\t", index: bool = False, - **kwargs, + **kwargs: Any, ) -> None: """Write a dataframe to an inner CSV file from a tar archive. 
@@ -782,9 +783,7 @@ def read_rdf(path: Union[str, Path], **kwargs: Any) -> "rdflib.Graph": path = Path(path) graph = rdflib.Graph() with ( - gzip.open(path, "rb") # type: ignore - if isinstance(path, Path) and path.suffix == ".gz" - else open(path) + gzip.open(path, "rb") if isinstance(path, Path) and path.suffix == ".gz" else open(path) ) as file: graph.parse(file, **kwargs) return graph @@ -891,7 +890,7 @@ def download_from_s3( s3_bucket: str, s3_key: str, path: Union[str, Path], - client=None, + client: Union[None, "botocore.client.BaseClient"] = None, client_kwargs: Optional[Mapping[str, Any]] = None, download_file_kwargs: Optional[Mapping[str, Any]] = None, force: bool = True, From 03e749bcd279333cee13a25f6b0ba62fb2f5e5ca Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:41:32 +0200 Subject: [PATCH 4/6] Final cleanup --- setup.cfg | 2 +- src/pystow/api.py | 1 - src/pystow/cache.py | 1 - src/pystow/constants.py | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 0aa2212..9427f05 100644 --- a/setup.cfg +++ b/setup.cfg @@ -74,7 +74,7 @@ tests = pytest requests_file docs = - sphinx + sphinx<8.0 sphinx-rtd-theme sphinx-click sphinx-autodoc-typehints diff --git a/src/pystow/api.py b/src/pystow/api.py index 7ac6d9e..291b4e0 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -8,7 +8,6 @@ from typing import ( TYPE_CHECKING, Any, - ContextManager, Generator, Mapping, Optional, diff --git a/src/pystow/cache.py b/src/pystow/cache.py index d1aca7b..83651dd 100644 --- a/src/pystow/cache.py +++ b/src/pystow/cache.py @@ -5,7 +5,6 @@ import functools import json import logging -import os from abc import ABC, abstractmethod from pathlib import Path from typing import ( diff --git a/src/pystow/constants.py b/src/pystow/constants.py index ec08963..408a77c 100644 --- a/src/pystow/constants.py +++ b/src/pystow/constants.py @@ -2,7 +2,7 @@ """PyStow constants.""" -from io import BytesIO, StringIO +from io import StringIO from textwrap import dedent from typing import IO, Any, Callable, Generator From 1bd6470ba431e9bd8ae9c2270ce7c81bb96819d3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:42:36 +0200 Subject: [PATCH 5/6] Update impl.py --- src/pystow/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pystow/impl.py b/src/pystow/impl.py index e439136..e6be937 100644 --- a/src/pystow/impl.py +++ b/src/pystow/impl.py @@ -1496,7 +1496,7 @@ def ensure_open_sqlite_gz( yield conn -def _clean_csv_kwargs(read_csv_kwargs: Mapping[str, Any] | None) -> Dict[str, Any]: +def _clean_csv_kwargs(read_csv_kwargs: Union[None, Mapping[str, Any]]) -> Dict[str, Any]: read_csv_kwargs = {} if read_csv_kwargs is None else dict(read_csv_kwargs) read_csv_kwargs.setdefault("sep", "\t") return read_csv_kwargs From 40c0414e3b9d7bc0884358757fead6643004ab52 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 7 Sep 2024 17:44:51 +0200 Subject: [PATCH 6/6] Update api.py --- src/pystow/api.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/pystow/api.py b/src/pystow/api.py index 291b4e0..44fee3a 100644 --- a/src/pystow/api.py +++ b/src/pystow/api.py @@ -5,15 +5,7 @@ import sqlite3 from contextlib import contextmanager from pathlib import Path -from typing import ( - TYPE_CHECKING, - Any, - Generator, - Mapping, - Optional, - Sequence, - Union, -) +from typing import TYPE_CHECKING, Any, Generator, Mapping, Optional, Sequence, Union from .constants import JSON, 
BytesOpener, Opener, Provider
 from .impl import Module
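
What the stricter annotations buy downstream code: the context-manager helpers now advertise concrete yield types (``sqlite3.Connection`` for the SQLite helpers, typed binary handles for the zip/tar openers), and the tox environment that runs mypy now passes ``--strict`` over ``src/pystow/``. The sketch below is illustrative only; it adapts the ``ensure_open_sqlite_gz`` docstring example to the module-level API, and the module key, URL, and SQL query are placeholders, not anything defined in these patches::

    import pandas as pd

    import pystow

    # Placeholder URL for a gzipped SQLite database (not part of the patches).
    url = "https://example.org/data.db.gz"

    # ensure_open_sqlite_gz is now annotated as
    # Generator[sqlite3.Connection, None, None], so ``conn`` is inferred as
    # sqlite3.Connection instead of Any inside the ``with`` block.
    with pystow.ensure_open_sqlite_gz("test", url=url) as conn:
        df = pd.read_sql("SELECT * FROM example", conn)  # hypothetical table

    print(df.shape)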
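
The series also splits the old ``Opener`` alias into a text variant (``Opener = Generator[StringIO, None, None]``) and a binary variant (``BytesOpener = Generator[IO[bytes], None, None]``) in ``pystow.constants``. Code that wraps PyStow can reuse those aliases in its own signatures, mirroring how ``ensure_open_zip`` and ``ensure_open_tarfile`` are annotated here. The helper below is a minimal sketch under that assumption; the function name, module key, and URL are invented for illustration, and only ``pystow.ensure`` and the ``BytesOpener`` alias are taken from the patches::

    from contextlib import contextmanager

    import pystow
    from pystow.constants import BytesOpener


    @contextmanager
    def open_cached_blob(url: str) -> BytesOpener:
        """Download ``url`` into a hypothetical 'test' module and yield a binary handle."""
        path = pystow.ensure("test", url=url)
        # Path.open("rb") returns an IO[bytes]-compatible handle, which is what
        # the BytesOpener alias (Generator[IO[bytes], None, None]) expects.
        with path.open("rb") as file:
            yield file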