diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2a40e646..3ca9cf32 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
         shell: bash -l {0}
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]

     steps:
       - uses: actions/checkout@v4
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a1af21fa..0ad42e7b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,10 +37,8 @@ repos:
         ]
 # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
 # - repo: https://github.com/asottile/pyupgrade
-#   rev: v1.22.1
+#   rev: v3.15.2
 #   hooks:
 #     - id: pyupgrade
 #       args:
-#         - "--py3-only"
-#         # remove on f-strings in Py3.7
-#         - "--keep-percent-format"
+#         - "--py310-plus"
diff --git a/ci/doc.yml b/ci/doc.yml
index 1fffa7ee..7d7e9224 100644
--- a/ci/doc.yml
+++ b/ci/doc.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - nodefaults
 dependencies:
-  - python>=3.9
+  - python>=3.10
   - "sphinx"
   - pip
   - pip:
diff --git a/pyproject.toml b/pyproject.toml
index 074bbd7b..423574df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,6 @@ classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Operating System :: OS Independent",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py
index 07902fd3..e9f1b2a7 100644
--- a/virtualizarr/kerchunk.py
+++ b/virtualizarr/kerchunk.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, NewType, Optional, Tuple, Union, cast
+from typing import NewType, cast

 import ujson  # type: ignore
 import xarray as xr
@@ -40,10 +40,10 @@ class FileType(AutoName):

 def read_kerchunk_references_from_file(
     filepath: str,
-    filetype: Optional[FileType],
-    reader_options: Optional[dict] = {
+    filetype: FileType | None,
+    reader_options: dict | None = {
         "storage_options": {"key": "", "secret": "", "anon": True}
     },
 ) -> KerchunkStoreRefs:
     """
     Read a single legacy file and return kerchunk references to its contents.
@@ -166,7 +166,7 @@ def extract_array_refs(

 def parse_array_refs(
     arr_refs: KerchunkArrRefs,
-) -> Tuple[dict, ZArray, ZAttrs]:
+) -> tuple[dict, ZArray, ZAttrs]:
     zarray = ZArray.from_kerchunk_refs(arr_refs.pop(".zarray"))
     zattrs = arr_refs.pop(".zattrs", {})
     chunk_dict = arr_refs
@@ -228,7 +228,7 @@ def variable_to_kerchunk_arr_refs(var: xr.Variable) -> KerchunkArrRefs:
             f"Can only serialize wrapped arrays of type ManifestArray, but got type {type(marr)}"
         )

-    arr_refs: dict[str, Union[str, List[Union[str, int]]]] = {
+    arr_refs: dict[str, str | list[str | int]] = {
         str(chunk_key): chunk_entry.to_kerchunk()
         for chunk_key, chunk_entry in marr.manifest.entries.items()
     }
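The same mechanical rewrite recurs through the rest of the patch: `Optional[X]` becomes `X | None` (PEP 604) and `typing.List`/`Tuple`/`Dict`/`Union` become built-in generics (PEP 585), both usable in annotations without `from __future__ import annotations` once the floor is Python 3.10. A minimal standalone sketch of the before/after (toy function, not part of the patch):

```python
from pathlib import Path

# Python 3.9 style, as removed by this patch:
#   from typing import Dict, List, Optional, Union
#   def load(path: Optional[str], keys: List[str]) -> Dict[str, Union[str, int]]: ...

# Python 3.10+ style, as introduced by this patch:
def load(path: str | None, keys: list[str]) -> dict[str, str | int]:
    """Toy function demonstrating the annotation style only."""
    source = Path(path) if path is not None else Path("defaults.json")
    return {key: source.name for key in keys} | {"count": len(keys)}

print(load(None, ["a", "b"]))  # {'a': 'defaults.json', 'b': 'defaults.json', 'count': 2}
```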
diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py
index 423610cb..cfc15cca 100644
--- a/virtualizarr/manifests/array.py
+++ b/virtualizarr/manifests/array.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, Tuple, Union
+from typing import Any, Union

 import numpy as np
@@ -26,8 +26,8 @@ class ManifestArray:

     def __init__(
         self,
-        zarray: Union[ZArray, dict],
-        chunkmanifest: Union[dict, ChunkManifest],
+        zarray: ZArray | dict,
+        chunkmanifest: dict | ChunkManifest,
     ) -> None:
         """
         Create a ManifestArray directly from the .zarray information of a zarr array and the manifest of chunks.
@@ -80,7 +80,7 @@ def zarray(self) -> ZArray:
         return self._zarray

     @property
-    def chunks(self) -> Tuple[int, ...]:
+    def chunks(self) -> tuple[int, ...]:
         return tuple(self.zarray.chunks)

     @property
diff --git a/virtualizarr/manifests/array_api.py b/virtualizarr/manifests/array_api.py
index e5a4a030..8dac514e 100644
--- a/virtualizarr/manifests/array_api.py
+++ b/virtualizarr/manifests/array_api.py
@@ -1,5 +1,6 @@
 import itertools
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Tuple, Union, cast
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING, cast

 import numpy as np

@@ -10,7 +11,7 @@
 from .array import ManifestArray

-MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: Dict[
+MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: dict[
     str, Callable
 ] = {}  # populated by the @implements decorators below
@@ -51,7 +52,7 @@ def _check_same_dtypes(dtypes: list[np.dtype]) -> None:
         )


-def _check_same_codecs(codecs: List[Codec]) -> None:
+def _check_same_codecs(codecs: list[Codec]) -> None:
     first_codec, *other_codecs = codecs
     for codec in other_codecs:
         if codec != first_codec:
@@ -62,7 +63,7 @@ def _check_same_codecs(codecs: list[Codec]) -> None:
         )


-def _check_same_chunk_shapes(chunks_list: List[Tuple[int, ...]]) -> None:
+def _check_same_chunk_shapes(chunks_list: list[tuple[int, ...]]) -> None:
     """Check all the chunk shapes are the same"""
     first_chunks, *other_chunks_list = chunks_list

@@ -77,7 +78,7 @@ def _check_same_chunk_shapes(chunks_list: list[tuple[int, ...]]) -> None:
 @implements(np.result_type)
 def result_type(*arrays_and_dtypes) -> np.dtype:
     """Called by xarray to ensure all arguments to concat have the same dtype."""
-    first_dtype, *other_dtypes = [np.dtype(obj) for obj in arrays_and_dtypes]
+    first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
     for other_dtype in other_dtypes:
         if other_dtype != first_dtype:
             raise ValueError("dtypes not all consistent")
@@ -86,10 +87,10 @@

 @implements(np.concatenate)
 def concatenate(
-    arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
+    arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
     /,
     *,
-    axis: Union[int, None] = 0,
+    axis: int | None = 0,
 ) -> "ManifestArray":
     """
     Concatenate ManifestArrays by merging their chunk manifests.
@@ -176,7 +177,7 @@ def _remove_element_at_position(t: tuple[int, ...], pos: int) -> tuple[int, ...]:

 @implements(np.stack)
 def stack(
-    arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
+    arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
     /,
     *,
     axis: int = 0,
@@ -234,7 +235,7 @@ def stack(
     return ManifestArray(chunkmanifest=stacked_manifest, zarray=new_zarray)


-def _check_same_shapes(shapes: List[Tuple[int, ...]]) -> None:
+def _check_same_shapes(shapes: list[tuple[int, ...]]) -> None:
     first_shape, *other_shapes = shapes
     for other_shape in other_shapes:
         if other_shape != first_shape:
@@ -251,7 +252,7 @@ def expand_dims(x: "ManifestArray", /, *, axis: int = 0) -> "ManifestArray":


 @implements(np.broadcast_to)
-def broadcast_to(x: "ManifestArray", /, shape: Tuple[int, ...]) -> "ManifestArray":
+def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
     """
     Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
     """
@@ -328,7 +329,7 @@ def _broadcast_scalar(x: "ManifestArray", new_axis_length: int) -> "ManifestArray":

 @implements(np.full_like)
 def full_like(
-    x: "ManifestArray", /, fill_value: bool, *, dtype: Union[np.dtype, None]
+    x: "ManifestArray", /, fill_value: bool, *, dtype: np.dtype | None
 ) -> np.ndarray:
     """
     Returns a new array filled with fill_value and having the same shape as an input array x.
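array_api.py routes NumPy functions through the module-level `MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: dict[str, Callable]` registry, filled in by the `@implements` decorators seen above. A simplified sketch of that registry pattern, keying on the function name as the `dict[str, Callable]` annotation suggests (the real decorator may key on the NumPy function object itself):

```python
from collections.abc import Callable

import numpy as np

HANDLED_FUNCTIONS: dict[str, Callable] = {}  # populated by @implements below


def implements(numpy_function) -> Callable[[Callable], Callable]:
    """Register func as the handler dispatched to for numpy_function."""

    def decorator(func: Callable) -> Callable:
        HANDLED_FUNCTIONS[numpy_function.__name__] = func
        return func

    return decorator


@implements(np.concatenate)
def concatenate(arrays, /, *, axis: int | None = 0):
    # A real implementation would merge chunk manifests here.
    return f"concatenating {len(arrays)} arrays along axis {axis}"


print(HANDLED_FUNCTIONS["concatenate"](["a", "b"], axis=0))
```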
""" @@ -328,7 +329,7 @@ def _broadcast_scalar(x: "ManifestArray", new_axis_length: int) -> "ManifestArra @implements(np.full_like) def full_like( - x: "ManifestArray", /, fill_value: bool, *, dtype: Union[np.dtype, None] + x: "ManifestArray", /, fill_value: bool, *, dtype: np.dtype | None ) -> np.ndarray: """ Returns a new array filled with fill_value and having the same shape as an input array x. diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py index 0c578c84..c0a95c67 100644 --- a/virtualizarr/manifests/manifest.py +++ b/virtualizarr/manifests/manifest.py @@ -1,7 +1,8 @@ import itertools import json import re -from typing import Any, Iterable, Iterator, List, Mapping, Tuple, Union, cast +from collections.abc import Iterable, Iterator, Mapping +from typing import Any, cast import numpy as np from pydantic import BaseModel, ConfigDict, field_validator @@ -32,13 +33,11 @@ def __repr__(self) -> str: return f"ChunkEntry(path='{self.path}', offset={self.offset}, length={self.length})" @classmethod - def from_kerchunk( - cls, path_and_byte_range_info: List[Union[str, int]] - ) -> "ChunkEntry": + def from_kerchunk(cls, path_and_byte_range_info: list[str | int]) -> "ChunkEntry": path, offset, length = path_and_byte_range_info return ChunkEntry(path=path, offset=offset, length=length) - def to_kerchunk(self) -> List[Union[str, int]]: + def to_kerchunk(self) -> list[str | int]: """Write out in the format that kerchunk uses for chunk entries.""" return [self.path, self.offset, self.length] @@ -87,7 +86,7 @@ def ndim_chunk_grid(self) -> int: return get_ndim_from_key(list(self.entries.keys())[0]) @property - def shape_chunk_grid(self) -> Tuple[int, ...]: + def shape_chunk_grid(self) -> tuple[int, ...]: """ Number of separate chunks along each dimension. 
@@ -107,14 +106,14 @@ def __iter__(self) -> Iterator[ChunkKey]:
     def __len__(self) -> int:
         return len(self.entries)

-    def dict(self) -> dict[str, dict[str, Union[str, int]]]:
+    def dict(self) -> dict[str, dict[str, str | int]]:
         """Converts the entire manifest to a nested dictionary."""
         return {k: dict(entry) for k, entry in self.entries.items()}

     @classmethod
     def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
         """Create a ChunkManifest from a Zarr manifest.json file."""
-        with open(filepath, "r") as manifest_file:
+        with open(filepath) as manifest_file:
             entries_dict = json.load(manifest_file)

         entries = {
@@ -135,7 +134,7 @@ def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
         return ChunkManifest(entries=chunkentries)


-def split(key: ChunkKey) -> List[int]:
+def split(key: ChunkKey) -> list[int]:
     return list(int(i) for i in key.split("."))

@@ -168,7 +167,7 @@ def validate_chunk_keys(chunk_keys: Iterable[ChunkKey]):
     check_keys_form_grid(chunk_keys)


-def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> Tuple[int, ...]:
+def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> tuple[int, ...]:
     # find max chunk index along each dimension
     zipped_indices = zip(*[split(key) for key in chunk_keys])
     chunk_grid_shape = tuple(
@@ -186,16 +185,16 @@ def check_keys_form_grid(chunk_keys: Iterable[ChunkKey]):
     all_possible_combos = itertools.product(
         *[range(length) for length in chunk_grid_shape]
     )
-    all_required_chunk_keys: set[ChunkKey] = set(
+    all_required_chunk_keys: set[ChunkKey] = {
         join(inds) for inds in all_possible_combos
-    )
+    }

     # check that every possible combination is represented once in the list of chunk keys
     if set(chunk_keys) != all_required_chunk_keys:
         raise ValueError("Chunk keys do not form a complete grid")


-def concat_manifests(manifests: List["ChunkManifest"], axis: int) -> "ChunkManifest":
+def concat_manifests(manifests: list["ChunkManifest"], axis: int) -> "ChunkManifest":
     """
     Concatenate manifests along an existing dimension.
@@ -216,7 +215,7 @@ def concat_manifests(manifests: List["ChunkManifest"], axis: int) -> "ChunkManifest":
         for manifest, offset in zip(manifests[1:], chunk_index_offsets)
     ]
     all_entries = [manifests[0].entries] + new_entries
-    merged_entries = dict((k, v) for d in all_entries for k, v in d.items())
+    merged_entries = {k: v for d in all_entries for k, v in d.items()}

     # Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
     # Could use pydantic's model_construct classmethod to skip these checks
@@ -237,7 +236,7 @@ def offset_key(key: ChunkKey, axis: int, offset: int) -> ChunkKey:
     return {offset_key(k, axis, offset): v for k, v in entries.items()}


-def stack_manifests(manifests: List[ChunkManifest], axis: int) -> "ChunkManifest":
+def stack_manifests(manifests: list[ChunkManifest], axis: int) -> "ChunkManifest":
     """
     Stack manifests along a new dimension.
@@ -252,7 +251,7 @@ def stack_manifests(manifests: List[ChunkManifest], axis: int) -> "ChunkManifest":
         insert_new_axis_into_chunk_keys(manifest.entries, axis, new_index_value)
         for manifest, new_index_value in zip(manifests, chunk_indexes_along_new_dim)
     ]
-    merged_entries = dict((k, v) for d in new_entries for k, v in d.items())
+    merged_entries = {k: v for d in new_entries for k, v in d.items()}

     # Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
     # Could use pydantic's model_construct classmethod to skip these checks
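Alongside the annotation changes, manifest.py swaps generator expressions fed to `dict()` and `set()` for literal comprehensions, another rewrite pyupgrade performs automatically. The two forms are equivalent, as this standalone check shows (toy entries, not real chunk manifests):

```python
# Merging chunk-entry dicts: constructor-with-generator vs. dict comprehension.
entries_a = {"0.0": "file_a.nc", "0.1": "file_b.nc"}
entries_b = {"1.0": "file_c.nc"}
all_entries = [entries_a, entries_b]

merged_old = dict((k, v) for d in all_entries for k, v in d.items())
merged_new = {k: v for d in all_entries for k, v in d.items()}
assert merged_old == merged_new

# The same equivalence holds for set literals, as in check_keys_form_grid:
keys_old = set(k for d in all_entries for k in d)
keys_new = {k for d in all_entries for k in d}
assert keys_old == keys_new
```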
diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
index 0df2d535..d4820971 100644
--- a/virtualizarr/tests/test_kerchunk.py
+++ b/virtualizarr/tests/test_kerchunk.py
@@ -113,7 +113,7 @@ def test_accessor_to_kerchunk_json(self, tmp_path):

         ds.virtualize.to_kerchunk(filepath, format="json")

-        with open(filepath, "r") as json_file:
+        with open(filepath) as json_file:
             loaded_refs = ujson.load(json_file)

         expected_ds_refs = {
diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
index 5e815590..d145550e 100644
--- a/virtualizarr/tests/test_xarray.py
+++ b/virtualizarr/tests/test_xarray.py
@@ -1,4 +1,4 @@
-from typing import Mapping
+from collections.abc import Mapping

 import numpy as np
 import pytest
diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py
index 70c43974..7e498552 100644
--- a/virtualizarr/xarray.py
+++ b/virtualizarr/xarray.py
@@ -1,12 +1,7 @@
+from collections.abc import Iterable, Mapping, MutableMapping
 from pathlib import Path
 from typing import (
-    Iterable,
-    List,
     Literal,
-    Mapping,
-    MutableMapping,
-    Optional,
-    Union,
     overload,
 )
@@ -36,11 +31,11 @@ class ManifestBackendArray(ManifestArray, BackendArray):

 def open_virtual_dataset(
     filepath: str,
-    filetype: Optional[FileType] = None,
-    drop_variables: Optional[Iterable[str]] = None,
-    loadable_variables: Optional[Iterable[str]] = None,
-    indexes: Optional[Mapping[str, Index]] = None,
+    filetype: FileType | None = None,
+    drop_variables: Iterable[str] | None = None,
+    loadable_variables: Iterable[str] | None = None,
+    indexes: Mapping[str, Index] | None = None,
     virtual_array_class=ManifestArray,
-    reader_options: Optional[dict] = {
+    reader_options: dict | None = {
         "storage_options": {"key": "", "secret": "", "anon": True}
     },
@@ -170,8 +165,8 @@ def open_virtual_dataset(

 def open_virtual_dataset_from_v3_store(
     storepath: str,
-    drop_variables: List[str],
-    indexes: Optional[Mapping[str, Index]],
+    drop_variables: list[str],
+    indexes: Mapping[str, Index] | None,
 ) -> xr.Dataset:
     """
     Read a Zarr v3 store and return an xarray Dataset containing virtualized arrays.
@@ -221,7 +216,7 @@ def open_virtual_dataset_from_v3_store(

 def virtual_vars_from_kerchunk_refs(
     refs: KerchunkStoreRefs,
-    drop_variables: Optional[List[str]] = None,
+    drop_variables: list[str] | None = None,
     virtual_array_class=ManifestArray,
 ) -> Mapping[str, xr.Variable]:
     """
@@ -251,9 +246,9 @@ def virtual_vars_from_kerchunk_refs(

 def dataset_from_kerchunk_refs(
     refs: KerchunkStoreRefs,
-    drop_variables: List[str] = [],
+    drop_variables: list[str] = [],
     virtual_array_class: type = ManifestArray,
-    indexes: Optional[MutableMapping[str, Index]] = None,
+    indexes: MutableMapping[str, Index] | None = None,
 ) -> xr.Dataset:
     """
     Translate a store-level kerchunk reference dict into an xarray Dataset containing virtualized arrays.
@@ -310,7 +305,7 @@ def separate_coords(
     """

     # this would normally come from CF decoding, let's hope the fact we're skipping that doesn't cause any problems...
-    coord_names: List[str] = []
+    coord_names: list[str] = []

     # split data and coordinate variables (promote dimension coordinates)
     data_vars = {}
@@ -377,9 +372,9 @@ def to_kerchunk(self, filepath: str, format: Literal["parquet"]) -> None: ...

     def to_kerchunk(
         self,
-        filepath: Optional[str] = None,
-        format: Union[Literal["dict"], Literal["json"], Literal["parquet"]] = "dict",
-    ) -> Union[KerchunkStoreRefs, None]:
+        filepath: str | None = None,
+        format: Literal["dict", "json", "parquet"] = "dict",
+    ) -> KerchunkStoreRefs | None:
         """
         Serialize all virtualized arrays in this xarray dataset into the kerchunk references format.
diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py
index e20561e6..9a9a2067 100644
--- a/virtualizarr/zarr.py
+++ b/virtualizarr/zarr.py
@@ -3,13 +3,8 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    Dict,
-    List,
     Literal,
     NewType,
-    Optional,
-    Tuple,
-    Union,
 )

 import numpy as np
@@ -29,8 +24,8 @@


 class Codec(BaseModel):
-    compressor: Optional[str] = None
-    filters: Optional[List[Dict]] = None
+    compressor: str | None = None
+    filters: list[dict] | None = None

     def __repr__(self) -> str:
         return f"Codec(compressor={self.compressor}, filters={self.filters})"
@@ -45,14 +40,14 @@ class ZArray(BaseModel):
         arbitrary_types_allowed=True,  # only here so pydantic doesn't complain about the numpy dtype field
     )

-    chunks: Tuple[int, ...]
-    compressor: Optional[str] = None
+    chunks: tuple[int, ...]
+    compressor: str | None = None
     dtype: np.dtype
-    fill_value: Optional[float] = None  # float or int?
-    filters: Optional[List[Dict]] = None
-    order: Union[Literal["C"], Literal["F"]]
-    shape: Tuple[int, ...]
-    zarr_format: Union[Literal[2], Literal[3]] = 2
+    fill_value: float | None = None  # float or int?
+    filters: list[dict] | None = None
+    order: Literal["C", "F"]
+    shape: tuple[int, ...]
+    zarr_format: Literal[2, 3] = 2

     @field_validator("dtype")
     @classmethod
@@ -181,7 +176,7 @@ def to_zarr_json(var: xr.Variable, array_dir: Path) -> None:
         metadata_file.write(json_dumps(metadata))


-def zarr_v3_array_metadata(zarray: ZArray, dim_names: List[str], attrs: dict) -> dict:
+def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) -> dict:
     """Construct a v3-compliant metadata dict from v2 zarray + information stored on the xarray variable."""

     # TODO it would be nice if we could use the zarr-python metadata.ArrayMetadata classes to do this conversion for us
@@ -219,13 +214,13 @@ def zarr_v3_array_metadata(zarray: ZArray, dim_names: List[str], attrs: dict) -> dict:


 def attrs_from_zarr_group_json(filepath: Path) -> dict:
-    with open(filepath, "r") as metadata_file:
+    with open(filepath) as metadata_file:
         attrs = json.load(metadata_file)
     return attrs["attributes"]


-def metadata_from_zarr_json(filepath: Path) -> Tuple[ZArray, List[str], dict]:
-    with open(filepath, "r") as metadata_file:
+def metadata_from_zarr_json(filepath: Path) -> tuple[ZArray, list[str], dict]:
+    with open(filepath) as metadata_file:
         metadata = json.load(metadata_file)

     if {
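The ZArray change above also collapses `Union[Literal["C"], Literal["F"]]` into the equivalent single `Literal["C", "F"]` (and likewise `Literal[2, 3]` for `zarr_format`); type checkers and pydantic validate both spellings identically. A standalone toy model showing the consolidated style (not the real ZArray):

```python
from typing import Literal

from pydantic import BaseModel


class Layout(BaseModel):
    # Literal["C", "F"] is shorthand for Union[Literal["C"], Literal["F"]]
    order: Literal["C", "F"] = "C"
    zarr_format: Literal[2, 3] = 2


print(Layout(order="F", zarr_format=3))  # order='F' zarr_format=3
print(Layout())                          # defaults: order='C' zarr_format=2
```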