Merge branch 'main' into reader_options
norlandrhagen authored May 13, 2024
2 parents 3a29b41 + 5d8d198 commit b9c056a
Showing 12 changed files with 69 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4

6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -37,10 +37,8 @@ repos:
]
# run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
# - repo: https://github.com/asottile/pyupgrade
-# rev: v1.22.1
+# rev: v3.15.2
# hooks:
# - id: pyupgrade
# args:
# - "--py3-only"
# # remove on f-strings in Py3.7
# - "--keep-percent-format"
# - "--py310-plus"
2 changes: 1 addition & 1 deletion ci/doc.yml
@@ -3,7 +3,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
-- python>=3.9
+- python>=3.10
- "sphinx"
- pip
- pip:
1 change: 0 additions & 1 deletion pyproject.toml
@@ -14,7 +14,6 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
9 changes: 5 additions & 4 deletions virtualizarr/kerchunk.py
@@ -1,5 +1,5 @@
from pathlib import Path
-from typing import List, NewType, Optional, Tuple, Union, cast
+from typing import NewType, cast

import ujson # type: ignore
import xarray as xr
@@ -40,10 +40,11 @@ class FileType(AutoName):

def read_kerchunk_references_from_file(
    filepath: str,
-   filetype: Optional[FileType],
+   filetype: FileType | None,
    reader_options: Optional[dict] = {
        "storage_options": {"key": "", "secret": "", "anon": True}
    },
) -> KerchunkStoreRefs:
"""
Read a single legacy file and return kerchunk references to its contents.
@@ -166,7 +167,7 @@ def extract_array_refs(

def parse_array_refs(
arr_refs: KerchunkArrRefs,
-) -> Tuple[dict, ZArray, ZAttrs]:
+) -> tuple[dict, ZArray, ZAttrs]:
zarray = ZArray.from_kerchunk_refs(arr_refs.pop(".zarray"))
zattrs = arr_refs.pop(".zattrs", {})
chunk_dict = arr_refs
@@ -228,7 +229,7 @@ def variable_to_kerchunk_arr_refs(var: xr.Variable) -> KerchunkArrRefs:
f"Can only serialize wrapped arrays of type ManifestArray, but got type {type(marr)}"
)

-arr_refs: dict[str, Union[str, List[Union[str, int]]]] = {
+arr_refs: dict[str, str | list[str | int]] = {
str(chunk_key): chunk_entry.to_kerchunk()
for chunk_key, chunk_entry in marr.manifest.entries.items()
}
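For context, the reader_options parameter visible in this file's signature is what the reader_options branch introduces. A hedged usage sketch based only on the signature shown above; the S3 path is a placeholder and FileType.netcdf4 is assumed to be a member of the FileType enum, not something confirmed by this diff:

```python
from virtualizarr.kerchunk import FileType, read_kerchunk_references_from_file

# Illustrative call only: placeholder path, assumed FileType member, and the
# fsspec-style storage options matching the default shown in the diff.
refs = read_kerchunk_references_from_file(
    filepath="s3://some-bucket/air_temperature.nc",
    filetype=FileType.netcdf4,
    reader_options={"storage_options": {"key": "", "secret": "", "anon": True}},
)
```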
8 changes: 4 additions & 4 deletions virtualizarr/manifests/array.py
@@ -1,5 +1,5 @@
import warnings
-from typing import Any, Tuple, Union
+from typing import Any, Union

import numpy as np

@@ -26,8 +26,8 @@ class ManifestArray:

def __init__(
self,
-zarray: Union[ZArray, dict],
-chunkmanifest: Union[dict, ChunkManifest],
+zarray: ZArray | dict,
+chunkmanifest: dict | ChunkManifest,
) -> None:
"""
Create a ManifestArray directly from the .zarray information of a zarr array and the manifest of chunks.
@@ -80,7 +80,7 @@ def zarray(self) -> ZArray:
return self._zarray

@property
-def chunks(self) -> Tuple[int, ...]:
+def chunks(self) -> tuple[int, ...]:
return tuple(self.zarray.chunks)

@property
23 changes: 12 additions & 11 deletions virtualizarr/manifests/array_api.py
@@ -1,5 +1,6 @@
import itertools
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Tuple, Union, cast
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING, cast

import numpy as np

@@ -10,7 +11,7 @@
from .array import ManifestArray


-MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: Dict[
+MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: dict[
str, Callable
] = {} # populated by the @implements decorators below

@@ -51,7 +52,7 @@ def _check_same_dtypes(dtypes: list[np.dtype]) -> None:
)


-def _check_same_codecs(codecs: List[Codec]) -> None:
+def _check_same_codecs(codecs: list[Codec]) -> None:
first_codec, *other_codecs = codecs
for codec in other_codecs:
if codec != first_codec:
@@ -62,7 +63,7 @@ def _check_same_codecs(codecs: List[Codec]) -> None:
)


-def _check_same_chunk_shapes(chunks_list: List[Tuple[int, ...]]) -> None:
+def _check_same_chunk_shapes(chunks_list: list[tuple[int, ...]]) -> None:
"""Check all the chunk shapes are the same"""

first_chunks, *other_chunks_list = chunks_list
@@ -77,7 +78,7 @@ def _check_same_chunk_shapes(chunks_list: List[Tuple[int, ...]]) -> None:
@implements(np.result_type)
def result_type(*arrays_and_dtypes) -> np.dtype:
"""Called by xarray to ensure all arguments to concat have the same dtype."""
-first_dtype, *other_dtypes = [np.dtype(obj) for obj in arrays_and_dtypes]
+first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
for other_dtype in other_dtypes:
if other_dtype != first_dtype:
raise ValueError("dtypes not all consistent")
@@ -86,10 +87,10 @@ def result_type(*arrays_and_dtypes) -> np.dtype:

@implements(np.concatenate)
def concatenate(
arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
/,
*,
-axis: Union[int, None] = 0,
+axis: int | None = 0,
) -> "ManifestArray":
"""
Concatenate ManifestArrays by merging their chunk manifests.
@@ -176,7 +177,7 @@ def _remove_element_at_position(t: tuple[int, ...], pos: int) -> tuple[int, ...]

@implements(np.stack)
def stack(
arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
/,
*,
axis: int = 0,
@@ -234,7 +235,7 @@ def stack(
return ManifestArray(chunkmanifest=stacked_manifest, zarray=new_zarray)


-def _check_same_shapes(shapes: List[Tuple[int, ...]]) -> None:
+def _check_same_shapes(shapes: list[tuple[int, ...]]) -> None:
first_shape, *other_shapes = shapes
for other_shape in other_shapes:
if other_shape != first_shape:
@@ -251,7 +252,7 @@ def expand_dims(x: "ManifestArray", /, *, axis: int = 0) -> "ManifestArray":


@implements(np.broadcast_to)
def broadcast_to(x: "ManifestArray", /, shape: Tuple[int, ...]) -> "ManifestArray":
def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
"""
Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
"""
@@ -328,7 +329,7 @@ def _broadcast_scalar(x: "ManifestArray", new_axis_length: int) -> "ManifestArra

@implements(np.full_like)
def full_like(
x: "ManifestArray", /, fill_value: bool, *, dtype: Union[np.dtype, None]
x: "ManifestArray", /, fill_value: bool, *, dtype: np.dtype | None
) -> np.ndarray:
"""
Returns a new array filled with fill_value and having the same shape as an input array x.
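As background for the MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS registry and the @implements decorators touched throughout this file, here is a generic, simplified sketch of that NumPy override pattern (NEP 18); the names and bodies below are illustrative, not copied from the module:

```python
from collections.abc import Callable

import numpy as np

# Registry of NumPy functions overridden for manifest-backed arrays, keyed by
# function name to mirror the dict[str, Callable] annotation in the diff above.
HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {}


def implements(numpy_function: Callable) -> Callable:
    """Register a replacement implementation for the given NumPy function."""

    def decorator(func: Callable) -> Callable:
        HANDLED_ARRAY_FUNCTIONS[numpy_function.__name__] = func
        return func

    return decorator


@implements(np.concatenate)
def concatenate(arrays, /, *, axis: int | None = 0):
    # A real implementation merges chunk manifests instead of copying array data.
    raise NotImplementedError
```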
31 changes: 15 additions & 16 deletions virtualizarr/manifests/manifest.py
@@ -1,7 +1,8 @@
import itertools
import json
import re
-from typing import Any, Iterable, Iterator, List, Mapping, Tuple, Union, cast
+from collections.abc import Iterable, Iterator, Mapping
+from typing import Any, cast

import numpy as np
from pydantic import BaseModel, ConfigDict, field_validator
@@ -32,13 +33,11 @@ def __repr__(self) -> str:
return f"ChunkEntry(path='{self.path}', offset={self.offset}, length={self.length})"

@classmethod
-def from_kerchunk(
-cls, path_and_byte_range_info: List[Union[str, int]]
-) -> "ChunkEntry":
+def from_kerchunk(cls, path_and_byte_range_info: list[str | int]) -> "ChunkEntry":
path, offset, length = path_and_byte_range_info
return ChunkEntry(path=path, offset=offset, length=length)

-def to_kerchunk(self) -> List[Union[str, int]]:
+def to_kerchunk(self) -> list[str | int]:
"""Write out in the format that kerchunk uses for chunk entries."""
return [self.path, self.offset, self.length]

@@ -87,7 +86,7 @@ def ndim_chunk_grid(self) -> int:
return get_ndim_from_key(list(self.entries.keys())[0])

@property
-def shape_chunk_grid(self) -> Tuple[int, ...]:
+def shape_chunk_grid(self) -> tuple[int, ...]:
"""
Number of separate chunks along each dimension.
@@ -107,14 +106,14 @@ def __iter__(self) -> Iterator[ChunkKey]:
def __len__(self) -> int:
return len(self.entries)

-def dict(self) -> dict[str, dict[str, Union[str, int]]]:
+def dict(self) -> dict[str, dict[str, str | int]]:
"""Converts the entire manifest to a nested dictionary."""
return {k: dict(entry) for k, entry in self.entries.items()}

@classmethod
def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
"""Create a ChunkManifest from a Zarr manifest.json file."""
with open(filepath, "r") as manifest_file:
with open(filepath) as manifest_file:
entries_dict = json.load(manifest_file)

entries = {
@@ -135,7 +134,7 @@ def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
return ChunkManifest(entries=chunkentries)


-def split(key: ChunkKey) -> List[int]:
+def split(key: ChunkKey) -> list[int]:
return list(int(i) for i in key.split("."))


@@ -168,7 +167,7 @@ def validate_chunk_keys(chunk_keys: Iterable[ChunkKey]):
check_keys_form_grid(chunk_keys)


-def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> Tuple[int, ...]:
+def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> tuple[int, ...]:
# find max chunk index along each dimension
zipped_indices = zip(*[split(key) for key in chunk_keys])
chunk_grid_shape = tuple(
@@ -186,16 +185,16 @@ def check_keys_form_grid(chunk_keys: Iterable[ChunkKey]):
all_possible_combos = itertools.product(
*[range(length) for length in chunk_grid_shape]
)
-all_required_chunk_keys: set[ChunkKey] = set(
+all_required_chunk_keys: set[ChunkKey] = {
join(inds) for inds in all_possible_combos
-)
+}

# check that every possible combination is represented once in the list of chunk keys
if set(chunk_keys) != all_required_chunk_keys:
raise ValueError("Chunk keys do not form a complete grid")


def concat_manifests(manifests: List["ChunkManifest"], axis: int) -> "ChunkManifest":
def concat_manifests(manifests: list["ChunkManifest"], axis: int) -> "ChunkManifest":
"""
Concatenate manifests along an existing dimension.
@@ -216,7 +215,7 @@ def concat_manifests(manifests: List["ChunkManif
for manifest, offset in zip(manifests[1:], chunk_index_offsets)
]
all_entries = [manifests[0].entries] + new_entries
-merged_entries = dict((k, v) for d in all_entries for k, v in d.items())
+merged_entries = {k: v for d in all_entries for k, v in d.items()}

# Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
# Could use pydantic's model_construct classmethod to skip these checks
@@ -237,7 +236,7 @@ def offset_key(key: ChunkKey, axis: int, offset: int) -> ChunkKey:
return {offset_key(k, axis, offset): v for k, v in entries.items()}


def stack_manifests(manifests: List[ChunkManifest], axis: int) -> "ChunkManifest":
def stack_manifests(manifests: list[ChunkManifest], axis: int) -> "ChunkManifest":
"""
Stack manifests along a new dimension.
@@ -252,7 +251,7 @@ def stack_manifests(manifests: List[ChunkManifest
insert_new_axis_into_chunk_keys(manifest.entries, axis, new_index_value)
for manifest, new_index_value in zip(manifests, chunk_indexes_along_new_dim)
]
-merged_entries = dict((k, v) for d in new_entries for k, v in d.items())
+merged_entries = {k: v for d in new_entries for k, v in d.items()}

# Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
# Could use pydantic's model_construct classmethod to skip these checks
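As a side note on the from_kerchunk/to_kerchunk pair touched above, a minimal round-trip sketch based only on the signatures shown in this diff; the path and byte-range values are placeholders, and the import assumes ChunkEntry is accessible from this module:

```python
from virtualizarr.manifests.manifest import ChunkEntry

# Kerchunk stores a chunk reference as [path, offset, length]; ChunkEntry
# round-trips that form. The values below are placeholders, not real data.
entry = ChunkEntry.from_kerchunk(["s3://some-bucket/data.nc", 100, 200])
assert entry.to_kerchunk() == ["s3://some-bucket/data.nc", 100, 200]
```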
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_kerchunk.py
@@ -113,7 +113,7 @@ def test_accessor_to_kerchunk_json(self, tmp_path):

ds.virtualize.to_kerchunk(filepath, format="json")

with open(filepath, "r") as json_file:
with open(filepath) as json_file:
loaded_refs = ujson.load(json_file)

expected_ds_refs = {
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_xarray.py
@@ -1,4 +1,4 @@
-from typing import Mapping
+from collections.abc import Mapping

import numpy as np
import pytest