
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed May 6, 2024
1 parent 7031d81 commit 8703afe
Showing 9 changed files with 97 additions and 25 deletions.
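
Every hunk in this commit is the same kind of mechanical fix: the repository's pre-commit hooks rewrapped statements that exceeded the formatter's line-length limit, so long one-liners are split across several lines with no change in behaviour. Below is a minimal sketch of reproducing one of these rewrites locally, assuming the hook responsible is black with its default 88-character limit (the commit itself does not name the hook):

import black

# One of the long lines from virtualizarr/kerchunk.py as it looked before this commit.
src = (
    "def extract_array_refs(ds_reference_dict: KerchunkStoreRefs, var_name: str)"
    " -> KerchunkArrRefs:\n"
    "    ...\n"
)

# black rewraps anything longer than its configured limit, which is exactly the
# shape of the edits shown in the hunks below.
print(black.format_str(src, mode=black.Mode(line_length=88)))

Running pre-commit run --all-files in a clone with the same hook configuration should produce the same rewrites; pre-commit.ci simply automates that and pushes the result as this commit.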
12 changes: 9 additions & 3 deletions virtualizarr/kerchunk.py
@@ -123,7 +123,9 @@ def find_var_names(ds_reference_dict: KerchunkStoreRefs) -> list[str]:
     return found_var_names


-def extract_array_refs(ds_reference_dict: KerchunkStoreRefs, var_name: str) -> KerchunkArrRefs:
+def extract_array_refs(
+    ds_reference_dict: KerchunkStoreRefs, var_name: str
+) -> KerchunkArrRefs:
     """Extract only the part of the kerchunk reference dict that is relevant to this one zarr array"""

     found_var_names = find_var_names(ds_reference_dict)
@@ -133,7 +135,9 @@ def extract_array_refs(ds_reference_dict: KerchunkStoreRefs, var_name: str) -> KerchunkArrRefs:
     # TODO these function probably have more loops in them than they need to...

     arr_refs = {
-        key.split("/")[1]: refs[key] for key in refs.keys() if var_name == key.split("/")[0]
+        key.split("/")[1]: refs[key]
+        for key in refs.keys()
+        if var_name == key.split("/")[0]
     }

     return fully_decode_arr_refs(arr_refs)
@@ -175,7 +179,9 @@ def dataset_to_kerchunk_refs(ds: xr.Dataset) -> KerchunkStoreRefs:
     for var_name, var in ds.variables.items():
         arr_refs = variable_to_kerchunk_arr_refs(var)

-        prepended_with_var_name = {f"{var_name}/{key}": val for key, val in arr_refs.items()}
+        prepended_with_var_name = {
+            f"{var_name}/{key}": val for key, val in arr_refs.items()
+        }

         all_arr_refs.update(prepended_with_var_name)

8 changes: 6 additions & 2 deletions virtualizarr/manifests/array.py
@@ -71,7 +71,9 @@ def _from_kerchunk_refs(cls, arr_refs: KerchunkArrRefs) -> "ManifestArray":

         zarray = ZArray.from_kerchunk_refs(decoded_arr_refs[".zarray"])

-        kerchunk_chunk_dict = {k: v for k, v in decoded_arr_refs.items() if re.match(_CHUNK_KEY, k)}
+        kerchunk_chunk_dict = {
+            k: v for k, v in decoded_arr_refs.items() if re.match(_CHUNK_KEY, k)
+        }
         chunkmanifest = ChunkManifest._from_kerchunk_chunk_dict(kerchunk_chunk_dict)

         obj = object.__new__(cls)
@@ -204,7 +206,9 @@ def __getitem__(
         indexer = _possibly_expand_trailing_ellipsis(key, self.ndim)

         if len(indexer) != self.ndim:
-            raise ValueError(f"Invalid indexer for array with ndim={self.ndim}: {indexer}")
+            raise ValueError(
+                f"Invalid indexer for array with ndim={self.ndim}: {indexer}"
+            )

         if all(
             isinstance(axis_indexer, slice) and axis_indexer == slice(None)
12 changes: 9 additions & 3 deletions virtualizarr/manifests/array_api.py
@@ -154,12 +154,16 @@ def _check_same_ndims(ndims: list[int]) -> None:

 def _check_same_shapes_except_on_concat_axis(shapes: list[tuple[int, ...]], axis: int):
     """Check that shapes are compatible for concatenation"""
-    shapes_without_concat_axis = [_remove_element_at_position(shape, axis) for shape in shapes]
+    shapes_without_concat_axis = [
+        _remove_element_at_position(shape, axis) for shape in shapes
+    ]

     first_shape, *other_shapes = shapes_without_concat_axis
     for other_shape in other_shapes:
         if other_shape != first_shape:
-            raise ValueError(f"Cannot concatenate arrays with shapes {[shape for shape in shapes]}")
+            raise ValueError(
+                f"Cannot concatenate arrays with shapes {[shape for shape in shapes]}"
+            )


 def _remove_element_at_position(t: tuple[int, ...], pos: int) -> tuple[int, ...]:
@@ -269,7 +273,9 @@ def broadcast_to(x: "ManifestArray", /, shape: Tuple[int, ...]) -> "ManifestArray":
             # concatenate same array upon itself d_requested number of times along existing axis
             result = concatenate([result] * d_requested, axis=axis)
         else:
-            raise ValueError(f"Array with shape {x.shape} cannot be broadcast to shape {shape}")
+            raise ValueError(
+                f"Array with shape {x.shape} cannot be broadcast to shape {shape}"
+            )

     return result

24 changes: 18 additions & 6 deletions virtualizarr/manifests/manifest.py
@@ -8,7 +8,9 @@

 from ..types import ChunkKey

-_INTEGER = r"([1-9]+\d*|0)"  # matches 0 or an unsigned integer that does not begin with zero
+_INTEGER = (
+    r"([1-9]+\d*|0)"  # matches 0 or an unsigned integer that does not begin with zero
+)
 _SEPARATOR = r"\."
 _CHUNK_KEY = rf"^{_INTEGER}+({_SEPARATOR}{_INTEGER})*$"  # matches 1 integer, optionally followed by more integers each separated by a separator (i.e. a period)

@@ -30,7 +32,9 @@ def __repr__(self) -> str:
         return f"ChunkEntry(path='{self.path}', offset={self.offset}, length={self.length})"

     @classmethod
-    def from_kerchunk(cls, path_and_byte_range_info: List[Union[str, int]]) -> "ChunkEntry":
+    def from_kerchunk(
+        cls, path_and_byte_range_info: List[Union[str, int]]
+    ) -> "ChunkEntry":
         path, offset, length = path_and_byte_range_info
         return ChunkEntry(path=path, offset=offset, length=length)

@@ -113,7 +117,9 @@ def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
         with open(filepath, "r") as manifest_file:
             entries_dict = json.load(manifest_file)

-        entries = {cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()}
+        entries = {
+            cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()
+        }
         return cls(entries=entries)

     def to_zarr_json(self, filepath: str) -> None:
@@ -123,7 +129,9 @@ def to_zarr_json(self, filepath: str) -> None:

     @classmethod
     def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
-        chunkentries = {k: ChunkEntry.from_kerchunk(v) for k, v in kerchunk_chunk_dict.items()}
+        chunkentries = {
+            k: ChunkEntry.from_kerchunk(v) for k, v in kerchunk_chunk_dict.items()
+        }
         return ChunkManifest(entries=chunkentries)


@@ -175,8 +183,12 @@ def check_keys_form_grid(chunk_keys: Iterable[ChunkKey]):
     chunk_grid_shape = get_chunk_grid_shape(chunk_keys)

     # create every possible combination
-    all_possible_combos = itertools.product(*[range(length) for length in chunk_grid_shape])
-    all_required_chunk_keys: set[ChunkKey] = set(join(inds) for inds in all_possible_combos)
+    all_possible_combos = itertools.product(
+        *[range(length) for length in chunk_grid_shape]
+    )
+    all_required_chunk_keys: set[ChunkKey] = set(
+        join(inds) for inds in all_possible_combos
+    )

     # check that every possible combination is represented once in the list of chunk keys
     if set(chunk_keys) != all_required_chunk_keys:
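
The manifest.py hunk above only re-wraps the _INTEGER definition, but its comments also describe what the chunk-key pattern is meant to accept. A quick sketch exercising those same patterns (the example keys are illustrative, not taken from the commit):

import re

# The regexes defined in virtualizarr/manifests/manifest.py, as shown in the hunk above.
_INTEGER = r"([1-9]+\d*|0)"  # matches 0 or an unsigned integer that does not begin with zero
_SEPARATOR = r"\."
_CHUNK_KEY = rf"^{_INTEGER}+({_SEPARATOR}{_INTEGER})*$"

# Chunk keys are dot-separated indices, one integer per array dimension.
assert re.match(_CHUNK_KEY, "0.0")
assert re.match(_CHUNK_KEY, "12.3.4")
assert re.match(_CHUNK_KEY, "1,2") is None  # wrong separator
assert re.match(_CHUNK_KEY, "a.b") is None  # not integers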
4 changes: 3 additions & 1 deletion virtualizarr/tests/test_kerchunk.py
@@ -43,7 +43,9 @@ def test_dataset_from_df_refs():
     assert da.data.zarray.fill_value is None
     assert da.data.zarray.order == "C"

-    assert da.data.manifest.dict() == {"0.0": {"path": "test1.nc", "offset": 6144, "length": 48}}
+    assert da.data.manifest.dict() == {
+        "0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
+    }


 def test_dataset_from_df_refs_with_filters():
4 changes: 3 additions & 1 deletion virtualizarr/tests/test_zarr.py
@@ -22,6 +22,8 @@ def test_zarr_v3_roundtrip(tmpdir):
     original = xr.Dataset({"a": (["x", "y"], arr)}, attrs={"something": 0})

     original.virtualize.to_zarr(tmpdir / "store.zarr")
-    roundtrip = open_virtual_dataset(tmpdir / "store.zarr", filetype="zarr_v3", indexes={})
+    roundtrip = open_virtual_dataset(
+        tmpdir / "store.zarr", filetype="zarr_v3", indexes={}
+    )

     xrt.assert_identical(roundtrip, original)
7 changes: 6 additions & 1 deletion virtualizarr/vendor/zarr/utils.py
@@ -18,5 +18,10 @@ def default(self, o):
 def json_dumps(o: Any) -> bytes:
     """Write JSON in a consistent, human-readable way."""
     return json.dumps(
-        o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder
+        o,
+        indent=4,
+        sort_keys=True,
+        ensure_ascii=True,
+        separators=(",", ": "),
+        cls=NumberEncoder,
     ).encode("ascii")
29 changes: 24 additions & 5 deletions virtualizarr/xarray.py
@@ -1,5 +1,14 @@
 from pathlib import Path
-from typing import List, Literal, Mapping, Optional, Union, overload, MutableMapping, Iterable
+from typing import (
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Union,
+    overload,
+    MutableMapping,
+    Iterable,
+)

 import ujson  # type: ignore
 import xarray as xr
@@ -11,7 +20,11 @@
 import virtualizarr.kerchunk as kerchunk
 from virtualizarr.kerchunk import KerchunkStoreRefs, FileType
 from virtualizarr.manifests import ChunkManifest, ManifestArray
-from virtualizarr.zarr import dataset_to_zarr, attrs_from_zarr_group_json, metadata_from_zarr_json
+from virtualizarr.zarr import (
+    dataset_to_zarr,
+    attrs_from_zarr_group_json,
+    metadata_from_zarr_json,
+)


 class ManifestBackendArray(ManifestArray, BackendArray):
@@ -116,7 +129,9 @@ def open_virtual_dataset(
                 indexes = dict(**indexes)  # for type hinting: to allow mutation

             loadable_vars = {
-                name: var for name, var in ds.variables.items() if name in loadable_variables
+                name: var
+                for name, var in ds.variables.items()
+                if name in loadable_variables
             }

             # if we only read the indexes we can just close the file right away as nothing is lazy
@@ -211,7 +226,9 @@ def virtual_vars_from_kerchunk_refs(
     var_names = kerchunk.find_var_names(refs)
     if drop_variables is None:
         drop_variables = []
-    var_names_to_keep = [var_name for var_name in var_names if var_name not in drop_variables]
+    var_names_to_keep = [
+        var_name for var_name in var_names if var_name not in drop_variables
+    ]

     vars = {
         var_name: variable_from_kerchunk_refs(refs, var_name, virtual_array_class)
@@ -337,7 +354,9 @@ def to_zarr(self, storepath: str) -> None:
         dataset_to_zarr(self.ds, storepath)

     @overload
-    def to_kerchunk(self, filepath: None, format: Literal["dict"]) -> KerchunkStoreRefs: ...
+    def to_kerchunk(
+        self, filepath: None, format: Literal["dict"]
+    ) -> KerchunkStoreRefs: ...

     @overload
     def to_kerchunk(self, filepath: str, format: Literal["json"]) -> None: ...
22 changes: 19 additions & 3 deletions virtualizarr/zarr.py
@@ -1,5 +1,15 @@
 from pathlib import Path
-from typing import Any, Literal, NewType, Optional, Tuple, Union, List, Dict, TYPE_CHECKING
+from typing import (
+    Any,
+    Literal,
+    NewType,
+    Optional,
+    Tuple,
+    Union,
+    List,
+    Dict,
+    TYPE_CHECKING,
+)
 import json

 import numpy as np
@@ -193,14 +203,20 @@ def zarr_v3_array_metadata(zarray: ZArray, dim_names: List[str], attrs: dict) ->
         "name": "regular",
         "configuration": {"chunk_shape": metadata.pop("chunks")},
     }
-    metadata["chunk_key_encoding"] = {"name": "default", "configuration": {"separator": "/"}}
+    metadata["chunk_key_encoding"] = {
+        "name": "default",
+        "configuration": {"separator": "/"},
+    }
     metadata["codecs"] = metadata.pop("filters")
     metadata.pop("compressor")  # TODO this should be entered in codecs somehow
     metadata.pop("order")  # TODO this should be replaced by a transpose codec

     # indicate that we're using the manifest storage transformer ZEP
     metadata["storage_transformers"] = [
-        {"name": "chunk-manifest-json", "configuration": {"manifest": "./manifest.json"}}
+        {
+            "name": "chunk-manifest-json",
+            "configuration": {"manifest": "./manifest.json"},
+        }
     ]

     # add information from xarray object
