Merge branch 'main' into 0D-scalar

zarr-developers · May 7, 2024 · 299b553 · 299b553
2 parents d0ea94f + a3dab6c
commit 299b553
Show file tree

Hide file tree

Showing 13 changed files with 148 additions and 104 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,24 +3,24 @@ ci:
   autoupdate_schedule: monthly
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.3.5"
+    # Ruff version.
+    rev: "v0.4.3"
     hooks:
+      # Run the linter.
       - id: ruff
-        args: ["--fix"]
-  # - repo: https://github.com/Carreau/velin
-  #   rev: 0.0.8
-  #   hooks:
-  #     - id: velin
-  #       args: ["--write", "--compact"]
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format
+
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.9.0
+    rev: v1.10.0
     hooks:
       - id: mypy
         # Copied from setup.cfg

diff --git a/pyproject.toml b/pyproject.toml
@@ -75,24 +75,21 @@ datatree = ["py.typed"]
 files = "virtualizarr/**/*.py"
 show_error_codes = true
 
-
-
-
-
 [tool.ruff]
-line-length = 100
+# Same as Black.
+line-length = 88
+indent-width = 4
 target-version = "py39"
 
 exclude = [
     "docs",
     ".eggs"]
 
-
 [tool.ruff.lint]
 # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`)  codes by default.
 # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
 # McCabe complexity (`C901`) by default.
-select = ["E4", "E7", "E9", "F"]
+select = ["E4", "E7", "E9", "F", "I"]
 per-file-ignores = {}
 
 # E402: module level import not at top of file
@@ -101,7 +98,13 @@ per-file-ignores = {}
 
 ignore = ["E402", "E731"]
 
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
 [tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
 # Indent with spaces, rather than tabs.
 indent-style = "space"
 # Respect magic trailing commas.

diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py
@@ -20,12 +20,14 @@
 
 from enum import Enum, auto
 
+
 class AutoName(Enum):
     # Recommended by official Python docs for auto naming:
     # https://docs.python.org/3/library/enum.html#using-automatic-values
     def _generate_next_value_(name, start, count, last_values):
         return name
 
+
 class FileType(AutoName):
     netcdf3 = auto()
     netcdf4 = auto()
@@ -34,6 +36,7 @@ class FileType(AutoName):
     fits = auto()
     zarr = auto()
 
+
 def read_kerchunk_references_from_file(
     filepath: str, filetype: Optional[FileType]
 ) -> KerchunkStoreRefs:
@@ -57,6 +60,7 @@ def read_kerchunk_references_from_file(
 
     if filetype.name.lower() == "netcdf3":
         from kerchunk.netCDF3 import NetCDF3ToZarr
+
         refs = NetCDF3ToZarr(filepath, inline_threshold=0).translate()
 
     elif filetype.name.lower() == "netcdf4":
@@ -87,7 +91,7 @@ def _automatically_determine_filetype(filepath: str) -> FileType:
 
     if file_extension == ".nc":
         # based off of: https://github.com/TomNicholas/VirtualiZarr/pull/43#discussion_r1543415167
-        with open(filepath, 'rb') as f:
+        with open(filepath, "rb") as f:
             magic = f.read()
         if magic[0:3] == b"CDF":
             filetype = FileType.netcdf3

diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py
@@ -117,7 +117,9 @@ def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
         with open(filepath, "r") as manifest_file:
             entries_dict = json.load(manifest_file)
 
-        entries = {cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()}
+        entries = {
+            cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()
+        }
         return cls(entries=entries)
 
     def to_zarr_json(self, filepath: str) -> None:

diff --git a/virtualizarr/tests/conftest.py b/virtualizarr/tests/conftest.py
@@ -1,6 +1,7 @@
 import pytest
 import xarray as xr
 
+
 @pytest.fixture
 def netcdf4_file(tmpdir):
     # Set up example xarray dataset

diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
@@ -1,19 +1,19 @@
 import numpy as np
+import pytest
 import ujson  # type: ignore
 import xarray as xr
 import xarray.testing as xrt
-import pytest
-
 
-from virtualizarr.kerchunk import _automatically_determine_filetype, FileType
+from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
 from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
 from virtualizarr.xarray import dataset_from_kerchunk_refs
 
+
 def gen_ds_refs(
-        zgroup: str = '{"zarr_format":2}',
-        zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
-        zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
-        chunk: list = ["test1.nc", 6144, 48],
+    zgroup: str = '{"zarr_format":2}',
+    zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
+    zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
+    chunk: list = ["test1.nc", 6144, 48],
 ):
     return {
         "version": 1,
@@ -25,9 +25,10 @@ def gen_ds_refs(
         },
     }
 
+
 def test_dataset_from_df_refs():
     ds_refs = gen_ds_refs()
-    ds =  dataset_from_kerchunk_refs(ds_refs)
+    ds = dataset_from_kerchunk_refs(ds_refs)
     assert "a" in ds
     da = ds["a"]
     assert isinstance(da.data, ManifestArray)
@@ -45,11 +46,21 @@ def test_dataset_from_df_refs():
         "0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
     }
 
+
 def test_dataset_from_df_refs_with_filters():
-    filters = [{"elementsize":4,"id":"shuffle"},{"id":"zlib","level":4}]
-    zarray = {"chunks":[2,3],"compressor":None,"dtype":"<i8","fill_value":None,"filters":filters,"order":"C","shape":[2,3],"zarr_format":2}
+    filters = [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}]
+    zarray = {
+        "chunks": [2, 3],
+        "compressor": None,
+        "dtype": "<i8",
+        "fill_value": None,
+        "filters": filters,
+        "order": "C",
+        "shape": [2, 3],
+        "zarr_format": 2,
+    }
     ds_refs = gen_ds_refs(zarray=ujson.dumps(zarray))
-    ds =  dataset_from_kerchunk_refs(ds_refs)
+    ds = dataset_from_kerchunk_refs(ds_refs)
     da = ds["a"]
     assert da.data.zarray.filters == filters
 
@@ -163,15 +174,13 @@ def test_automatically_determine_filetype_netcdf3_netcdf4():
     assert FileType("netcdf4") == _automatically_determine_filetype(netcdf4_file_path)
 
 
-
-
 def test_FileType():
     # tests if FileType converts user supplied strings to correct filetype
-    assert 'netcdf3' == FileType("netcdf3").name
-    assert 'netcdf4' == FileType("netcdf4").name
-    assert 'grib' == FileType("grib").name
-    assert 'tiff' == FileType("tiff").name
-    assert 'fits' == FileType("fits").name
-    assert 'zarr' == FileType("zarr").name
+    assert "netcdf3" == FileType("netcdf3").name
+    assert "netcdf4" == FileType("netcdf4").name
+    assert "grib" == FileType("grib").name
+    assert "tiff" == FileType("tiff").name
+    assert "fits" == FileType("fits").name
+    assert "zarr" == FileType("zarr").name
     with pytest.raises(ValueError):
         FileType(None)
diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py
@@ -119,8 +119,7 @@ def test_not_equal_chunk_entries(self):
         assert not (marr1 == marr2).all()
 
     @pytest.mark.skip(reason="Not Implemented")
-    def test_partly_equals(self):
-        ...
+    def test_partly_equals(self): ...
 
 
 # TODO we really need some kind of fixtures to generate useful example data

diff --git a/virtualizarr/tests/test_manifests/test_manifest.py b/virtualizarr/tests/test_manifests/test_manifest.py
@@ -156,8 +156,6 @@ def test_stack(self):
 
 @pytest.mark.skip(reason="Not implemented")
 class TestSerializeManifest:
-    def test_serialize_manifest_to_zarr(self):
-        ...
+    def test_serialize_manifest_to_zarr(self): ...
 
-    def test_deserialize_manifest_from_zarr(self):
-        ...
+    def test_deserialize_manifest_from_zarr(self): ...
diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
@@ -225,9 +225,6 @@ def test_concat_dim_coords_along_existing_dim(self):
         assert result.data.zarray.zarr_format == zarray.zarr_format
 
 
-
-
-
 class TestOpenVirtualDatasetIndexes:
     def test_no_indexes(self, netcdf4_file):
         vds = open_virtual_dataset(netcdf4_file, indexes={})
@@ -273,7 +270,7 @@ def test_combine_by_coords(self, netcdf4_files):
 
 class TestLoadVirtualDataset:
     def test_loadable_variables(self, netcdf4_file):
-        vars_to_load = ['air', 'time']
+        vars_to_load = ["air", "time"]
         vds = open_virtual_dataset(netcdf4_file, loadable_variables=vars_to_load)
 
         for name in vds.variables:

diff --git a/virtualizarr/tests/test_zarr.py b/virtualizarr/tests/test_zarr.py
@@ -1,27 +1,30 @@
-import xarray as xr
 import numpy as np
+import xarray as xr
 import xarray.testing as xrt
-from virtualizarr import open_virtual_dataset, ManifestArray
+
+from virtualizarr import ManifestArray, open_virtual_dataset
 from virtualizarr.manifests.manifest import ChunkEntry
 
 
 def test_zarr_v3_roundtrip(tmpdir):
     arr = ManifestArray(
-            chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
-            zarray=dict(
-                shape=(2, 3),
-                dtype=np.dtype("<i8"),
-                chunks=(2, 3),
-                compressor=None,
-                filters=None,
-                fill_value=None,
-                order="C",
-                zarr_format=3,
-            ),
-        )
+        chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
+        zarray=dict(
+            shape=(2, 3),
+            dtype=np.dtype("<i8"),
+            chunks=(2, 3),
+            compressor=None,
+            filters=None,
+            fill_value=None,
+            order="C",
+            zarr_format=3,
+        ),
+    )
     original = xr.Dataset({"a": (["x", "y"], arr)}, attrs={"something": 0})
 
     original.virtualize.to_zarr(tmpdir / "store.zarr")
-    roundtrip = open_virtual_dataset(tmpdir / "store.zarr", filetype="zarr_v3", indexes={})
+    roundtrip = open_virtual_dataset(
+        tmpdir / "store.zarr", filetype="zarr_v3", indexes={}
+    )
 
     xrt.assert_identical(roundtrip, original)
diff --git a/virtualizarr/vendor/zarr/utils.py b/virtualizarr/vendor/zarr/utils.py
@@ -1,6 +1,5 @@
 import json
 import numbers
-
 from typing import Any
 
 
@@ -18,5 +17,10 @@ def default(self, o):
 def json_dumps(o: Any) -> bytes:
     """Write JSON in a consistent, human-readable way."""
     return json.dumps(
-        o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder
+        o,
+        indent=4,
+        sort_keys=True,
+        ensure_ascii=True,
+        separators=(",", ": "),
+        cls=NumberEncoder,
     ).encode("ascii")