Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump Ruff version and add formatting #98

Merged
merged 7 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ repos:
- id: check-yaml

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.4.3"
hooks:
# Run the linter.
- id: ruff
args: ["--fix"]
# - repo: https://github.com/Carreau/velin
# rev: 0.0.8
# hooks:
# - id: velin
# args: ["--write", "--compact"]
args: [ --fix ]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.0
hooks:
Expand Down
17 changes: 10 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,24 +75,21 @@ datatree = ["py.typed"]
files = "virtualizarr/**/*.py"
show_error_codes = true





[tool.ruff]
line-length = 100
# Same as Black.
line-length = 88
indent-width = 4
target-version = "py39"

exclude = [
"docs",
".eggs"]


[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
select = ["E4", "E7", "E9", "F", "I"]
per-file-ignores = {}

# E402: module level import not at top of file
Expand All @@ -101,7 +98,13 @@ per-file-ignores = {}

ignore = ["E402", "E731"]

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Indent with spaces, rather than tabs.
indent-style = "space"
# Respect magic trailing commas.
Expand Down
6 changes: 5 additions & 1 deletion virtualizarr/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

from enum import Enum, auto


class AutoName(Enum):
# Enum base class whose `auto()` members take their own name as their value.
# Recommended by official Python docs for auto naming:
# https://docs.python.org/3/library/enum.html#using-automatic-values
def _generate_next_value_(name, start, count, last_values):
# Hook that `enum.auto()` calls to produce a member's value; returning
# `name` unchanged makes the value the member's name string.
# `start`, `count` and `last_values` belong to the hook's signature but are unused here.
return name


class FileType(AutoName):
netcdf3 = auto()
netcdf4 = auto()
Expand All @@ -34,6 +36,7 @@ class FileType(AutoName):
fits = auto()
zarr = auto()


def read_kerchunk_references_from_file(
filepath: str, filetype: Optional[FileType]
) -> KerchunkStoreRefs:
Expand All @@ -57,6 +60,7 @@ def read_kerchunk_references_from_file(

if filetype.name.lower() == "netcdf3":
from kerchunk.netCDF3 import NetCDF3ToZarr

refs = NetCDF3ToZarr(filepath, inline_threshold=0).translate()

elif filetype.name.lower() == "netcdf4":
Expand Down Expand Up @@ -87,7 +91,7 @@ def _automatically_determine_filetype(filepath: str) -> FileType:

if file_extension == ".nc":
# based off of: https://github.com/TomNicholas/VirtualiZarr/pull/43#discussion_r1543415167
with open(filepath, 'rb') as f:
with open(filepath, "rb") as f:
magic = f.read()
if magic[0:3] == b"CDF":
filetype = FileType.netcdf3
Expand Down
4 changes: 3 additions & 1 deletion virtualizarr/manifests/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
with open(filepath, "r") as manifest_file:
entries_dict = json.load(manifest_file)

entries = {cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()}
entries = {
cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()
}
return cls(entries=entries)

def to_zarr_json(self, filepath: str) -> None:
Expand Down
1 change: 1 addition & 0 deletions virtualizarr/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import xarray as xr


@pytest.fixture
def netcdf4_file(tmpdir):
# Set up example xarray dataset
Expand Down
47 changes: 28 additions & 19 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import numpy as np
import pytest
import ujson # type: ignore
import xarray as xr
import xarray.testing as xrt
import pytest


from virtualizarr.kerchunk import _automatically_determine_filetype, FileType
from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs


def gen_ds_refs(
zgroup: str = '{"zarr_format":2}',
zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
chunk: list = ["test1.nc", 6144, 48],
zgroup: str = '{"zarr_format":2}',
zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
chunk: list = ["test1.nc", 6144, 48],
):
return {
"version": 1,
Expand All @@ -25,9 +25,10 @@ def gen_ds_refs(
},
}


def test_dataset_from_df_refs():
ds_refs = gen_ds_refs()
ds = dataset_from_kerchunk_refs(ds_refs)
ds = dataset_from_kerchunk_refs(ds_refs)
assert "a" in ds
da = ds["a"]
assert isinstance(da.data, ManifestArray)
Expand All @@ -45,11 +46,21 @@ def test_dataset_from_df_refs():
"0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
}


def test_dataset_from_df_refs_with_filters():
filters = [{"elementsize":4,"id":"shuffle"},{"id":"zlib","level":4}]
zarray = {"chunks":[2,3],"compressor":None,"dtype":"<i8","fill_value":None,"filters":filters,"order":"C","shape":[2,3],"zarr_format":2}
filters = [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}]
zarray = {
"chunks": [2, 3],
"compressor": None,
"dtype": "<i8",
"fill_value": None,
"filters": filters,
"order": "C",
"shape": [2, 3],
"zarr_format": 2,
}
ds_refs = gen_ds_refs(zarray=ujson.dumps(zarray))
ds = dataset_from_kerchunk_refs(ds_refs)
ds = dataset_from_kerchunk_refs(ds_refs)
da = ds["a"]
assert da.data.zarray.filters == filters

Expand Down Expand Up @@ -163,15 +174,13 @@ def test_automatically_determine_filetype_netcdf3_netcdf4():
assert FileType("netcdf4") == _automatically_determine_filetype(netcdf4_file_path)




def test_FileType():
# tests if FileType converts user supplied strings to correct filetype
assert 'netcdf3' == FileType("netcdf3").name
assert 'netcdf4' == FileType("netcdf4").name
assert 'grib' == FileType("grib").name
assert 'tiff' == FileType("tiff").name
assert 'fits' == FileType("fits").name
assert 'zarr' == FileType("zarr").name
assert "netcdf3" == FileType("netcdf3").name
assert "netcdf4" == FileType("netcdf4").name
assert "grib" == FileType("grib").name
assert "tiff" == FileType("tiff").name
assert "fits" == FileType("fits").name
assert "zarr" == FileType("zarr").name
with pytest.raises(ValueError):
FileType(None)
3 changes: 1 addition & 2 deletions virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ def test_not_equal_chunk_entries(self):
assert not (marr1 == marr2).all()

@pytest.mark.skip(reason="Not Implemented")
def test_partly_equals(self):
...
def test_partly_equals(self): ...


# TODO we really need some kind of fixtures to generate useful example data
Expand Down
6 changes: 2 additions & 4 deletions virtualizarr/tests/test_manifests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,6 @@ def test_stack(self):

@pytest.mark.skip(reason="Not implemented")
class TestSerializeManifest:
def test_serialize_manifest_to_zarr(self):
...
def test_serialize_manifest_to_zarr(self): ...

def test_deserialize_manifest_from_zarr(self):
...
def test_deserialize_manifest_from_zarr(self): ...
5 changes: 1 addition & 4 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,6 @@ def test_concat_dim_coords_along_existing_dim(self):
assert result.data.zarray.zarr_format == zarray.zarr_format





class TestOpenVirtualDatasetIndexes:
def test_no_indexes(self, netcdf4_file):
vds = open_virtual_dataset(netcdf4_file, indexes={})
Expand Down Expand Up @@ -273,7 +270,7 @@ def test_combine_by_coords(self, netcdf4_files):

class TestLoadVirtualDataset:
def test_loadable_variables(self, netcdf4_file):
vars_to_load = ['air', 'time']
vars_to_load = ["air", "time"]
vds = open_virtual_dataset(netcdf4_file, loadable_variables=vars_to_load)

for name in vds.variables:
Expand Down
33 changes: 18 additions & 15 deletions virtualizarr/tests/test_zarr.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
import xarray as xr
import numpy as np
import xarray as xr
import xarray.testing as xrt
from virtualizarr import open_virtual_dataset, ManifestArray

from virtualizarr import ManifestArray, open_virtual_dataset
from virtualizarr.manifests.manifest import ChunkEntry


def test_zarr_v3_roundtrip(tmpdir):
arr = ManifestArray(
chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
zarray=dict(
shape=(2, 3),
dtype=np.dtype("<i8"),
chunks=(2, 3),
compressor=None,
filters=None,
fill_value=None,
order="C",
zarr_format=3,
),
)
chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
zarray=dict(
shape=(2, 3),
dtype=np.dtype("<i8"),
chunks=(2, 3),
compressor=None,
filters=None,
fill_value=None,
order="C",
zarr_format=3,
),
)
original = xr.Dataset({"a": (["x", "y"], arr)}, attrs={"something": 0})

original.virtualize.to_zarr(tmpdir / "store.zarr")
roundtrip = open_virtual_dataset(tmpdir / "store.zarr", filetype="zarr_v3", indexes={})
roundtrip = open_virtual_dataset(
tmpdir / "store.zarr", filetype="zarr_v3", indexes={}
)

xrt.assert_identical(roundtrip, original)
8 changes: 6 additions & 2 deletions virtualizarr/vendor/zarr/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import numbers

from typing import Any


Expand All @@ -18,5 +17,10 @@ def default(self, o):
def json_dumps(o: Any) -> bytes:
"""Write JSON in a consistent, human-readable way."""
return json.dumps(
o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder
o,
indent=4,
sort_keys=True,
ensure_ascii=True,
separators=(",", ": "),
cls=NumberEncoder,
).encode("ascii")
Loading
Loading