Skip to content

Commit

Permalink
Merge branch 'main' into 0D-scalar
Browse files Browse the repository at this point in the history
  • Loading branch information
TomNicholas authored May 7, 2024
2 parents d0ea94f + a3dab6c commit 299b553
Show file tree
Hide file tree
Showing 13 changed files with 148 additions and 104 deletions.
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,24 @@ ci:
autoupdate_schedule: monthly
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.3.5"
# Ruff version.
rev: "v0.4.3"
hooks:
# Run the linter.
- id: ruff
args: ["--fix"]
# - repo: https://github.com/Carreau/velin
# rev: 0.0.8
# hooks:
# - id: velin
# args: ["--write", "--compact"]
args: [ --fix ]
# Run the formatter.
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.9.0
rev: v1.10.0
hooks:
- id: mypy
# Copied from setup.cfg
Expand Down
17 changes: 10 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,24 +75,21 @@ datatree = ["py.typed"]
files = "virtualizarr/**/*.py"
show_error_codes = true





[tool.ruff]
line-length = 100
# Same as Black.
line-length = 88
indent-width = 4
target-version = "py39"

exclude = [
"docs",
".eggs"]


[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
# McCabe complexity (`C901`) by default.
select = ["E4", "E7", "E9", "F"]
select = ["E4", "E7", "E9", "F", "I"]
per-file-ignores = {}

# E402: module level import not at top of file
Expand All @@ -101,7 +98,13 @@ per-file-ignores = {}

ignore = ["E402", "E731"]

# Allow fix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Indent with spaces, rather than tabs.
indent-style = "space"
# Respect magic trailing commas.
Expand Down
6 changes: 5 additions & 1 deletion virtualizarr/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

from enum import Enum, auto


class AutoName(Enum):
    """Enum base class whose ``auto()`` members use their own name as their value."""

    # Recommended by official Python docs for auto naming:
    # https://docs.python.org/3/library/enum.html#using-automatic-values
    def _generate_next_value_(name, start, count, last_values):
        # Returning the member name makes it the value that ``auto()`` assigns.
        return name


class FileType(AutoName):
netcdf3 = auto()
netcdf4 = auto()
Expand All @@ -34,6 +36,7 @@ class FileType(AutoName):
fits = auto()
zarr = auto()


def read_kerchunk_references_from_file(
filepath: str, filetype: Optional[FileType]
) -> KerchunkStoreRefs:
Expand All @@ -57,6 +60,7 @@ def read_kerchunk_references_from_file(

if filetype.name.lower() == "netcdf3":
from kerchunk.netCDF3 import NetCDF3ToZarr

refs = NetCDF3ToZarr(filepath, inline_threshold=0).translate()

elif filetype.name.lower() == "netcdf4":
Expand Down Expand Up @@ -87,7 +91,7 @@ def _automatically_determine_filetype(filepath: str) -> FileType:

if file_extension == ".nc":
# based off of: https://github.com/TomNicholas/VirtualiZarr/pull/43#discussion_r1543415167
with open(filepath, 'rb') as f:
with open(filepath, "rb") as f:
magic = f.read()
if magic[0:3] == b"CDF":
filetype = FileType.netcdf3
Expand Down
4 changes: 3 additions & 1 deletion virtualizarr/manifests/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
with open(filepath, "r") as manifest_file:
entries_dict = json.load(manifest_file)

entries = {cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()}
entries = {
cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()
}
return cls(entries=entries)

def to_zarr_json(self, filepath: str) -> None:
Expand Down
1 change: 1 addition & 0 deletions virtualizarr/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import xarray as xr


@pytest.fixture
def netcdf4_file(tmpdir):
# Set up example xarray dataset
Expand Down
47 changes: 28 additions & 19 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import numpy as np
import pytest
import ujson # type: ignore
import xarray as xr
import xarray.testing as xrt
import pytest


from virtualizarr.kerchunk import _automatically_determine_filetype, FileType
from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs


def gen_ds_refs(
zgroup: str = '{"zarr_format":2}',
zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
chunk: list = ["test1.nc", 6144, 48],
zgroup: str = '{"zarr_format":2}',
zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
chunk: list = ["test1.nc", 6144, 48],
):
return {
"version": 1,
Expand All @@ -25,9 +25,10 @@ def gen_ds_refs(
},
}


def test_dataset_from_df_refs():
ds_refs = gen_ds_refs()
ds = dataset_from_kerchunk_refs(ds_refs)
ds = dataset_from_kerchunk_refs(ds_refs)
assert "a" in ds
da = ds["a"]
assert isinstance(da.data, ManifestArray)
Expand All @@ -45,11 +46,21 @@ def test_dataset_from_df_refs():
"0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
}


def test_dataset_from_df_refs_with_filters():
filters = [{"elementsize":4,"id":"shuffle"},{"id":"zlib","level":4}]
zarray = {"chunks":[2,3],"compressor":None,"dtype":"<i8","fill_value":None,"filters":filters,"order":"C","shape":[2,3],"zarr_format":2}
filters = [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}]
zarray = {
"chunks": [2, 3],
"compressor": None,
"dtype": "<i8",
"fill_value": None,
"filters": filters,
"order": "C",
"shape": [2, 3],
"zarr_format": 2,
}
ds_refs = gen_ds_refs(zarray=ujson.dumps(zarray))
ds = dataset_from_kerchunk_refs(ds_refs)
ds = dataset_from_kerchunk_refs(ds_refs)
da = ds["a"]
assert da.data.zarray.filters == filters

Expand Down Expand Up @@ -163,15 +174,13 @@ def test_automatically_determine_filetype_netcdf3_netcdf4():
assert FileType("netcdf4") == _automatically_determine_filetype(netcdf4_file_path)




def test_FileType():
# tests if FileType converts user supplied strings to correct filetype
assert 'netcdf3' == FileType("netcdf3").name
assert 'netcdf4' == FileType("netcdf4").name
assert 'grib' == FileType("grib").name
assert 'tiff' == FileType("tiff").name
assert 'fits' == FileType("fits").name
assert 'zarr' == FileType("zarr").name
assert "netcdf3" == FileType("netcdf3").name
assert "netcdf4" == FileType("netcdf4").name
assert "grib" == FileType("grib").name
assert "tiff" == FileType("tiff").name
assert "fits" == FileType("fits").name
assert "zarr" == FileType("zarr").name
with pytest.raises(ValueError):
FileType(None)
3 changes: 1 addition & 2 deletions virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ def test_not_equal_chunk_entries(self):
assert not (marr1 == marr2).all()

@pytest.mark.skip(reason="Not Implemented")
def test_partly_equals(self):
...
def test_partly_equals(self): ...


# TODO we really need some kind of fixtures to generate useful example data
Expand Down
6 changes: 2 additions & 4 deletions virtualizarr/tests/test_manifests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,6 @@ def test_stack(self):

@pytest.mark.skip(reason="Not implemented")
class TestSerializeManifest:
def test_serialize_manifest_to_zarr(self):
...
def test_serialize_manifest_to_zarr(self): ...

def test_deserialize_manifest_from_zarr(self):
...
def test_deserialize_manifest_from_zarr(self): ...
5 changes: 1 addition & 4 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,6 @@ def test_concat_dim_coords_along_existing_dim(self):
assert result.data.zarray.zarr_format == zarray.zarr_format





class TestOpenVirtualDatasetIndexes:
def test_no_indexes(self, netcdf4_file):
vds = open_virtual_dataset(netcdf4_file, indexes={})
Expand Down Expand Up @@ -273,7 +270,7 @@ def test_combine_by_coords(self, netcdf4_files):

class TestLoadVirtualDataset:
def test_loadable_variables(self, netcdf4_file):
vars_to_load = ['air', 'time']
vars_to_load = ["air", "time"]
vds = open_virtual_dataset(netcdf4_file, loadable_variables=vars_to_load)

for name in vds.variables:
Expand Down
33 changes: 18 additions & 15 deletions virtualizarr/tests/test_zarr.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
import xarray as xr
import numpy as np
import xarray as xr
import xarray.testing as xrt
from virtualizarr import open_virtual_dataset, ManifestArray

from virtualizarr import ManifestArray, open_virtual_dataset
from virtualizarr.manifests.manifest import ChunkEntry


def test_zarr_v3_roundtrip(tmpdir):
arr = ManifestArray(
chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
zarray=dict(
shape=(2, 3),
dtype=np.dtype("<i8"),
chunks=(2, 3),
compressor=None,
filters=None,
fill_value=None,
order="C",
zarr_format=3,
),
)
chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
zarray=dict(
shape=(2, 3),
dtype=np.dtype("<i8"),
chunks=(2, 3),
compressor=None,
filters=None,
fill_value=None,
order="C",
zarr_format=3,
),
)
original = xr.Dataset({"a": (["x", "y"], arr)}, attrs={"something": 0})

original.virtualize.to_zarr(tmpdir / "store.zarr")
roundtrip = open_virtual_dataset(tmpdir / "store.zarr", filetype="zarr_v3", indexes={})
roundtrip = open_virtual_dataset(
tmpdir / "store.zarr", filetype="zarr_v3", indexes={}
)

xrt.assert_identical(roundtrip, original)
8 changes: 6 additions & 2 deletions virtualizarr/vendor/zarr/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import numbers

from typing import Any


Expand All @@ -18,5 +17,10 @@ def default(self, o):
def json_dumps(o: Any) -> bytes:
"""Write JSON in a consistent, human-readable way."""
return json.dumps(
o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder
o,
indent=4,
sort_keys=True,
ensure_ascii=True,
separators=(",", ": "),
cls=NumberEncoder,
).encode("ascii")
Loading

0 comments on commit 299b553

Please sign in to comment.