Skip to content

Commit

Permalink
Merge remote-tracking branch 'mpiannucci/icechunk' into icechunk
Browse files Browse the repository at this point in the history
  • Loading branch information
mpiannucci committed Oct 21, 2024
2 parents 4f3bafa + b59060d commit 26db575
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 16 deletions.
12 changes: 7 additions & 5 deletions virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np
import numpy.testing as npt
from xarray import Dataset, open_dataset, open_zarr
from xarray import Dataset, open_dataset
from xarray.core.variable import Variable
from zarr import Array, Group, group

Expand Down Expand Up @@ -111,7 +111,7 @@ def test_set_single_virtual_ref_without_encoding(
expected_array = expected_ds["foo"].to_numpy()
npt.assert_equal(array, expected_array)

#ds = open_zarr(store=icechunk_filestore, group='foo', zarr_format=3, consolidated=False)
# ds = open_zarr(store=icechunk_filestore, group='foo', zarr_format=3, consolidated=False)

# note: we don't need to test that committing works: now that we have confirmed
# the refs are in the store (even uncommitted), managing them is icechunk's problem.
Expand All @@ -138,7 +138,9 @@ def test_set_single_virtual_ref_with_encoding(
chunkmanifest=manifest,
zarray=zarray,
)
air = Variable(data=ma, dims=["time", "lat", "lon"], encoding={"scale_factor": 0.01})
air = Variable(
data=ma, dims=["time", "lat", "lon"], encoding={"scale_factor": 0.01}
)
vds = Dataset(
{"air": air},
)
Expand All @@ -152,11 +154,11 @@ def test_set_single_virtual_ref_with_encoding(
assert air_array.shape == (2920, 25, 53)
assert air_array.chunks == (2920, 25, 53)
assert air_array.dtype == np.dtype("int16")
assert air_array.attrs['scale_factor'] == 0.01
assert air_array.attrs["scale_factor"] == 0.01

# xarray performs this when cf_decoding is True, but we are not loading
# with xarray here so we scale it manually.
scale_factor = air_array.attrs['scale_factor']
scale_factor = air_array.attrs["scale_factor"]
scaled_air_array = air_array[:] * scale_factor

# check chunk references
Expand Down
5 changes: 2 additions & 3 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import asyncio
from typing import TYPE_CHECKING

import numpy as np
from xarray import Dataset, conventions
from xarray import Dataset
from xarray.backends.zarr import encode_zarr_attr_value
from xarray.core.variable import Variable
from zarr import Group
Expand Down Expand Up @@ -52,7 +51,7 @@ def dataset_to_icechunk(ds: Dataset, store: "IcechunkStore") -> None:
# root_group.attrs = ds.attrs
for k, v in ds.attrs.items():
root_group.attrs[k] = encode_zarr_attr_value(v)

return write_variables_to_icechunk_group(
ds.variables,
store=store,
Expand Down
16 changes: 8 additions & 8 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def from_kerchunk_refs(cls, decoded_arr_refs_zarray) -> "ZArray":
# coerce type of fill_value as kerchunk can be inconsistent with this
dtype = np.dtype(decoded_arr_refs_zarray["dtype"])
fill_value = decoded_arr_refs_zarray["fill_value"]
if np.issubdtype(dtype, np.floating) and (fill_value is None or fill_value == "NaN" or fill_value == "nan"):
if np.issubdtype(dtype, np.floating) and (
fill_value is None or fill_value == "NaN" or fill_value == "nan"
):
fill_value = np.nan

compressor = decoded_arr_refs_zarray["compressor"]
Expand Down Expand Up @@ -157,10 +159,8 @@ def _v3_codec_pipeline(self) -> Any:
try:
from zarr.core.metadata.v3 import parse_codecs
except ImportError:
raise ImportError(
"zarr v3 is required to generate v3 codec pipelines"
)

raise ImportError("zarr v3 is required to generate v3 codec pipelines")

codec_configs = []

# https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/v1.0.html#transpose-codec-v1
Expand All @@ -184,9 +184,9 @@ def _v3_codec_pipeline(self) -> Any:
# and that there are far more codecs in `numcodecs`. We take a gamble and assume
# that the codec names and configuration are simply mapped into zarrv3 "configurables".
if self.filters:
codec_configs.extend([
_num_codec_config_to_configurable(filter) for filter in self.filters
])
codec_configs.extend(
[_num_codec_config_to_configurable(filter) for filter in self.filters]
)

if self.compressor:
codec_configs.append(_num_codec_config_to_configurable(self.compressor))
Expand Down

0 comments on commit 26db575

Please sign in to comment.