Skip to content

Commit

Permalink
bump zarr dep to 3.0.0b0 (#194)
Browse files Browse the repository at this point in the history
* bump zarr dep to 3.0.0b0

* Fix config test

* fix some types

* Sync with new store api

* Sync tests with zarr 3 beta

* Fix open mode

---------

Co-authored-by: Matthew Iannucci <[email protected]>
  • Loading branch information
jhamman and mpiannucci authored Oct 12, 2024
1 parent 03e1e89 commit 77de941
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 17 deletions.
2 changes: 1 addition & 1 deletion icechunk-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ classifiers = [
]
dynamic = ["version"]

dependencies = ["zarr==3.0.0a7"]
dependencies = ["zarr==3.0.0b0"]

[tool.poetry]
name = "icechunk"
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/python/icechunk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(
**kwargs: Any,
):
"""Create a new IcechunkStore. This should not be called directly, instead use the create or open_existing class methods."""
super().__init__(mode, *args, **kwargs)
super().__init__(*args, mode=mode, **kwargs)
if store is None:
raise ValueError(
"An IcechunkStore should not be created with the default constructor, instead use either the create or open_existing class methods."
Expand Down
6 changes: 4 additions & 2 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ class VirtualRefConfig:
def s3_from_config(
cls,
credentials: S3Credentials,
endpoint_url: str | None,
*,
endpoint_url: str | None = None,
allow_http: bool | None = None,
region: str | None = None,
) -> VirtualRefConfig:
Expand All @@ -206,7 +207,8 @@ class VirtualRefConfig:
@classmethod
def s3_anonymous(
cls,
endpoint_url: str | None,
*,
endpoint_url: str | None = None,
allow_http: bool | None = None,
region: str | None = None,
) -> VirtualRefConfig:
Expand Down
206 changes: 203 additions & 3 deletions icechunk-python/tests/test_zarr/test_array.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from typing import Literal
import pickle
from itertools import accumulate
from typing import Any, Literal

import numpy as np
import pytest
import zarr
import zarr.api
import zarr.api.asynchronous
from icechunk import IcechunkStore
from zarr import Array, AsyncGroup, Group
from zarr.core.common import ZarrFormat
from zarr import Array, AsyncArray, AsyncGroup, Group
from zarr.codecs import BytesCodec, VLenBytesCodec
from zarr.core.array import chunks_initialized
from zarr.core.common import JSON, ZarrFormat
from zarr.core.indexing import ceildiv
from zarr.core.sync import sync
from zarr.errors import ContainsArrayError, ContainsGroupError
from zarr.storage import StorePath

Expand Down Expand Up @@ -185,3 +191,197 @@ def test_array_v3_fill_value(

assert arr.fill_value == np.dtype(dtype_str).type(fill_value)
assert arr.fill_value.dtype == arr.dtype


@pytest.mark.parametrize("store", ["memory"], indirect=True)
async def test_array_v3_nan_fill_value(store: IcechunkStore) -> None:
    """Check that a NaN fill value is reported correctly on a float64 v3 array.

    The array consists of a single chunk which is written entirely with NaN,
    i.e. with the array's own fill value.
    """
    extent = (10,)
    nan_array = Array.create(
        store=store,
        shape=extent,
        chunk_shape=extent,
        dtype=np.float64,
        fill_value=np.nan,
        zarr_format=3,
    )
    nan_array[:] = np.nan

    stored_fill = nan_array.fill_value
    assert np.isnan(stored_fill)
    assert stored_fill.dtype == nan_array.dtype
    # Disabled check, kept for reference:
    # # all fill value chunk is an empty chunk, and should not be written
    # assert len([a async for a in store.list_prefix("/")]) == 0


@pytest.mark.parametrize("store", ["local"], indirect=["store"])
@pytest.mark.parametrize("zarr_format", [3])
async def test_serializable_async_array(
    store: IcechunkStore, zarr_format: ZarrFormat
) -> None:
    """Round-trip an ``AsyncArray`` through pickle and compare it with the original."""
    original = await AsyncArray.create(
        store=store,
        shape=(100,),
        chunks=(10,),
        dtype="i4",
        zarr_format=zarr_format,
    )
    # await original.setitems(list(range(100)))

    restored = pickle.loads(pickle.dumps(original))

    assert restored == original
    # np.testing.assert_array_equal(await restored.getitem(slice(None)), await original.getitem(slice(None)))
    # TODO: uncomment the parts of this test that will be impacted by the config/prototype changes in flight


@pytest.mark.parametrize("store", ["local"], indirect=["store"])
@pytest.mark.parametrize("zarr_format", [3])
def test_serializable_sync_array(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
    """Round-trip a populated ``Array`` through pickle; metadata and data must match."""
    original = Array.create(
        store=store,
        shape=(100,),
        chunks=(10,),
        dtype="i4",
        zarr_format=zarr_format,
    )
    original[:] = list(range(100))

    restored = pickle.loads(pickle.dumps(original))

    # The unpickled array must compare equal and read back the same values.
    assert restored == original
    np.testing.assert_array_equal(restored[:], original[:])


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: IcechunkStore) -> None:
    """
    Test that array metadata declaring a storage transformer is rejected with a ValueError.
    """
    metadata_dict: dict[str, JSON] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        # The trailing comma matters: without it the parentheses are a no-op
        # and the value is a bare dict rather than a one-element tuple of
        # transformer configurations.
        "storage_transformers": ({"test": "should_raise"},),
    }
    match = "Arrays with storage transformers are not supported in zarr-python at this time."
    with pytest.raises(ValueError, match=match):
        Array.from_dict(StorePath(store), data=metadata_dict)


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]])
@pytest.mark.parametrize("nchunks", [2, 5, 10])
def test_nchunks(
    store: IcechunkStore,
    test_cls: type[Array] | type[AsyncArray[Any]],
    nchunks: int,
) -> None:
    """
    Verify that ``nchunks`` reports how many chunks the array is divided into,
    through both the synchronous and the asynchronous array object.
    """
    length = 100
    chunk_length = ceildiv(length, nchunks)
    arr = Array.create(store, shape=(length,), chunks=(chunk_length,), dtype="i4")

    # Query either the sync wrapper or the async array it wraps.
    target = arr if test_cls == Array else arr._async_array
    assert target.nchunks == nchunks


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]])
def test_nchunks_initialized(
    store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]]
) -> None:
    """
    Verify that ``nchunks_initialized`` tracks the number of stored chunks as
    chunks are written and then deleted one by one.
    """
    arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4")
    # Query either the sync wrapper or the async array it wraps.
    target = arr if test_cls == Array else arr._async_array

    # write chunks one at a time
    for written, region in enumerate(arr._iter_chunk_regions(), start=1):
        arr[region] = 1
        assert target.nchunks_initialized == written

    # delete chunks
    for deleted, key in enumerate(arr._iter_chunk_keys(), start=1):
        sync(arr.store_path.store.delete(key))
        assert target.nchunks_initialized == arr.nchunks - deleted


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]])
def test_chunks_initialized(
    store: IcechunkStore, test_cls: type[Array] | type[AsyncArray[Any]]
) -> None:
    """
    Verify that ``chunks_initialized`` returns exactly the keys of the chunks
    written so far, after each successive chunk write.
    """
    arr = Array.create(store, shape=(100,), chunks=(10,), dtype="i4")
    # Query either the sync wrapper or the async array it wraps.
    target = arr if test_cls == Array else arr._async_array

    # Each key becomes a tuple via split(" "); accumulate then concatenates
    # them so entry i holds the keys of chunks 0..i.
    # NOTE(review): presumably chunk keys never contain spaces, making each
    # split result a 1-tuple - confirm.
    key_tuples = tuple(tuple(key.split(" ")) for key in arr._iter_chunk_keys())
    cumulative_keys = tuple(accumulate(key_tuples))

    for written_keys, region in zip(
        cumulative_keys, arr._iter_chunk_regions(), strict=False
    ):
        arr[region] = 1
        assert sorted(chunks_initialized(target)) == sorted(written_keys)


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_default_fill_values(store: IcechunkStore) -> None:
    """Check the fill value assigned by default to arrays of several dtypes."""
    root = Group.from_store(store)

    # (array name, dtype, fill value expected when none is given)
    cases = (
        ("u4", "<U4", ""),
        ("s4", "<S4", b""),
        ("i", "i", 0),
        ("f", "f", 0.0),
    )
    for name, dtype, expected_fill in cases:
        created = root.create(name=name, shape=5, chunk_shape=5, dtype=dtype)
        assert created.fill_value == expected_fill


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_vlen_errors(store: IcechunkStore) -> None:
    """Check the errors raised for invalid codec lists on a string-dtype array."""

    def create_string_array(codecs: list[Any]) -> Array:
        # Every case uses the same array parameters; only the codecs differ.
        return Array.create(store, shape=5, chunk_shape=5, dtype="<U4", codecs=codecs)

    # No array-to-bytes codec at all.
    with pytest.raises(ValueError, match="At least one ArrayBytesCodec is required."):
        create_string_array([])

    # An array-to-bytes codec that is not the one required for strings.
    wrong_codec_msg = (
        "For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `BytesCodec`."
    )
    with pytest.raises(ValueError, match=wrong_codec_msg):
        create_string_array([BytesCodec()])

    # More than one array-to-bytes codec.
    with pytest.raises(ValueError, match="Only one ArrayBytesCodec is allowed."):
        create_string_array([BytesCodec(), VLenBytesCodec()])


@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("zarr_format", [3])
def test_update_attrs(store: IcechunkStore, zarr_format: int) -> None:
    """Attributes set on an array must be visible on it and after reopening it."""
    # regression test for https://github.com/zarr-developers/zarr-python/issues/2328
    written = Array.create(
        store=store,
        shape=5,
        chunk_shape=5,
        dtype="f8",
        zarr_format=zarr_format,
    )
    written.attrs["foo"] = "bar"
    assert written.attrs["foo"] == "bar"

    # Open the same store again and read the attribute back.
    reopened = zarr.open_array(store=store, zarr_format=zarr_format)
    assert reopened.attrs["foo"] == "bar"
26 changes: 21 additions & 5 deletions icechunk-python/tests/test_zarr/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,7 @@ def test_group_members(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
"""

path = "group"
agroup = AsyncGroup(
metadata=GroupMetadata(zarr_format=zarr_format),
store_path=StorePath(store=store, path=path),
)
group = Group(agroup)
group = Group.from_store(store=store, zarr_format=zarr_format)
members_expected: dict[str, Array | Group] = {}

members_expected["subgroup"] = group.create_group("subgroup")
Expand Down Expand Up @@ -313,6 +309,26 @@ def test_group_getitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
group["nope"]


def test_group_get_with_default(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
    """
    Test `Group.get` for a missing member (with and without an explicit default)
    and for an existing subgroup.
    """
    group = Group.from_store(store, zarr_format=zarr_format)

    # default behavior
    assert group.get("subgroup") is None

    # custom default
    assert group.get("subgroup", 8) == 8

    # now with a group
    group.require_group("subgroup").attrs["foo"] = "bar"

    found = cast(Group, group.get("subgroup", 8))
    assert found.attrs["foo"] == "bar"


def test_group_delitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None:
"""
Test the `Group.__delitem__` method.
Expand Down
20 changes: 15 additions & 5 deletions icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,15 @@ async def get(self, store: IcechunkStore, key: str) -> Buffer:
return self.buffer_cls.from_bytes(result)

@pytest.fixture(scope="function", params=[None, True])
def store_kwargs(
self, request: pytest.FixtureRequest
) -> dict[str, str | None | dict[str, Buffer]]:
def store_kwargs(self) -> dict[str, Any]:
kwargs = {
"storage": StorageConfig.memory(""),
"storage": StorageConfig.memory("store_test"),
"mode": "w",
}
return kwargs

@pytest.fixture(scope="function")
async def store(self, store_kwargs: str | None | dict[str, Buffer]) -> IcechunkStore:
async def store(self, store_kwargs: dict[str, Any]) -> IcechunkStore:
return await IcechunkStore.open(**store_kwargs)

@pytest.mark.xfail(reason="Not implemented")
Expand All @@ -72,6 +70,18 @@ def test_store_mode(self, store, store_kwargs: dict[str, Any]) -> None:
assert store.mode == AccessMode.from_literal("w")
assert not store.mode.readonly

@pytest.mark.parametrize("mode", ["r", "r+", "a", "w", "w-"])
async def test_store_open_mode(
    self, store_kwargs: dict[str, Any], mode: AccessModeLiteral
) -> None:
    """Open the store in each access mode and check the resulting store state.

    Opening is allowed to fail only for the modes containing "r"; for any
    store that does open, it must report itself open and carry the requested mode.
    """
    store_kwargs["mode"] = mode
    # Keep only the open call inside the try block, so that a failing
    # assertion below is reported instead of being caught by the handler.
    try:
        store = await self.store_cls.open(**store_kwargs)
    except Exception:
        # NOTE(review): presumably the "r"/"r+" modes fail because no store
        # exists yet at this location - confirm against IcechunkStore.open.
        assert 'r' in mode
        return

    assert store._is_open
    assert store.mode == AccessMode.from_literal(mode)

async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None:
create_kwargs = {**store_kwargs, "mode": "r"}
with pytest.raises(ValueError):
Expand Down

0 comments on commit 77de941

Please sign in to comment.