Skip to content

Commit

Permalink
Add wrappers for zarr v3 (#524)
Browse files Browse the repository at this point in the history
* import zarr-python#1839

* pep8

* remove try/catch

* pep8

* update to latest zarr-python interfaces

* flake

* add zarr-python to ci

* fix import

* tests

* fixes

* skip zarr3 tests on older python versions

* ruff

* add zfpy and pcodec

* remove zarr from dependencies

* change prefix

* fixes for ci

* fix for tests

* pr feedback

* Sync with zarr 3 beta (#597)

* Sync with zarr 3 beta

* Update zarr version in ci

* don't install zarr-python 3 in workflows running Python 3.10

* Update numcodecs/tests/test_zarr3.py

Co-authored-by: David Stansby <[email protected]>

* moves zarr3 to private module, adds test for zarr-python2 installs

* add typing_extensions as dep

* tests

* importorskip minversion

* ci install

* drop zarr 2 in ci

* no zarr2 + make zarr3 a public module

* pre-commit

* fixes?

* fix validate

* fix pcodec test

* fix pcodec test

* codecov

* codecov

* fix error match

* codecov

* codecov

* coverage

* wip docs

* docs and renames all codecs

* docs

* new zarr beta

* no zfpy for macos-14

* xfail

* rm dead code

* Update .github/workflows/ci.yaml

Co-authored-by: David Stansby <[email protected]>

* debug rtd

* debug ci

* Filter warnings in zarr3 tests

* Fix warning ignore

* pr feedback

---------

Co-authored-by: Matthew Iannucci <[email protected]>
Co-authored-by: David Stansby <[email protected]>
  • Loading branch information
3 people authored Nov 8, 2024
1 parent d8a219f commit 44130cd
Show file tree
Hide file tree
Showing 8 changed files with 764 additions and 2 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.11", "3.12", "3.13"]
# macos-12 is an intel runner, macos-14 is a arm64 runner
# macos-13 is an Intel runner, macos-14 is an arm64 runner
platform: [ubuntu-latest, windows-latest, macos-13, macos-14]

steps:
Expand Down Expand Up @@ -70,6 +70,12 @@ jobs:
conda activate env
python -m pip install -v ".[pcodec]"
- name: Install zarr-python
shell: "bash -l {0}"
run: |
conda activate env
# TODO: remove --pre option when zarr v3 is out
python -m pip install --pre zarr
# This is used to test with zfpy, which does not yet support numpy 2.0
- name: Install older numpy and zfpy
Expand Down
4 changes: 4 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ build:
os: ubuntu-20.04
tools:
python: "3.12"
jobs:
post_install:
- python -m pip install --pre 'zarr'

sphinx:
configuration: docs/conf.py
Expand All @@ -19,3 +22,4 @@ python:
- docs
- msgpack
- zfpy
- crc32c
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ API reference
checksum32
abc
registry
zarr3
99 changes: 99 additions & 0 deletions docs/zarr3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
Zarr 3 codecs
=============
.. automodule:: numcodecs.zarr3


Bytes-to-bytes codecs
---------------------
.. autoclass:: Blosc()

.. autoattribute:: codec_name

.. autoclass:: LZ4()

.. autoattribute:: codec_name

.. autoclass:: Zstd()

.. autoattribute:: codec_name

.. autoclass:: Zlib()

.. autoattribute:: codec_name

.. autoclass:: GZip()

.. autoattribute:: codec_name

.. autoclass:: BZ2()

.. autoattribute:: codec_name

.. autoclass:: LZMA()

.. autoattribute:: codec_name

.. autoclass:: Shuffle()

.. autoattribute:: codec_name


Array-to-array codecs
---------------------
.. autoclass:: Delta()

.. autoattribute:: codec_name

.. autoclass:: BitRound()

.. autoattribute:: codec_name

.. autoclass:: FixedScaleOffset()

.. autoattribute:: codec_name

.. autoclass:: Quantize()

.. autoattribute:: codec_name

.. autoclass:: PackBits()

.. autoattribute:: codec_name

.. autoclass:: AsType()

.. autoattribute:: codec_name


Bytes-to-bytes checksum codecs
------------------------------
.. autoclass:: CRC32()

.. autoattribute:: codec_name

.. autoclass:: CRC32C()

.. autoattribute:: codec_name

.. autoclass:: Adler32()

.. autoattribute:: codec_name

.. autoclass:: Fletcher32()

.. autoattribute:: codec_name

.. autoclass:: JenkinsLookup3()

.. autoattribute:: codec_name


Array-to-bytes codecs
---------------------
.. autoclass:: PCodec()

.. autoattribute:: codec_name

.. autoclass:: ZFPY()

.. autoattribute:: codec_name
237 changes: 237 additions & 0 deletions numcodecs/tests/test_zarr3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
from __future__ import annotations

import numpy as np
import pytest

import numcodecs.zarr3

zarr = pytest.importorskip("zarr")

def _zarr_predates_v3(version: str) -> bool:
    """Return True when *version* names a zarr release older than 3.x.

    Only the leading (major) component is inspected, and it is compared as an
    integer.  Comparing the whole version as a string is lexicographic, so a
    hypothetical "10.0.0" would sort before "3.0.0" and wrongly skip the
    module.  Looking at the major number alone also keeps 3.x pre-releases
    (for example "3.0.0b1") eligible, as they were before.

    A version whose major component is not a plain number is treated as
    "not older" so that the tests still run rather than being silently
    skipped.
    """
    major = version.split(".", 1)[0]
    return major.isdigit() and int(major) < 3


# Module-wide marks: skip everything on zarr < 3 and silence the two warnings
# that the numcodecs zarr3 wrappers are expected to trigger.
pytestmark = [
    pytest.mark.skipif(
        _zarr_predates_v3(zarr.__version__), reason="zarr 3.0.0 or later is required"
    ),
    pytest.mark.filterwarnings("ignore:Codec 'numcodecs.*' not configured in config.*:UserWarning"),
    pytest.mark.filterwarnings(
        "ignore:Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations."
    ),
]

# Short local aliases for the zarr names used by the tests in this module.
# They are looked up on the ``zarr`` object returned by ``pytest.importorskip``
# above rather than imported directly.
get_codec_class = zarr.registry.get_codec_class
Array = zarr.Array
JSON = zarr.core.common.JSON
BytesCodec = zarr.codecs.BytesCodec
Store = zarr.abc.store.Store
MemoryStore = zarr.storage.MemoryStore
StorePath = zarr.storage.StorePath


# Regular expression passed as ``match=`` to ``pytest.warns`` around the
# ``Array.create`` calls below that are expected to emit a ``UserWarning``.
EXPECTED_WARNING_STR = "Numcodecs codecs are not in the Zarr version 3.*"


@pytest.fixture
def store() -> StorePath:
    """Provide a ``StorePath`` wrapping a new ``MemoryStore`` opened with ``mode="w"``.

    The return annotation is ``StorePath`` (not ``Store``) because that is the
    type actually constructed here, and the tests rely on its ``/`` operator
    to derive per-array paths.
    """
    return StorePath(MemoryStore(mode="w"))


# Every object exported through ``numcodecs.zarr3.__all__``, resolved by name.
ALL_CODECS = [
    getattr(numcodecs.zarr3, exported_name) for exported_name in numcodecs.zarr3.__all__
]


@pytest.mark.parametrize("codec_class", ALL_CODECS)
def test_entry_points(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Looking up a codec's ``codec_name`` in zarr's registry yields that codec class."""
    registered_class = get_codec_class(codec_class.codec_name)
    assert registered_class == codec_class


@pytest.mark.parametrize("codec_class", ALL_CODECS)
def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
    """Each exported codec's docstring contains a ``See :class:`numcodecs.`` cross-reference."""
    docstring = codec_class.__doc__
    assert "See :class:`numcodecs." in docstring


@pytest.mark.parametrize(
    "codec_class",
    [
        numcodecs.zarr3.Blosc,
        numcodecs.zarr3.LZ4,
        numcodecs.zarr3.Zstd,
        numcodecs.zarr3.Zlib,
        numcodecs.zarr3.GZip,
        numcodecs.zarr3.BZ2,
        numcodecs.zarr3.LZMA,
        numcodecs.zarr3.Shuffle,
    ],
)
def test_generic_codec_class(
    store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]
):
    """Round-trip a uint16 array through ``BytesCodec`` followed by ``codec_class()``.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture; ``store / "generic"``
        is the location of the array under test.
    codec_class
        Codec wrapper, instantiated with its default configuration and placed
        after ``BytesCodec`` in the pipeline.
    """
    data = np.arange(0, 256, dtype="uint16").reshape((16, 16))

    # Creating an array with a numcodecs codec must emit the expected warning.
    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[BytesCodec(), codec_class()],
        )

    a[:, :] = data.copy()
    np.testing.assert_array_equal(data, a[:, :])


@pytest.mark.parametrize(
    ("codec_class", "codec_config"),
    [
        (numcodecs.zarr3.Delta, {"dtype": "float32"}),
        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 25.5}),
        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}),
        (numcodecs.zarr3.AsType, {"encode_dtype": "float32", "decode_dtype": "float64"}),
    ],
    ids=[
        "delta",
        "fixedscaleoffset",
        "fixedscaleoffset2",
        "astype",
    ],
)
def test_generic_filter(
    store: StorePath,
    codec_class: type[numcodecs.zarr3._NumcodecsCodec],
    codec_config: dict[str, JSON],
):
    """Round-trip a float32 array through ``codec_class(**codec_config)`` then ``BytesCodec``.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture; ``store / "generic"``
        is the location of the array under test.
    codec_class
        Codec wrapper placed before ``BytesCodec`` in the pipeline.
    codec_config
        Keyword arguments used to construct ``codec_class``.
    """
    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[
                codec_class(**codec_config),
                BytesCodec(),
            ],
        )

    a[:, :] = data.copy()
    # Re-open the array from the store before reading the data back.
    a = Array.open(store / "generic")
    np.testing.assert_array_equal(data, a[:, :])


def test_generic_filter_bitround(store: StorePath):
    """Write float32 data through ``BitRound(keepbits=3)`` and read it back within ``atol=0.1``.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture;
        ``store / "generic_bitround"`` is the location of the array under test.
    """
    data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16))

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic_bitround",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.BitRound(keepbits=3), BytesCodec()],
        )

    a[:, :] = data.copy()
    # Re-open the array from the store before reading the data back.
    a = Array.open(store / "generic_bitround")
    # Compared with a tolerance rather than for exact equality.
    assert np.allclose(data, a[:, :], atol=0.1)


def test_generic_filter_quantize(store: StorePath):
    """Write float32 data through ``Quantize(digits=3)`` and read it back within ``atol=0.001``.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture;
        ``store / "generic_quantize"`` is the location of the array under test.
    """
    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic_quantize",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.Quantize(digits=3), BytesCodec()],
        )

    a[:, :] = data.copy()
    # Re-open the array from the store before reading the data back.
    a = Array.open(store / "generic_quantize")
    # Compared with a tolerance rather than for exact equality.
    assert np.allclose(data, a[:, :], atol=0.001)


def test_generic_filter_packbits(store: StorePath):
    """Round-trip a boolean array through ``PackBits`` and reject a non-bool dtype.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture; the two arrays
        are created at ``store / "generic_packbits"`` and
        ``store / "generic_packbits_err"``.
    """
    data = np.zeros((16, 16), dtype="bool")
    data[0:4, :] = True

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic_packbits",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
        )

    a[:, :] = data.copy()
    # Re-open the array from the store before reading the data back.
    a = Array.open(store / "generic_packbits")
    np.testing.assert_array_equal(data, a[:, :])

    # Creating an array with a uint32 dtype and PackBits must fail.
    with pytest.raises(ValueError, match=".*requires bool dtype.*"):
        Array.create(
            store / "generic_packbits_err",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype="uint32",
            fill_value=0,
            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
        )


@pytest.mark.parametrize(
    "codec_class",
    [
        numcodecs.zarr3.CRC32,
        numcodecs.zarr3.CRC32C,
        numcodecs.zarr3.Adler32,
        numcodecs.zarr3.Fletcher32,
        numcodecs.zarr3.JenkinsLookup3,
    ],
)
def test_generic_checksum(
    store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]
):
    """Round-trip a float32 array through ``BytesCodec`` followed by a checksum codec.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture;
        ``store / "generic_checksum"`` is the location of the array under test.
    codec_class
        Checksum codec wrapper, instantiated with its default configuration
        and placed after ``BytesCodec`` in the pipeline.
    """
    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic_checksum",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[BytesCodec(), codec_class()],
        )

    a[:, :] = data.copy()
    # Re-open the array from the store before reading the data back.
    a = Array.open(store / "generic_checksum")
    np.testing.assert_array_equal(data, a[:, :])


@pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY])
def test_generic_bytes_codec(
    store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]
):
    """Round-trip a float32 array using ``codec_class()`` as the only codec.

    The test is marked xfail when accessing ``codec_class()._codec`` raises an
    ``ImportError`` or a ``ValueError`` whose message contains
    "codec not available"; any other ``ValueError`` is re-raised.

    Parameters
    ----------
    store
        The ``StorePath`` produced by the ``store`` fixture; ``store / "generic"``
        is the location of the array under test.
    codec_class
        Codec wrapper, instantiated with its default configuration.
    """
    # Probe the wrapped codec first so an unavailable backend is reported as
    # xfail before any array is created.
    try:
        codec_class()._codec  # noqa: B018
    except ValueError as e:
        if "codec not available" in str(e):
            pytest.xfail(f"{codec_class.codec_name} is not available: {e}")
        else:
            raise  # pragma: no cover
    except ImportError as e:
        pytest.xfail(f"{codec_class.codec_name} is not available: {e}")

    data = np.arange(0, 256, dtype="float32").reshape((16, 16))

    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
        a = Array.create(
            store / "generic",
            shape=data.shape,
            chunk_shape=(16, 16),
            dtype=data.dtype,
            fill_value=0,
            codecs=[
                codec_class(),
            ],
        )

    a[:, :] = data.copy()
    np.testing.assert_array_equal(data, a[:, :])
13 changes: 13 additions & 0 deletions numcodecs/tests/test_zarr3_import.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

import pytest


def test_zarr3_import():
    """Importing ``numcodecs.zarr3`` without zarr raises a descriptive ``ImportError``.

    When ``zarr`` itself can be imported there is nothing to check and the
    test passes without asserting anything.
    """
    expected_message = "zarr 3.0.0 or later.*"

    try:
        import zarr  # noqa: F401
    except ImportError:  # pragma: no cover
        zarr_missing = True
    else:
        zarr_missing = False

    if zarr_missing:  # pragma: no cover
        with pytest.raises(ImportError, match=expected_message):
            import numcodecs.zarr3  # noqa: F401
Loading

0 comments on commit 44130cd

Please sign in to comment.