diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 57e6f0b0..8789102d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: python-version: ["3.11", "3.12", "3.13"] - # macos-12 is an intel runner, macos-14 is a arm64 runner + # macos-13 is an Intel runner, macos-14 is an arm64 runner platform: [ubuntu-latest, windows-latest, macos-13, macos-14] steps: @@ -70,6 +70,12 @@ jobs: conda activate env python -m pip install -v ".[pcodec]" + - name: Install zarr-python + shell: "bash -l {0}" + run: | + conda activate env + # TODO: remove --pre option when zarr v3 is out + python -m pip install --pre zarr # This is used to test with zfpy, which does not yet support numpy 2.0 - name: Install older numpy and zfpy diff --git a/.readthedocs.yaml b/.readthedocs.yaml index bf954229..3189c9f5 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,6 +7,9 @@ build: os: ubuntu-20.04 tools: python: "3.12" + jobs: + post_install: + - python -m pip install --pre 'zarr' sphinx: configuration: docs/conf.py @@ -19,3 +22,4 @@ python: - docs - msgpack - zfpy + - crc32c diff --git a/docs/api.rst b/docs/api.rst index ef42fced..7caf51fd 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -10,3 +10,4 @@ API reference checksum32 abc registry + zarr3 \ No newline at end of file diff --git a/docs/zarr3.rst b/docs/zarr3.rst new file mode 100644 index 00000000..d0d8c486 --- /dev/null +++ b/docs/zarr3.rst @@ -0,0 +1,99 @@ +Zarr 3 codecs +============= +.. automodule:: numcodecs.zarr3 + + +Bytes-to-bytes codecs +--------------------- +.. autoclass:: Blosc() + + .. autoattribute:: codec_name + +.. autoclass:: LZ4() + + .. autoattribute:: codec_name + +.. autoclass:: Zstd() + + .. autoattribute:: codec_name + +.. autoclass:: Zlib() + + .. autoattribute:: codec_name + +.. autoclass:: GZip() + + .. autoattribute:: codec_name + +.. autoclass:: BZ2() + + .. autoattribute:: codec_name + +.. autoclass:: LZMA() + + ..
autoattribute:: codec_name
+
+.. autoclass:: Shuffle()
+
+    .. autoattribute:: codec_name
+
+
+Array-to-array codecs
+---------------------
+.. autoclass:: Delta()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: BitRound()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: FixedScaleOffset()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: Quantize()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: PackBits()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: AsType()
+
+    .. autoattribute:: codec_name
+
+
+Bytes-to-bytes checksum codecs
+------------------------------
+.. autoclass:: CRC32()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: CRC32C()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: Adler32()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: Fletcher32()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: JenkinsLookup3()
+
+    .. autoattribute:: codec_name
+
+
+Array-to-bytes codecs
+---------------------
+.. autoclass:: PCodec()
+
+    .. autoattribute:: codec_name
+
+.. autoclass:: ZFPY()
+
+    .. autoattribute:: codec_name
diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py
new file mode 100644
index 00000000..78d824e2
--- /dev/null
+++ b/numcodecs/tests/test_zarr3.py
@@ -0,0 +1,253 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+zarr = pytest.importorskip("zarr")
+
+# numcodecs.zarr3 raises ImportError when zarr is older than 3, so skip the
+# module before importing it. The major version is compared numerically
+# because, as strings, "10.0.0" sorts before "3.0.0".
+_zarr_major = zarr.__version__.partition(".")[0]
+if not _zarr_major.isdecimal() or int(_zarr_major) < 3:
+    pytest.skip("zarr 3.0.0 or later is required", allow_module_level=True)
+
+import numcodecs.zarr3  # noqa: E402
+
+pytestmark = [
+    pytest.mark.filterwarnings("ignore:Codec 'numcodecs.*' not configured in config.*:UserWarning"),
+    pytest.mark.filterwarnings(
+        "ignore:Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations."
+    ),
+]
+
+get_codec_class = zarr.registry.get_codec_class
+Array = zarr.Array
+JSON = zarr.core.common.JSON
+BytesCodec = zarr.codecs.BytesCodec
+Store = zarr.abc.store.Store
+MemoryStore = zarr.storage.MemoryStore
+StorePath = zarr.storage.StorePath
+
+
+EXPECTED_WARNING_STR = "Numcodecs codecs are not in the Zarr version 3.*"
+
+
+@pytest.fixture
+def store() -> Store:
+    """Provide an in-memory store path for each test."""
+    return StorePath(MemoryStore(mode="w"))
+
+
+ALL_CODECS = [getattr(numcodecs.zarr3, cls_name) for cls_name in numcodecs.zarr3.__all__]
+
+
+@pytest.mark.parametrize("codec_class", ALL_CODECS)
+def test_entry_points(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
+    """Each codec class is returned by zarr's registry under its ``codec_name``."""
+    codec_name = codec_class.codec_name
+    assert get_codec_class(codec_name) == codec_class
+
+
+@pytest.mark.parametrize("codec_class", ALL_CODECS)
+def test_docstring(codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
+    """Each codec class docstring refers to its numcodecs counterpart."""
+    assert "See :class:`numcodecs." in codec_class.__doc__
+
+
+@pytest.mark.parametrize(
+    "codec_class",
+    [
+        numcodecs.zarr3.Blosc,
+        numcodecs.zarr3.LZ4,
+        numcodecs.zarr3.Zstd,
+        numcodecs.zarr3.Zlib,
+        numcodecs.zarr3.GZip,
+        numcodecs.zarr3.BZ2,
+        numcodecs.zarr3.LZMA,
+        numcodecs.zarr3.Shuffle,
+    ],
+)
+def test_generic_codec_class(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
+    """Data round-trips through each bytes-to-bytes codec with default settings."""
+    data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[BytesCodec(), codec_class()],
+        )
+
+    a[:, :] = data.copy()
+    np.testing.assert_array_equal(data, a[:, :])
+
+
+@pytest.mark.parametrize(
+    ("codec_class", "codec_config"),
+    [
+        (numcodecs.zarr3.Delta, {"dtype": "float32"}),
+        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 25.5}),
+        (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}),
+        (numcodecs.zarr3.AsType, {"encode_dtype": "float32", "decode_dtype": "float64"}),
+    ],
+    ids=[
+        "delta",
+        "fixedscaleoffset",
+        "fixedscaleoffset2",
+        "astype",
+    ],
+)
+def test_generic_filter(
+    store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec], codec_config: dict[str, JSON]
+):
+    """Data round-trips exactly through each configured filter after reopening."""
+    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[
+                codec_class(**codec_config),
+                BytesCodec(),
+            ],
+        )
+
+    a[:, :] = data.copy()
+    a = Array.open(store / "generic")
+    np.testing.assert_array_equal(data, a[:, :])
+
+
+def test_generic_filter_bitround(store: Store):
+    """BitRound with 3 kept bits reproduces the data within 0.1."""
+    data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic_bitround",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[numcodecs.zarr3.BitRound(keepbits=3), BytesCodec()],
+        )
+
+    a[:, :] = data.copy()
+    a = Array.open(store / "generic_bitround")
+    assert np.allclose(data, a[:, :], atol=0.1)
+
+
+def test_generic_filter_quantize(store: Store):
+    """Quantize with 3 digits reproduces the data within 0.001."""
+    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic_quantize",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[numcodecs.zarr3.Quantize(digits=3), BytesCodec()],
+        )
+
+    a[:, :] = data.copy()
+    a = Array.open(store / "generic_quantize")
+    assert np.allclose(data, a[:, :], atol=0.001)
+
+
+def test_generic_filter_packbits(store: Store):
+    """PackBits round-trips bool data and rejects other dtypes."""
+    data = np.zeros((16, 16), dtype="bool")
+    data[0:4, :] = True
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic_packbits",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
+        )
+
+    a[:, :] = data.copy()
+    a = Array.open(store / "generic_packbits")
+    np.testing.assert_array_equal(data, a[:, :])
+
+    with pytest.raises(ValueError, match=".*requires bool dtype.*"):
+        Array.create(
+            store / "generic_packbits_err",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype="uint32",
+            fill_value=0,
+            codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
+        )
+
+
+@pytest.mark.parametrize(
+    "codec_class",
+    [
+        numcodecs.zarr3.CRC32,
+        numcodecs.zarr3.CRC32C,
+        numcodecs.zarr3.Adler32,
+        numcodecs.zarr3.Fletcher32,
+        numcodecs.zarr3.JenkinsLookup3,
+    ],
+)
+def test_generic_checksum(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
+    """Data round-trips through each checksum codec after reopening."""
+    data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic_checksum",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[BytesCodec(), codec_class()],
+        )
+
+    a[:, :] = data.copy()
+    a = Array.open(store / "generic_checksum")
+    np.testing.assert_array_equal(data, a[:, :])
+
+
+@pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY])
+def test_generic_bytes_codec(store: Store, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
+    """Data round-trips through each array-to-bytes codec, if it is installed."""
+    try:
+        codec_class()._codec  # noqa: B018
+    except ValueError as e:
+        if "codec not available" in str(e):
+            pytest.xfail(f"{codec_class.codec_name} is not available: {e}")
+        else:
+            raise  # pragma: no cover
+    except ImportError as e:
+        pytest.xfail(f"{codec_class.codec_name} is not available: {e}")
+
+    data = np.arange(0, 256, dtype="float32").reshape((16, 16))
+
+    with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
+        a = Array.create(
+            store / "generic",
+            shape=data.shape,
+            chunk_shape=(16, 16),
+            dtype=data.dtype,
+            fill_value=0,
+            codecs=[
+                codec_class(),
+            ],
+        )
+
+    a[:, :] = data.copy()
+    np.testing.assert_array_equal(data, a[:, :])
diff --git a/numcodecs/tests/test_zarr3_import.py
b/numcodecs/tests/test_zarr3_import.py
new file mode 100644
index 00000000..3feaf3e1
--- /dev/null
+++ b/numcodecs/tests/test_zarr3_import.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+import pytest
+
+
+def test_zarr3_import():
+    ERROR_MESSAGE_MATCH = "zarr 3.0.0 or later.*"
+
+    try:
+        import zarr  # noqa: F401
+    except ImportError:  # pragma: no cover
+        with pytest.raises(ImportError, match=ERROR_MESSAGE_MATCH):
+            import numcodecs.zarr3  # noqa: F401
diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py
new file mode 100644
index 00000000..811ab501
--- /dev/null
+++ b/numcodecs/zarr3.py
@@ -0,0 +1,425 @@
+"""
+This module provides compatibility for :py:mod:`numcodecs` in Zarr version 3.
+
+A compatibility module is required because the codec handling in Zarr version 3 is different from Zarr version 2.
+
+You can use codecs from :py:mod:`numcodecs` by constructing codecs from :py:mod:`numcodecs.zarr3` using the same parameters as the original codecs.
+
+>>> import numpy as np
+>>> import zarr
+>>> import numcodecs.zarr3
+>>>
+>>> codecs = [zarr.codecs.BytesCodec(), numcodecs.zarr3.BZ2(level=5)]
+>>> array = zarr.open(
+...   "data.zarr", mode="w",
+...   shape=(1024, 1024), chunks=(64, 64),
+...   dtype="uint32",
+...   codecs=codecs)
+>>> array[:] = np.arange(1024 * 1024, dtype="uint32").reshape(1024, 1024)
+
+.. note::
+
+    Please note that the codecs in :py:mod:`numcodecs.zarr3` are not part of the Zarr version 3 specification.
+    Using these codecs might cause interoperability issues with other Zarr implementations.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import math
+from collections.abc import Callable
+from dataclasses import dataclass, replace
+from functools import cached_property, partial
+from typing import Any, Self, TypeVar
+from warnings import warn
+
+import numpy as np
+
+import numcodecs
+
+_ZARR_REQUIRED_MESSAGE = "zarr 3.0.0 or later is required to use the numcodecs zarr integration."
+
+try:
+    import zarr
+except ImportError as e:  # pragma: no cover
+    raise ImportError(_ZARR_REQUIRED_MESSAGE) from e
+
+# Compare the major version numerically: as strings, "10.0.0" sorts before
+# "3.0.0", which would reject newer zarr releases.
+_zarr_major = zarr.__version__.partition(".")[0]
+if not _zarr_major.isdecimal() or int(_zarr_major) < 3:  # pragma: no cover
+    raise ImportError(_ZARR_REQUIRED_MESSAGE)
+
+from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
+from zarr.core.array_spec import ArraySpec
+from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer
+from zarr.core.buffer.cpu import as_numpy_array_wrapper
+from zarr.core.common import JSON, parse_named_configuration, product
+
+# Prefix shared by the names of all codecs defined in this module.
+CODEC_PREFIX = "numcodecs."
+
+
+def _expect_name_prefix(codec_name: str) -> str:
+    """Return ``codec_name`` with ``CODEC_PREFIX`` removed.
+
+    Raises ``ValueError`` if ``codec_name`` does not start with the prefix.
+    """
+    if not codec_name.startswith(CODEC_PREFIX):
+        raise ValueError(
+            f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead."
+        )  # pragma: no cover
+    return codec_name.removeprefix(CODEC_PREFIX)
+
+
+def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]:
+    """Convert named codec metadata into a numcodecs configuration dict.
+
+    The prefix check is delegated to ``_expect_name_prefix``, which raises
+    ``ValueError`` when the name does not start with ``CODEC_PREFIX``.
+    """
+    parsed_name, parsed_configuration = parse_named_configuration(data)
+    codec_id = _expect_name_prefix(parsed_name)
+    return {"id": codec_id, **parsed_configuration}
+
+
+@dataclass(frozen=True)
+class _NumcodecsCodec:
+    """Common base for the Zarr version 3 wrappers around numcodecs codecs.
+
+    Subclasses provide ``codec_name`` (starting with ``CODEC_PREFIX``); the
+    keyword arguments are stored as the numcodecs codec configuration.
+    """
+
+    codec_name: str
+    codec_config: dict[str, JSON]
+
+    def __init__(self, **codec_config: JSON) -> None:
+        if not self.codec_name:
+            raise ValueError(
+                "The codec name needs to be supplied through the `codec_name` attribute."
+            )  # pragma: no cover
+        unprefixed_codec_name = _expect_name_prefix(self.codec_name)
+
+        if "id" not in codec_config:
+            codec_config = {"id": unprefixed_codec_name, **codec_config}
+        elif codec_config["id"] != unprefixed_codec_name:
+            raise ValueError(
+                f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}."
+            )  # pragma: no cover
+
+        # The dataclass is frozen, so the attribute is set via object.__setattr__.
+        object.__setattr__(self, "codec_config", codec_config)
+        warn(
+            "Numcodecs codecs are not in the Zarr version 3 specification and "
+            "may not be supported by other zarr implementations.",
+            category=UserWarning,
+        )
+
+    @cached_property
+    def _codec(self) -> numcodecs.abc.Codec:
+        """The numcodecs codec built from ``codec_config`` on first access."""
+        return numcodecs.get_codec(self.codec_config)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, JSON]) -> Self:
+        """Create a codec from metadata in the form returned by ``to_dict``."""
+        codec_config = _parse_codec_configuration(data)
+        return cls(**codec_config)
+
+    def to_dict(self) -> JSON:
+        """Return the codec metadata as a ``name``/``configuration`` mapping."""
+        codec_config = self.codec_config.copy()
+        return {
+            "name": self.codec_name,
+            "configuration": codec_config,
+        }
+
+    def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
+        raise NotImplementedError  # pragma: no cover
+
+
+class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):
+    """Base class for wrapped codecs that map bytes to bytes."""
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    async def _decode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:
+        return await asyncio.to_thread(
+            as_numpy_array_wrapper,
+            self._codec.decode,
+            chunk_bytes,
+            chunk_spec.prototype,
+        )
+
+    def _encode(self, chunk_bytes: Buffer, prototype: BufferPrototype) -> Buffer:
+        encoded = self._codec.encode(chunk_bytes.as_array_like())
+        if isinstance(encoded, np.ndarray):  # Required for checksum codecs
+            return prototype.buffer.from_bytes(encoded.tobytes())
+        return prototype.buffer.from_bytes(encoded)
+
+    async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer:
+        return await asyncio.to_thread(self._encode, chunk_bytes, chunk_spec.prototype)
+
+
+class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec):
+    """Base class for wrapped codecs that map arrays to arrays."""
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    async def _decode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+        chunk_ndarray = chunk_array.as_ndarray_like()
+        out = await asyncio.to_thread(self._codec.decode, chunk_ndarray)
+        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
+
+    async def _encode_single(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer:
+        chunk_ndarray = chunk_array.as_ndarray_like()
+        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
+        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out)
+
+
+class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec):
+    """Base class for wrapped codecs that map arrays to bytes."""
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    async def _decode_single(self, chunk_buffer: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
+        chunk_bytes = chunk_buffer.to_bytes()
+        out = await asyncio.to_thread(self._codec.decode, chunk_bytes)
+        return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape))
+
+    async def _encode_single(self, chunk_ndbuffer: NDBuffer, chunk_spec: ArraySpec) -> Buffer:
+        chunk_ndarray = chunk_ndbuffer.as_ndarray_like()
+        out = await asyncio.to_thread(self._codec.encode, chunk_ndarray)
+        return chunk_spec.prototype.buffer.from_bytes(out)
+
+
+T = TypeVar("T", bound=_NumcodecsCodec)
+
+
+def _add_docstring(cls: type[T], ref_class_name: str) -> type[T]:
+    """Set the docstring of ``cls`` to a reference to ``ref_class_name``."""
+    cls.__doc__ = f"""
+    See :class:`{ref_class_name}` for more details and parameters.
+    """
+    return cls
+
+
+def _add_docstring_wrapper(ref_class_name: str) -> Callable[[type[T]], type[T]]:
+    """Return a class decorator that applies ``_add_docstring``."""
+    return partial(_add_docstring, ref_class_name=ref_class_name)
+
+
+def _make_bytes_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]:
+    """Create a bytes-to-bytes codec class named ``cls_name``."""
+    # rename for class scope
+    _codec_name = CODEC_PREFIX + codec_name
+
+    class _Codec(_NumcodecsBytesBytesCodec):
+        codec_name = _codec_name
+
+        def __init__(self, **codec_config: JSON) -> None:
+            super().__init__(**codec_config)
+
+    _Codec.__name__ = cls_name
+    _Codec.__qualname__ = cls_name
+    return _Codec
+
+
+def _make_array_array_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayArrayCodec]:
+    """Create an array-to-array codec class named ``cls_name``."""
+    # rename for class scope
+    _codec_name = CODEC_PREFIX + codec_name
+
+    class _Codec(_NumcodecsArrayArrayCodec):
+        codec_name = _codec_name
+
+        def __init__(self, **codec_config: JSON) -> None:
+            super().__init__(**codec_config)
+
+    _Codec.__name__ = cls_name
+    _Codec.__qualname__ = cls_name
+    return _Codec
+
+
+def _make_array_bytes_codec(codec_name: str, cls_name: str) -> type[_NumcodecsArrayBytesCodec]:
+    """Create an array-to-bytes codec class named ``cls_name``."""
+    # rename for class scope
+    _codec_name = CODEC_PREFIX + codec_name
+
+    class _Codec(_NumcodecsArrayBytesCodec):
+        codec_name = _codec_name
+
+        def __init__(self, **codec_config: JSON) -> None:
+            super().__init__(**codec_config)
+
+    _Codec.__name__ = cls_name
+    _Codec.__qualname__ = cls_name
+    return _Codec
+
+
+def _make_checksum_codec(codec_name: str, cls_name: str) -> type[_NumcodecsBytesBytesCodec]:
+    """Create a checksum codec class named ``cls_name``."""
+    # rename for class scope
+    _codec_name = CODEC_PREFIX + codec_name
+
+    class _ChecksumCodec(_NumcodecsBytesBytesCodec):
+        codec_name = _codec_name
+
+        def __init__(self, **codec_config: JSON) -> None:
+            super().__init__(**codec_config)
+
+        def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
+            return input_byte_length + 4  # pragma: no cover
+
+    _ChecksumCodec.__name__ = cls_name
+    _ChecksumCodec.__qualname__ = cls_name
+    return _ChecksumCodec
+
+
+# bytes-to-bytes codecs
+Blosc = _add_docstring(_make_bytes_bytes_codec("blosc", "Blosc"), "numcodecs.blosc.Blosc")
+LZ4 = _add_docstring(_make_bytes_bytes_codec("lz4", "LZ4"), "numcodecs.lz4.LZ4")
+Zstd = _add_docstring(_make_bytes_bytes_codec("zstd", "Zstd"), "numcodecs.zstd.Zstd")
+Zlib = _add_docstring(_make_bytes_bytes_codec("zlib", "Zlib"), "numcodecs.zlib.Zlib")
+GZip = _add_docstring(_make_bytes_bytes_codec("gzip", "GZip"), "numcodecs.gzip.GZip")
+BZ2 = _add_docstring(_make_bytes_bytes_codec("bz2", "BZ2"), "numcodecs.bz2.BZ2")
+LZMA = _add_docstring(_make_bytes_bytes_codec("lzma", "LZMA"), "numcodecs.lzma.LZMA")
+
+
+@_add_docstring_wrapper("numcodecs.shuffle.Shuffle")
+class Shuffle(_NumcodecsBytesBytesCodec):
+    codec_name = f"{CODEC_PREFIX}shuffle"
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
+        """Return a codec whose ``elementsize`` equals the array dtype's item size."""
+        if array_spec.dtype.itemsize != self.codec_config.get("elementsize"):
+            return type(self)(**{**self.codec_config, "elementsize": array_spec.dtype.itemsize})
+        return self  # pragma: no cover
+
+
+# array-to-array codecs ("filters")
+Delta = _add_docstring(_make_array_array_codec("delta", "Delta"), "numcodecs.delta.Delta")
+BitRound = _add_docstring(
+    _make_array_array_codec("bitround", "BitRound"), "numcodecs.bitround.BitRound"
+)
+
+
+@_add_docstring_wrapper("numcodecs.fixedscaleoffset.FixedScaleOffset")
+class FixedScaleOffset(_NumcodecsArrayArrayCodec):
+    codec_name = f"{CODEC_PREFIX}fixedscaleoffset"
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
+        """Report ``astype`` as the encoded dtype when it is configured."""
+        if astype := self.codec_config.get("astype"):
+            return replace(chunk_spec, dtype=np.dtype(astype))
+        return chunk_spec
+
+    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
+        """Return a codec whose ``dtype`` setting matches the array dtype."""
+        if str(array_spec.dtype) != self.codec_config.get("dtype"):
+            return type(self)(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+        return self
+
+
+@_add_docstring_wrapper("numcodecs.quantize.Quantize")
+class Quantize(_NumcodecsArrayArrayCodec):
+    codec_name = f"{CODEC_PREFIX}quantize"
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
+        """Return a codec whose ``dtype`` setting matches the array dtype."""
+        if str(array_spec.dtype) != self.codec_config.get("dtype"):
+            return type(self)(**{**self.codec_config, "dtype": str(array_spec.dtype)})
+        return self
+
+
+@_add_docstring_wrapper("numcodecs.packbits.PackBits")
+class PackBits(_NumcodecsArrayArrayCodec):
+    codec_name = f"{CODEC_PREFIX}packbits"
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
+        """Describe the encoded chunk: a flat ``uint8`` array of ``1 + ceil(n / 8)`` items."""
+        return replace(
+            chunk_spec,
+            shape=(1 + math.ceil(product(chunk_spec.shape) / 8),),
+            dtype=np.dtype("uint8"),
+        )
+
+    def validate(self, *, dtype: np.dtype[Any], **_kwargs) -> None:
+        """Raise ``ValueError`` unless the array dtype is ``bool``."""
+        if dtype != np.dtype("bool"):
+            raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.")
+
+
+@_add_docstring_wrapper("numcodecs.astype.AsType")
+class AsType(_NumcodecsArrayArrayCodec):
+    codec_name = f"{CODEC_PREFIX}astype"
+
+    def __init__(self, **codec_config: JSON) -> None:
+        super().__init__(**codec_config)
+
+    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
+        """Report ``encode_dtype`` as the dtype of the encoded chunk."""
+        return replace(chunk_spec, dtype=np.dtype(self.codec_config["encode_dtype"]))
+
+    def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
+        """Return a codec whose ``decode_dtype`` matches the array dtype."""
+        decode_dtype = self.codec_config.get("decode_dtype")
+        if str(array_spec.dtype) != decode_dtype:
+            return type(self)(**{**self.codec_config, "decode_dtype": str(array_spec.dtype)})
+        return self
+
+
+# bytes-to-bytes checksum codecs
+CRC32 = _add_docstring(_make_checksum_codec("crc32", "CRC32"), "numcodecs.checksum32.CRC32")
+CRC32C = _add_docstring(_make_checksum_codec("crc32c", "CRC32C"), "numcodecs.checksum32.CRC32C")
+Adler32 = _add_docstring(_make_checksum_codec("adler32", "Adler32"), "numcodecs.checksum32.Adler32")
+Fletcher32 = _add_docstring(
+    _make_checksum_codec("fletcher32", "Fletcher32"), "numcodecs.fletcher32.Fletcher32"
+)
+JenkinsLookup3 = _add_docstring(
+    _make_checksum_codec("jenkins_lookup3", "JenkinsLookup3"), "numcodecs.checksum32.JenkinsLookup3"
+)
+
+# array-to-bytes codecs
+PCodec = _add_docstring(_make_array_bytes_codec("pcodec", "PCodec"), "numcodecs.pcodec.PCodec")
+ZFPY = _add_docstring(_make_array_bytes_codec("zfpy", "ZFPY"), "numcodecs.zfpy.ZFPY")
+
+__all__ = [
+    "Blosc",
+    "LZ4",
+    "Zstd",
+    "Zlib",
+    "GZip",
+    "BZ2",
+    "LZMA",
+    "Shuffle",
+    "Delta",
+    "BitRound",
+    "FixedScaleOffset",
+    "Quantize",
+    "PackBits",
+    "AsType",
+    "CRC32",
+    "CRC32C",
+    "Adler32",
+    "Fletcher32",
+    "JenkinsLookup3",
+    "PCodec",
+    "ZFPY",
+]
diff --git a/pyproject.toml b/pyproject.toml
index d1026104..8e1a03f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,29 @@ crc32c = [
     "crc32c>=2.7",
 ]
 
+[project.entry-points."zarr.codecs"]
+"numcodecs.blosc" = "numcodecs.zarr3:Blosc"
+"numcodecs.lz4" = "numcodecs.zarr3:LZ4" +"numcodecs.zstd" = "numcodecs.zarr3:Zstd" +"numcodecs.zlib" = "numcodecs.zarr3:Zlib" +"numcodecs.gzip" = "numcodecs.zarr3:GZip" +"numcodecs.bz2" = "numcodecs.zarr3:BZ2" +"numcodecs.lzma" = "numcodecs.zarr3:LZMA" +"numcodecs.shuffle" = "numcodecs.zarr3:Shuffle" +"numcodecs.delta" = "numcodecs.zarr3:Delta" +"numcodecs.bitround" = "numcodecs.zarr3:BitRound" +"numcodecs.fixedscaleoffset" = "numcodecs.zarr3:FixedScaleOffset" +"numcodecs.quantize" = "numcodecs.zarr3:Quantize" +"numcodecs.packbits" = "numcodecs.zarr3:PackBits" +"numcodecs.astype" = "numcodecs.zarr3:AsType" +"numcodecs.crc32" = "numcodecs.zarr3:CRC32" +"numcodecs.crc32c" = "numcodecs.zarr3:CRC32C" +"numcodecs.adler32" = "numcodecs.zarr3:Adler32" +"numcodecs.fletcher32" = "numcodecs.zarr3:Fletcher32" +"numcodecs.jenkins_lookup3" = "numcodecs.zarr3:JenkinsLookup3" +"numcodecs.pcodec" = "numcodecs.zarr3:PCodec" +"numcodecs.zfpy" = "numcodecs.zarr3:ZFPY" + [tool.setuptools] license-files = ["LICENSE.txt"] package-dir = {"" = "."} @@ -112,7 +135,7 @@ doctest_optionflags = [ "IGNORE_EXCEPTION_DETAIL", ] testpaths = [ - "numcodecs", + "numcodecs/tests", ] norecursedirs = [ ".git",