From d0b6bfb1cb0eace12219cbf1293afb2f13652e01 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Mon, 21 Oct 2024 11:17:44 -0400 Subject: [PATCH] Update zarr and icechunk tests, fix zarr v3 metadata --- ci/upstream.yml | 2 +- virtualizarr/readers/zarr_v3.py | 2 ++ virtualizarr/tests/test_writers/test_icechunk.py | 2 +- virtualizarr/writers/zarr.py | 6 +++++- virtualizarr/zarr.py | 3 +++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ci/upstream.yml b/ci/upstream.yml index 184c6710..2c2680bc 100644 --- a/ci/upstream.yml +++ b/ci/upstream.yml @@ -24,7 +24,7 @@ dependencies: - fsspec - pip - pip: - - zarr==3.0.0b1 # beta release of zarr-python v3 + - icechunk # Installs zarr v3 as dependency - git+https://github.com/pydata/xarray@zarr-v3 # zarr-v3 compatibility branch - git+https://github.com/zarr-developers/numcodecs@zarr3-codecs # zarr-v3 compatibility branch # - git+https://github.com/fsspec/kerchunk@main # kerchunk is currently incompatible with zarr-python v3 (https://github.com/fsspec/kerchunk/pull/516) diff --git a/virtualizarr/readers/zarr_v3.py b/virtualizarr/readers/zarr_v3.py index 6da81581..a1f4ab7d 100644 --- a/virtualizarr/readers/zarr_v3.py +++ b/virtualizarr/readers/zarr_v3.py @@ -150,5 +150,7 @@ def _configurable_to_num_codec_config(configurable: dict) -> dict: """ configurable_copy = configurable.copy() codec_id = configurable_copy.pop("name") + if codec_id.startswith("numcodecs."): + codec_id = codec_id[len("numcodecs.") :] configuration = configurable_copy.pop("configuration") return numcodecs.get_codec({"id": codec_id, **configuration}).get_config() diff --git a/virtualizarr/tests/test_writers/test_icechunk.py b/virtualizarr/tests/test_writers/test_icechunk.py index 0c3491e6..b2de2c52 100644 --- a/virtualizarr/tests/test_writers/test_icechunk.py +++ b/virtualizarr/tests/test_writers/test_icechunk.py @@ -28,7 +28,7 @@ def icechunk_filestore(tmpdir) -> "IcechunkStore": # TODO if icechunk exposed a synchronous version of .open then we wouldn't need to use asyncio.run here # TODO is this the correct mode to use? - store = asyncio.run(IcechunkStore.open(storage=storage, mode="r+")) + store = asyncio.run(IcechunkStore.open(storage=storage, mode="a")) # TODO instead yield store then store.close() ?? return store diff --git a/virtualizarr/writers/zarr.py b/virtualizarr/writers/zarr.py index b3dc8f1a..e1cea132 100644 --- a/virtualizarr/writers/zarr.py +++ b/virtualizarr/writers/zarr.py @@ -80,6 +80,10 @@ def to_zarr_json(var: Variable, array_dir: Path) -> None: def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) -> dict: """Construct a v3-compliant metadata dict from v2 zarray + information stored on the xarray variable.""" # TODO it would be nice if we could use the zarr-python metadata.ArrayMetadata classes to do this conversion for us + try: + from zarr.core.metadata.v3 import ArrayV3Metadata + except ImportError: + raise ImportError("zarr-python v3+ must be installed to use this function") metadata = zarray.dict() @@ -95,7 +99,7 @@ def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) -> "name": "default", "configuration": {"separator": "/"}, } - metadata["codecs"] = zarray._v3_codec_pipeline() + metadata["codecs"] = tuple(c.to_dict() for c in zarray._v3_codec_pipeline()) metadata.pop("filters") metadata.pop("compressor") metadata.pop("order") diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py index 3f71bbcf..a222a788 100644 --- a/virtualizarr/zarr.py +++ b/virtualizarr/zarr.py @@ -221,6 +221,9 @@ def _num_codec_config_to_configurable(num_codec: dict) -> dict: """ Convert a numcodecs codec into a zarr v3 configurable. """ + if num_codec["id"].startswith("numcodecs."): + return num_codec + num_codec_copy = num_codec.copy() name = "numcodecs." + num_codec_copy.pop("id") return {"name": name, "configuration": num_codec_copy}