Skip to content

Commit

Permalink
Update zarr and icechunk tests, fix zarr v3 metadata
Browse files: browse the repository at this point in the history.
  • Loading branch information
mpiannucci committed Oct 21, 2024
1 parent d10de6b commit d0b6bfb
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 3 deletions.
2 changes: 1 addition & 1 deletion ci/upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dependencies:
- fsspec
- pip
- pip:
- zarr==3.0.0b1 # beta release of zarr-python v3
- icechunk # Installs zarr v3 as dependency
- git+https://github.com/pydata/xarray@zarr-v3 # zarr-v3 compatibility branch
- git+https://github.com/zarr-developers/numcodecs@zarr3-codecs # zarr-v3 compatibility branch
# - git+https://github.com/fsspec/kerchunk@main # kerchunk is currently incompatible with zarr-python v3 (https://github.com/fsspec/kerchunk/pull/516)
2 changes: 2 additions & 0 deletions virtualizarr/readers/zarr_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,7 @@ def _configurable_to_num_codec_config(configurable: dict) -> dict:
"""
configurable_copy = configurable.copy()
codec_id = configurable_copy.pop("name")
if codec_id.startswith("numcodecs."):
codec_id = codec_id[len("numcodecs.") :]
configuration = configurable_copy.pop("configuration")
return numcodecs.get_codec({"id": codec_id, **configuration}).get_config()
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def icechunk_filestore(tmpdir) -> "IcechunkStore":

# TODO if icechunk exposed a synchronous version of .open then we wouldn't need to use asyncio.run here
# TODO is this the correct mode to use?
store = asyncio.run(IcechunkStore.open(storage=storage, mode="r+"))
store = asyncio.run(IcechunkStore.open(storage=storage, mode="a"))

# TODO instead yield store then store.close() ??
return store
Expand Down
6 changes: 5 additions & 1 deletion virtualizarr/writers/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def to_zarr_json(var: Variable, array_dir: Path) -> None:
def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) -> dict:
"""Construct a v3-compliant metadata dict from v2 zarray + information stored on the xarray variable."""
# TODO it would be nice if we could use the zarr-python metadata.ArrayMetadata classes to do this conversion for us
try:
from zarr.core.metadata.v3 import ArrayV3Metadata
except ImportError:
raise ImportError("zarr-python v3+ must be installed to use this function")

metadata = zarray.dict()

Expand All @@ -95,7 +99,7 @@ def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) ->
"name": "default",
"configuration": {"separator": "/"},
}
metadata["codecs"] = zarray._v3_codec_pipeline()
metadata["codecs"] = tuple(c.to_dict() for c in zarray._v3_codec_pipeline())
metadata.pop("filters")
metadata.pop("compressor")
metadata.pop("order")
Expand Down
3 changes: 3 additions & 0 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ def _num_codec_config_to_configurable(num_codec: dict) -> dict:
"""
Convert a numcodecs codec into a zarr v3 configurable.
"""
if num_codec["id"].startswith("numcodecs."):
return num_codec

num_codec_copy = num_codec.copy()
name = "numcodecs." + num_codec_copy.pop("id")
return {"name": name, "configuration": num_codec_copy}

0 comments on commit d0b6bfb

Please sign in to comment.