Skip to content

Commit

Permalink
Add loadable variable test
Browse files Browse the repository at this point in the history
  • Loading branch information
mpiannucci committed Oct 22, 2024
1 parent e105b78 commit ea52003
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 12 deletions.
51 changes: 48 additions & 3 deletions virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def test_set_single_virtual_ref_with_encoding(
# the refs are in the store (even uncommitted) it's icechunk's problem to manage them now.


# TODO test writing grids of multiple chunks
def test_set_grid_virtual_refs(icechunk_filestore: "IcechunkStore", netcdf4_file: Path):
# TODO kerchunk doesn't work with zarr-python v3 yet so we can't use open_virtual_dataset and icechunk together!
# vds = open_virtual_dataset(netcdf4_file, indexes={})
Expand Down Expand Up @@ -233,9 +232,55 @@ def test_set_grid_virtual_refs(icechunk_filestore: "IcechunkStore", netcdf4_file
)


# TODO test writing to a group that isn't the root group
def test_write_loadable_variable(
    icechunk_filestore: "IcechunkStore", simple_netcdf4: Path
):
    """Write a loadable and a virtual variable into the same icechunk store.

    ``air`` is backed by an in-memory numpy array, while ``pres`` is backed by
    a ``ManifestArray`` whose single chunk references a byte range inside
    ``simple_netcdf4``. After ``dataset_to_icechunk`` both must be readable
    from the root group of the store with the expected shape, dtype and
    contents.
    """
    # instead for now just write out byte ranges explicitly
    manifest = ChunkManifest(
        {"0.0": {"path": simple_netcdf4, "offset": 6144, "length": 48}}
    )
    zarray = ZArray(
        shape=(3, 4),
        chunks=(3, 4),
        dtype=np.dtype("int32"),
        compressor=None,
        filters=None,
        fill_value=None,
    )
    ma = ManifestArray(
        chunkmanifest=manifest,
        zarray=zarray,
    )

    # Virtual variable: its data is only a reference into the netCDF file.
    ma_v = Variable(data=ma, dims=["x", "y"])

    # Loadable variable: real in-memory values. A seeded generator keeps the
    # test data reproducible if the comparison below ever fails.
    la_v = Variable(
        dims=["x", "y"],
        data=np.random.default_rng(0).random((3, 4)),
        attrs={"units": "km"},
    )

    # The second positional argument of ``Dataset`` is ``coords``, so ``pres``
    # is stored as a coordinate here; the keywords just make that explicit.
    # NOTE(review): confirm that ``pres`` is meant to be a coordinate rather
    # than a data variable.
    vds = Dataset(data_vars={"air": la_v}, coords={"pres": ma_v})

    dataset_to_icechunk(vds, icechunk_filestore)

    root_group = group(store=icechunk_filestore)

    # The loadable variable must round-trip with its values and attributes.
    air_array = root_group["air"]
    assert isinstance(air_array, Array)
    assert air_array.shape == (3, 4)
    assert air_array.dtype == np.dtype("float64")
    assert air_array.attrs["units"] == "km"
    assert np.allclose(air_array[:], la_v[:])

    # The virtual variable must resolve to the bytes of the referenced file.
    pres_array = root_group["pres"]
    assert isinstance(pres_array, Array)
    assert pres_array.shape == (3, 4)
    assert pres_array.dtype == np.dtype("int32")
    # Load the expected values eagerly inside the context manager so the
    # netCDF file handle is released before the comparison.
    with open_dataset(simple_netcdf4) as expected_ds:
        expected_array = expected_ds["foo"].to_numpy()
    npt.assert_equal(pres_array, expected_array)


# TODO test writing to a group that isn't the root group

# TODO roundtripping tests - requires icechunk compatibility with xarray

Expand Down
20 changes: 11 additions & 9 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,22 @@ def write_variables_to_icechunk_group(
store,
group,
):
print(variables)
virtual_variables = {
name: var
for name, var in variables.items()
if isinstance(var.data, ManifestArray)
}

loadable_variables = {
name: var for name, var in variables.items() if name not in virtual_variables
}

# First write all the non-virtual variables, because xarray has issues with overwriting the root
# group's attributes after the first variable is written
ds = Dataset(loadable_variables)
ds.to_zarr(store, zarr_format=3, consolidated=False, mode='a')

# Then finish by writing the virtual variables to the same group
for name, var in virtual_variables.items():
write_virtual_variable_to_icechunk(
store=store,
Expand All @@ -79,13 +88,6 @@ def write_variables_to_icechunk_group(
var=var,
)

loadable_variables = {
name: var for name, var in variables.items() if name not in virtual_variables
}

ds = Dataset(loadable_variables)
ds.to_zarr(store, zarr_format=3, consolidated=False, mode="r+")


def write_variable_to_icechunk(
store: "IcechunkStore",
Expand Down Expand Up @@ -165,7 +167,7 @@ def write_manifest_virtual_refs(
flags=[
"refs_ok",
"multi_index",
"c_index", # TODO is "c_index" correct? what's the convention for zarr chunk keys?
"c_index",
],
op_flags=[["readonly"]] * 3, # type: ignore
)
Expand Down

0 comments on commit ea52003

Please sign in to comment.