From 5d5f9e2e1e0d9df297b8a8274075301282edb158 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Fri, 25 Oct 2024 16:10:11 -0700 Subject: [PATCH] Working on tests for generate chunk key function --- .../tests/test_writers/test_icechunk.py | 44 ++++++++++++++++++- virtualizarr/writers/icechunk.py | 23 +++++++--- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_icechunk.py b/virtualizarr/tests/test_writers/test_icechunk.py index f99b2718..44b1fcf6 100644 --- a/virtualizarr/tests/test_writers/test_icechunk.py +++ b/virtualizarr/tests/test_writers/test_icechunk.py @@ -12,7 +12,7 @@ from zarr import Array, Group, group # type: ignore[import-untyped] from virtualizarr.manifests import ChunkManifest, ManifestArray -from virtualizarr.writers.icechunk import dataset_to_icechunk +from virtualizarr.writers.icechunk import dataset_to_icechunk, generate_chunk_key from virtualizarr.zarr import ZArray if TYPE_CHECKING: @@ -288,3 +288,45 @@ def test_write_loadable_variable( # TODO test writing to a group that isn't the root group # TODO test with S3 / minio + + +def test_generate_chunk_key_no_offset(): + # Test case without any offset (append_axis and existing_num_chunks are None) + index = (1, 2, 3) + result = generate_chunk_key(index) + assert result == "1/2/3", "The chunk key should match the index without any offset." + + +def test_generate_chunk_key_with_offset(): + # Test case with offset on append_axis 1 + index = (1, 2, 3) + append_axis = 1 + existing_num_chunks = 5 + result = generate_chunk_key( + index, append_axis=append_axis, existing_num_chunks=existing_num_chunks + ) + assert result == "1/7/3", "The chunk key should offset the second index by 5." + + +def test_generate_chunk_key_zero_offset(): + # Test case where existing_num_chunks is 0 (no offset should be applied) + index = (4, 5, 6) + append_axis = 1 + existing_num_chunks = 0 + result = generate_chunk_key( + index, append_axis=append_axis, existing_num_chunks=existing_num_chunks + ) + assert ( + result == "4/5/6" + ), "No offset should be applied when existing_num_chunks is 0." + + +def test_generate_chunk_key_append_axis_out_of_bounds(): + # Edge case where append_axis is out of bounds + index = (3, 4) + append_axis = 2 # This is out of bounds for a 2D index + with pytest.raises(IndexError): + generate_chunk_key(index, append_axis=append_axis, existing_num_chunks=1) + + +# Run tests using pytest diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index 6ca97da1..5bd735c4 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -194,6 +194,19 @@ def write_virtual_variable_to_icechunk( ) +def generate_chunk_key( + index: np.nditer.multi_index, + append_axis: Optional[int] = None, + existing_num_chunks: Optional[int] = None, +) -> str: + if append_axis is not None: + list_index = list(index) + # Offset by the number of existing chunks on the append axis + list_index[append_axis] += existing_num_chunks + index = tuple(list_index) + return "/".join(str(i) for i in index) + + def write_manifest_virtual_refs( store: "IcechunkStore", group: "Group", @@ -209,6 +222,8 @@ def write_manifest_virtual_refs( # loop over every reference in the ChunkManifest for that array # TODO inefficient: this should be replaced with something that sets all (new) references for the array at once # but Icechunk need to expose a suitable API first + # Aimee: the manifest (and it's corresponding paths, offsets and lengths, already has the shape of the datacube's chunks + # so we want to increment the resulting multi index it = np.nditer( [manifest._paths, manifest._offsets, manifest._lengths], # type: ignore[arg-type] flags=[ @@ -220,13 +235,9 @@ def write_manifest_virtual_refs( ) for path, offset, length in it: + # it.multi_index will be an iterator of the chunk shape index = it.multi_index - if append_axis is not None: - list_index = list(index) - # Offset by the number of existing chunks on the append axis - list_index[append_axis] += existing_num_chunks - index = tuple(list_index) - chunk_key = "/".join(str(i) for i in index) + chunk_key = generate_chunk_key(index, append_axis, existing_num_chunks) # set each reference individually store.set_virtual_ref(