Skip to content

Commit

Permalink
Working on tests for generate chunk key function
Browse files Browse the repository at this point in the history
  • Loading branch information
abarciauskas-bgse committed Oct 25, 2024
1 parent d3a4048 commit 5d5f9e2
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
44 changes: 43 additions & 1 deletion virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from zarr import Array, Group, group # type: ignore[import-untyped]

from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.writers.icechunk import dataset_to_icechunk
from virtualizarr.writers.icechunk import dataset_to_icechunk, generate_chunk_key
from virtualizarr.zarr import ZArray

if TYPE_CHECKING:
Expand Down Expand Up @@ -288,3 +288,45 @@ def test_write_loadable_variable(
# TODO test writing to a group that isn't the root group

# TODO test with S3 / minio


def test_generate_chunk_key_no_offset():
    # With neither append_axis nor existing_num_chunks supplied, the key is
    # simply the multi-index joined with slashes.
    key = generate_chunk_key((1, 2, 3))
    assert (
        key == "1/2/3"
    ), "The chunk key should match the index without any offset."


def test_generate_chunk_key_with_offset():
    # Appending along axis 1 with 5 pre-existing chunks must shift that
    # component of the index by 5.
    key = generate_chunk_key((1, 2, 3), append_axis=1, existing_num_chunks=5)
    assert key == "1/7/3", "The chunk key should offset the second index by 5."


def test_generate_chunk_key_zero_offset():
    # A zero chunk count on the append axis must leave the index untouched.
    key = generate_chunk_key((4, 5, 6), append_axis=1, existing_num_chunks=0)
    assert (
        key == "4/5/6"
    ), "No offset should be applied when existing_num_chunks is 0."


def test_generate_chunk_key_append_axis_out_of_bounds():
    # Axis 2 does not exist for a 2-element index, so applying the offset
    # must raise IndexError.
    bad_axis = 2
    with pytest.raises(IndexError):
        generate_chunk_key((3, 4), append_axis=bad_axis, existing_num_chunks=1)


# These tests are discovered and run by pytest.
23 changes: 17 additions & 6 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,19 @@ def write_virtual_variable_to_icechunk(
)


def generate_chunk_key(
    index: tuple[int, ...],
    append_axis: Optional[int] = None,
    existing_num_chunks: Optional[int] = None,
) -> str:
    """Build the slash-separated chunk key (e.g. ``"1/2/3"``) for a chunk index.

    When appending along ``append_axis``, the component of ``index`` on that
    axis is offset by ``existing_num_chunks`` so that new chunks are keyed
    after the chunks already present in the store.

    Parameters
    ----------
    index
        Multi-dimensional chunk index (e.g. ``np.nditer``'s ``multi_index``).
    append_axis
        Axis being appended along, or ``None`` for a plain (non-append) write.
    existing_num_chunks
        Number of chunks already present along ``append_axis``. Ignored when
        ``append_axis`` is ``None``; treated as 0 when ``None``.

    Returns
    -------
    str
        The chunk key, index components joined with ``"/"``.

    Raises
    ------
    IndexError
        If ``append_axis`` is out of bounds for ``index``.
    """
    if append_axis is not None:
        offset_index = list(index)
        # Offset by the number of existing chunks on the append axis;
        # a missing count means there is nothing to offset by.
        offset_index[append_axis] += existing_num_chunks or 0
        index = tuple(offset_index)
    return "/".join(str(i) for i in index)


def write_manifest_virtual_refs(
store: "IcechunkStore",
group: "Group",
Expand All @@ -209,6 +222,8 @@ def write_manifest_virtual_refs(
# loop over every reference in the ChunkManifest for that array
# TODO inefficient: this should be replaced with something that sets all (new) references for the array at once
# but Icechunk need to expose a suitable API first
# NOTE(Aimee): the manifest (and its corresponding paths, offsets and lengths) already has the shape of the datacube's chunks,
# so we want to increment the resulting multi-index
it = np.nditer(
[manifest._paths, manifest._offsets, manifest._lengths], # type: ignore[arg-type]
flags=[
Expand All @@ -220,13 +235,9 @@ def write_manifest_virtual_refs(
)

for path, offset, length in it:
# it.multi_index will be an iterator of the chunk shape
index = it.multi_index
if append_axis is not None:
list_index = list(index)
# Offset by the number of existing chunks on the append axis
list_index[append_axis] += existing_num_chunks
index = tuple(list_index)
chunk_key = "/".join(str(i) for i in index)
chunk_key = generate_chunk_key(index, append_axis, existing_num_chunks)

# set each reference individually
store.set_virtual_ref(
Expand Down

0 comments on commit 5d5f9e2

Please sign in to comment.