Skip to content

Commit

Permalink
Refactor gen virtual dataset method
Browse files Browse the repository at this point in the history
  • Loading branch information
abarciauskas-bgse committed Oct 26, 2024
1 parent 360ea14 commit d3c2851
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 25 deletions.
83 changes: 72 additions & 11 deletions virtualizarr/tests/test_writers/test_icechunk_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,56 @@ def icechunk_storage(tmpdir) -> "StorageConfig":
return storage

Check warning on line 28 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L28

Added line #L28 was not covered by tests


def gen_virtual_dataset(file_uri: str):
manifest = ChunkManifest({"0.0": {"path": file_uri, "offset": 6144, "length": 48}})
def generate_chunk_manifest(

Check warning on line 31 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L31

Added line #L31 was not covered by tests
netcdf4_file: str,
shape: tuple[int, int] = (3, 4),
chunks: tuple[int, int] = (3, 4),
base_offset=6144,
length=48,
) -> ChunkManifest:
chunk_dict = {}
num_chunks_x = shape[0] // chunks[0]
num_chunks_y = shape[1] // chunks[1]
offset = base_offset

Check warning on line 41 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L38-L41

Added lines #L38 - L41 were not covered by tests

for i in range(num_chunks_x):
for j in range(num_chunks_y):
chunk_index = f"{i}.{j}"
chunk_dict[chunk_index] = {

Check warning on line 46 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L43-L46

Added lines #L43 - L46 were not covered by tests
"path": netcdf4_file,
"offset": offset,
"length": length,
}
offset += length # Increase offset for each chunk
return ChunkManifest(chunk_dict)

Check warning on line 52 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L51-L52

Added lines #L51 - L52 were not covered by tests


def gen_virtual_dataset(

Check warning on line 55 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L55

Added line #L55 was not covered by tests
file_uri: str,
shape: tuple[int, int] = (3, 4),
chunks: tuple[int, int] = (3, 4),
dtype: np.dtype = np.dtype("int32"),
compressor: str = None,
filters: str = None,
fill_value: str = None,
variable_name: str = "foo",
):
manifest = generate_chunk_manifest(file_uri, shape, chunks)
zarray = ZArray(

Check warning on line 66 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L65-L66

Added lines #L65 - L66 were not covered by tests
shape=(3, 4),
chunks=(3, 4),
dtype=np.dtype("int32"),
compressor=None,
filters=None,
fill_value=None,
shape=shape,
chunks=chunks,
dtype=dtype,
compressor=compressor,
filters=filters,
fill_value=fill_value,
)
ma = ManifestArray(

Check warning on line 74 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L74

Added line #L74 was not covered by tests
chunkmanifest=manifest,
zarray=zarray,
)
foo = Variable(data=ma, dims=["x", "y"])
var = Variable(data=ma, dims=["x", "y"])
return Dataset(

Check warning on line 79 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L78-L79

Added lines #L78 - L79 were not covered by tests
{"foo": foo},
{variable_name: var},
)


Expand All @@ -55,7 +88,7 @@ def test_set_single_virtual_ref_without_encoding(
from icechunk import IcechunkStore

Check warning on line 88 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L87-L88

Added lines #L87 - L88 were not covered by tests

# generate virtual dataset
vds = gen_virtual_dataset(simple_netcdf4)
vds = gen_virtual_dataset(file_uri=simple_netcdf4)

Check warning on line 91 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L91

Added line #L91 was not covered by tests

# create the icechunk store and commit the first virtual dataset
icechunk_filestore = IcechunkStore.create(storage=icechunk_storage)
Expand All @@ -78,3 +111,31 @@ def test_set_single_virtual_ref_without_encoding(
[expected_ds["foo"], expected_ds["foo"]], dim="x"
).to_numpy()
npt.assert_equal(array, expected_array)

Check warning on line 113 in virtualizarr/tests/test_writers/test_icechunk_append.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/tests/test_writers/test_icechunk_append.py#L113

Added line #L113 was not covered by tests


# def test_append_virtual_ref_with_encoding(
# icechunk_storage: "StorageConfig", netcdf4_files: tuple[str, str]
# ):
# import xarray as xr
# from icechunk import IcechunkStore

# # generate virtual dataset
# filepath1, filepath2 = netcdf4_files
# vds1, vds2 = open_virtual_dataset(filepath1), open_virtual_dataset(filepath2)

# # create the icechunk store and commit the first virtual dataset
# icechunk_filestore = IcechunkStore.create(storage=icechunk_storage)
# dataset_to_icechunk(vds1, icechunk_filestore)
# icechunk_filestore.commit(
# "test commit"
# ) # need to commit it in order to append to it in the next lines

# # Append the same dataset to the same store
# icechunk_filestore_append = IcechunkStore.open_existing(
# storage=icechunk_storage, mode="a"
# )
# dataset_to_icechunk(vds2, icechunk_filestore_append, append_dim="time")

# root_group = group(store=icechunk_filestore_append)
# array = root_group["foo"]
# import pdb; pdb.set_trace()
29 changes: 15 additions & 14 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def write_virtual_variable_to_icechunk(
# Aimee: resize the array if it already exists
# TODO: assert chunking and encoding is the same
existing_keys = tuple(group.array_keys())
append_axis, existing_num_chunks = None, None
append_axis, existing_num_chunks, arr = None, None, None
if name in existing_keys and mode == "a":

Check warning on line 149 in virtualizarr/writers/icechunk.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/writers/icechunk.py#L147-L149

Added lines #L147 - L149 were not covered by tests
# resize
dims = var.dims
Expand All @@ -159,19 +159,20 @@ def write_virtual_variable_to_icechunk(
new_shape = list(existing_array.shape)
new_shape[append_axis] += var.shape[append_axis]
shape = tuple(new_shape)

Check warning on line 161 in virtualizarr/writers/icechunk.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/writers/icechunk.py#L159-L161

Added lines #L159 - L161 were not covered by tests
existing_array.resize(new_shape)

# creates array if it doesn't already exist
arr = group.require_array(
name=name,
shape=shape,
chunk_shape=zarray.chunks,
dtype=encode_dtype(zarray.dtype),
codecs=zarray._v3_codec_pipeline(),
dimension_names=var.dims,
fill_value=zarray.fill_value,
# TODO fill_value?
)
# this doesn't seem to actually resize the array
arr = existing_array.resize(shape)

Check warning on line 163 in virtualizarr/writers/icechunk.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/writers/icechunk.py#L163

Added line #L163 was not covered by tests
else:
# create array if it doesn't already exist
arr = group.require_array(

Check warning on line 166 in virtualizarr/writers/icechunk.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/writers/icechunk.py#L166

Added line #L166 was not covered by tests
name=name,
shape=shape,
chunk_shape=zarray.chunks,
dtype=encode_dtype(zarray.dtype),
codecs=zarray._v3_codec_pipeline(),
dimension_names=var.dims,
fill_value=zarray.fill_value,
# TODO fill_value?
)

# TODO it would be nice if we could assign directly to the .attrs property
# Aimee: assert that new attributes are the same as existing attributes
Expand Down

0 comments on commit d3c2851

Please sign in to comment.