Skip to content

Commit

Permalink
Merge pull request #44 from TomNicholas/concat_dim_coord_no_index
Browse files Browse the repository at this point in the history
Test concat of dimension coordinate not backed by an index
  • Loading branch information
TomNicholas authored Mar 25, 2024
2 parents b80059b + 85472bc commit 4375ccb
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Currently you need to clone VirtualiZarr and install it locally:
git clone virtualizarr
pip install -e .
```
You will also need a specific branch of xarray in order for concatenation without indexes to work. (See [this comment](https://github.com/TomNicholas/VirtualiZarr/issues/14#issuecomment-2018369470).) You may want to install the dependencies using the `virtualizarr/ci/environment.yml` conda file, which includes the specific branch of xarray required.

### Usage

Expand Down
2 changes: 1 addition & 1 deletion ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ dependencies:
- ujson
- pydantic
- pip:
- xarray>=2024.02.0.dev0
- git+https://github.com/TomNicholas/xarray.git@concat-no-indexes#egg=xarray
51 changes: 51 additions & 0 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_equals(self):
assert not ds1.equals(ds3)


# TODO refactor these tests by making some fixtures
class TestConcat:
def test_concat_along_existing_dim(self):
# both manifest arrays in this example have the same zarray properties
Expand Down Expand Up @@ -164,3 +165,53 @@ def test_concat_along_new_dim(self):
assert result.data.zarray.fill_value == zarray.fill_value
assert result.data.zarray.order == zarray.order
assert result.data.zarray.zarr_format == zarray.zarr_format

def test_concat_dim_coords_along_existing_dim(self):
    """Concatenate two index-free dimension coordinates along their own dim.

    Checks that dimension coordinates don't automatically get new indexes
    on concat, so that the ManifestArray backing ``t`` survives
    ``xr.concat`` and its chunk manifest is stitched together.
    See https://github.com/pydata/xarray/issues/8871
    """
    # Both manifest arrays in this example share the same zarray properties.
    shared_zarray = ZArray(
        chunks=(10,),
        compressor="zlib",
        dtype=np.dtype("int32"),
        fill_value=0.0,
        filters=None,
        order="C",
        shape=(20,),
        zarr_format=2,
    )

    def build_dataset(first_offset, second_offset):
        # Two-chunk manifest pointing into foo.nc at the given byte offsets.
        entries = {
            "0": {"path": "foo.nc", "offset": first_offset, "length": 100},
            "1": {"path": "foo.nc", "offset": second_offset, "length": 100},
        }
        marr = ManifestArray(
            zarray=shared_zarray,
            chunkmanifest=ChunkManifest(entries=entries),
        )
        # Pass an explicit empty `indexes` mapping when building the
        # dimension coordinate "t".
        index_free_coords = xr.Coordinates({"t": (["t"], marr)}, indexes={})
        return xr.Dataset(coords=index_free_coords)

    datasets = [build_dataset(100, 200), build_dataset(300, 400)]

    concatenated = xr.concat(datasets, dim="t")["t"]

    assert concatenated.shape == (40,)
    assert concatenated.chunks == (10,)

    # Chunk keys are renumbered consecutively across the two inputs.
    expected_manifest = {
        str(position): {"path": "foo.nc", "offset": offset, "length": 100}
        for position, offset in enumerate((100, 200, 300, 400))
    }
    assert concatenated.data.manifest.dict() == expected_manifest

    # The remaining zarray metadata must be carried over untouched.
    combined_zarray = concatenated.data.zarray
    for attr in ("compressor", "filters", "fill_value", "order", "zarr_format"):
        assert getattr(combined_zarray, attr) == getattr(shared_zarray, attr)

0 comments on commit 4375ccb

Please sign in to comment.