diff --git a/README.md b/README.md
index 0f91224b..3e0bc4b6 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Currently you need to clone VirtualiZarr and install it locally:
 git clone virtualizarr
 pip install -e .
 ```
+You will also need a specific branch of xarray in order for concatenation without indexes to work. (See [this comment](https://github.com/TomNicholas/VirtualiZarr/issues/14#issuecomment-2018369470).) You may want to install the dependencies using the `virtualizarr/ci/environment.yml` conda file, which includes the specific branch of xarray required.
 
 ### Usage
 
diff --git a/ci/environment.yml b/ci/environment.yml
index 82b00449..876c9a5c 100644
--- a/ci/environment.yml
+++ b/ci/environment.yml
@@ -15,4 +15,4 @@ dependencies:
   - ujson
   - pydantic
   - pip:
-    - xarray>=2024.02.0.dev0
+    - git+https://github.com/TomNicholas/xarray.git@concat-no-indexes#egg=xarray
diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
index 2bc00ca2..4355d033 100644
--- a/virtualizarr/tests/test_xarray.py
+++ b/virtualizarr/tests/test_xarray.py
@@ -72,6 +72,7 @@ def test_equals(self):
         assert not ds1.equals(ds3)
 
 
+# TODO refactor these tests by making some fixtures
 class TestConcat:
     def test_concat_along_existing_dim(self):
         # both manifest arrays in this example have the same zarray properties
@@ -164,3 +165,53 @@ def test_concat_along_new_dim(self):
         assert result.data.zarray.fill_value == zarray.fill_value
         assert result.data.zarray.order == zarray.order
         assert result.data.zarray.zarr_format == zarray.zarr_format
+
+    def test_concat_dim_coords_along_existing_dim(self):
+        # Tests that dimension coordinates don't automatically get new indexes on concat
+        # See https://github.com/pydata/xarray/issues/8871
+
+        # both manifest arrays in this example have the same zarray properties
+        zarray = ZArray(
+            chunks=(10,),
+            compressor="zlib",
+            dtype=np.dtype("int32"),
+            fill_value=0.0,
+            filters=None,
+            order="C",
+            shape=(20,),
+            zarr_format=2,
+        )
+
+        chunks_dict1 = {
+            "0": {"path": "foo.nc", "offset": 100, "length": 100},
+            "1": {"path": "foo.nc", "offset": 200, "length": 100},
+        }
+        manifest1 = ChunkManifest(entries=chunks_dict1)
+        marr1 = ManifestArray(zarray=zarray, chunkmanifest=manifest1)
+        coords = xr.Coordinates({"t": (["t"], marr1)}, indexes={})
+        ds1 = xr.Dataset(coords=coords)
+
+        chunks_dict2 = {
+            "0": {"path": "foo.nc", "offset": 300, "length": 100},
+            "1": {"path": "foo.nc", "offset": 400, "length": 100},
+        }
+        manifest2 = ChunkManifest(entries=chunks_dict2)
+        marr2 = ManifestArray(zarray=zarray, chunkmanifest=manifest2)
+        coords = xr.Coordinates({"t": (["t"], marr2)}, indexes={})
+        ds2 = xr.Dataset(coords=coords)
+
+        result = xr.concat([ds1, ds2], dim="t")["t"]
+
+        assert result.shape == (40,)
+        assert result.chunks == (10,)
+        assert result.data.manifest.dict() == {
+            "0": {"path": "foo.nc", "offset": 100, "length": 100},
+            "1": {"path": "foo.nc", "offset": 200, "length": 100},
+            "2": {"path": "foo.nc", "offset": 300, "length": 100},
+            "3": {"path": "foo.nc", "offset": 400, "length": 100},
+        }
+        assert result.data.zarray.compressor == zarray.compressor
+        assert result.data.zarray.filters == zarray.filters
+        assert result.data.zarray.fill_value == zarray.fill_value
+        assert result.data.zarray.order == zarray.order
+        assert result.data.zarray.zarr_format == zarray.zarr_format