Skip to content

Commit

Permalink
Merge branch 'main' into dmrpp_root_group_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
TomNicholas authored Nov 8, 2024
2 parents 72cecc2 + 4ae7a19 commit 944779b
Show file tree
Hide file tree
Showing 11 changed files with 80 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.6.9"
rev: "v0.7.2"
hooks:
# Run the linter.
- id: ruff
Expand Down
2 changes: 1 addition & 1 deletion ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- h5py
- hdf5
- netcdf4
- xarray>=2024.6.0
- xarray>=2024.10.0
- kerchunk>=0.2.5
- numpy>=2.0.0
- ujson
Expand Down
2 changes: 1 addition & 1 deletion ci/min-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- h5py
- hdf5
- netcdf4
- xarray>=2024.6.0
- xarray>=2024.10.0
- numpy>=2.0.0
- numcodecs
- packaging
Expand Down
2 changes: 1 addition & 1 deletion ci/upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
- xarray>=2024.10.0
- h5netcdf
- h5py
- hdf5
Expand All @@ -25,6 +26,5 @@ dependencies:
- pip
- pip:
- icechunk # Installs zarr v3 as dependency
- git+https://github.com/pydata/xarray@zarr-v3 # zarr-v3 compatibility branch
- git+https://github.com/zarr-developers/numcodecs@zarr3-codecs # zarr-v3 compatibility branch
# - git+https://github.com/fsspec/kerchunk@main # kerchunk is currently incompatible with zarr-python v3 (https://github.com/fsspec/kerchunk/pull/516)
9 changes: 9 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ def netcdf4_file(tmpdir):
return filepath


@pytest.fixture
def netcdf4_file_with_2d_coords(tmpdir):
    """Write the ROMS tutorial dataset to a NETCDF4 file and return its path.

    The ROMS example contains 2D coordinate variables (e.g. lon_rho/lat_rho),
    which makes it useful for exercising CF ``coordinates``-attribute detection.
    NOTE(review): ``xr.tutorial.open_dataset`` downloads the sample file on
    first use — assumes network access is available in CI; confirm.
    """
    path = f"{tmpdir}/ROMS_example.nc"
    dataset = xr.tutorial.open_dataset("ROMS_example")
    dataset.to_netcdf(path, format="NETCDF4")
    dataset.close()
    return path


@pytest.fixture
def netcdf4_virtual_dataset(netcdf4_file):
from virtualizarr import open_virtual_dataset
Expand Down
45 changes: 35 additions & 10 deletions docs/releases.rst
Original file line number Diff line number Diff line change
@@ -1,36 +1,62 @@
Release notes
=============

.. _v1.0.1:
.. _v1.1.1:

v1.0.1 (unreleased)
v1.1.1 (unreleased)
-------------------

New Features
~~~~~~~~~~~~

Breaking changes
~~~~~~~~~~~~~~~~

- Minimum required version of Xarray is now v2024.10.0.
(:pull:`284`) By `Tom Nicholas <https://github.com/TomNicholas>`_.

Deprecations
~~~~~~~~~~~~

Bug fixes
~~~~~~~~~

- Fixed bug with writing of `dimension_names` into zarr metadata.
(:pull:`286`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Fixed bug causing CF-compliant variables not to be identified as coordinates (:pull:`191`)
By `Ayush Nag <https://github.com/ayushnag>`_.

Documentation
~~~~~~~~~~~~~

- FAQ answers on Icechunk compatibility, converting from existing Kerchunk references to Icechunk, and how to add a new reader for a custom file format.
(:pull:`266`) By `Tom Nicholas <https://github.com/TomNicholas>`_.

Internal Changes
~~~~~~~~~~~~~~~~

.. _v1.1.0:

v1.1.0 (22nd Oct 2024)
----------------------

New Features
~~~~~~~~~~~~

- Can open `kerchunk` reference files with ``open_virtual_dataset``.
(:pull:`251`, :pull:`186`) By `Raphael Hagen <https://github.com/norlandrhagen>`_ & `Kristen Thyng <https://github.com/kthyng>`_.

- Adds defaults for `open_virtual_dataset_from_v3_store` (:pull:`234`)
By `Raphael Hagen <https://github.com/norlandrhagen>`_.

- New ``group`` option on ``open_virtual_dataset`` enables extracting specific HDF Groups.
(:pull:`165`) By `Scott Henderson <https://github.com/scottyhq>`_.

- Adds `decode_times` to open_virtual_dataset (:pull:`232`)
By `Raphael Hagen <https://github.com/norlandrhagen>`_.

- Add parser for the OPeNDAP DMR++ XML format and integration with open_virtual_dataset (:pull:`113`)
By `Ayush Nag <https://github.com/ayushnag>`_.

- Load scalar variables by default. (:pull:`205`)
By `Gustavo Hidalgo <https://github.com/ghidalgo3>`_.

- Support empty files (:pull:`260`)
By `Justus Magin <https://github.com/keewis>`_.

- Can write virtual datasets to Icechunk stores using `virtualize.to_icechunk` (:pull:`256`)
By `Matt Iannucci <https://github.com/mpiannucci>`_.

Expand Down Expand Up @@ -64,7 +90,6 @@ Documentation
- Adds virtualizarr + coiled serverless example notebook (:pull:`223`)
By `Raphael Hagen <https://github.com/norlandrhagen>`_.


Internal Changes
~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
"xarray>=2024.06.0",
"xarray>=2024.10.0",
"numpy>=2.0.0",
"packaging",
"universal-pathlib",
Expand Down
17 changes: 8 additions & 9 deletions virtualizarr/readers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from collections.abc import Iterable, Mapping, MutableMapping
from io import BufferedIOBase
from typing import (
TYPE_CHECKING,
Any,
Hashable,
Optional,
Expand All @@ -14,6 +13,7 @@
from xarray import (
Coordinates,
Dataset,
DataTree,
Index,
IndexVariable,
Variable,
Expand All @@ -26,12 +26,6 @@

XArrayOpenT = str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore

if TYPE_CHECKING:
try:
from xarray import DataTree # type: ignore[attr-defined]
except ImportError:
DataTree = Any


def open_loadable_vars_and_indexes(
filepath: str,
Expand Down Expand Up @@ -144,8 +138,13 @@ def separate_coords(
coord_vars: dict[
str, tuple[Hashable, Any, dict[Any, Any], dict[Any, Any]] | Variable
] = {}
found_coord_names: set[str] = set()
# Search through variable attributes for coordinate names
for var in vars.values():
if "coordinates" in var.attrs:
found_coord_names.update(var.attrs["coordinates"].split(" "))
for name, var in vars.items():
if name in coord_names or var.dims == (name,):
if name in coord_names or var.dims == (name,) or name in found_coord_names:
# use workaround to avoid creating IndexVariables described here https://github.com/pydata/xarray/pull/8107#discussion_r1311214263
if len(var.dims) == 1:
dim1d, *_ = var.dims
Expand Down Expand Up @@ -189,5 +188,5 @@ def open_virtual_datatree(
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
reader_options: Optional[dict] = None,
) -> "DataTree":
) -> DataTree:
raise NotImplementedError()
22 changes: 22 additions & 0 deletions virtualizarr/tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,28 @@ def test_coordinate_variable_attrs_preserved(self, netcdf4_file):
}


@requires_kerchunk
class TestDetermineCoords:
    """Tests for how ``open_virtual_dataset`` decides which variables are coordinates."""

    def test_infer_one_dimensional_coords(self, netcdf4_file):
        """1D variables sharing their dimension's name become dimension coordinates."""
        vds = open_virtual_dataset(netcdf4_file, indexes={})
        assert set(vds.coords) == {"time", "lat", "lon"}

    def test_var_attr_coords(self, netcdf4_file_with_2d_coords):
        """Names listed in variables' CF ``coordinates`` attributes are coordinates.

        Covers 2D coordinates, 1D non-dimension coordinates and scalar
        coordinates from the ROMS example file.
        """
        vds = open_virtual_dataset(netcdf4_file_with_2d_coords, indexes={})

        expected_coords = {
            # dimension coordinates
            "ocean_time",
            "s_rho",
            # 2D coordinates
            "lon_rho",
            "lat_rho",
            "h",
            # 1D non-dimension coordinate
            "Cs_r",
            # scalar coordinates
            "hc",
            "Vtransform",
        }
        assert set(vds.coords) == expected_coords


@network
@requires_s3fs
class TestReadFromS3:
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_write_new_virtual_variable(
# assert dict(arr.attrs) == {"units": "km"}

# check dimensions
assert arr.attrs["_ARRAY_DIMENSIONS"] == ["x", "y"]
assert arr.metadata.dimension_names == ("x", "y")


def test_set_single_virtual_ref_without_encoding(
Expand Down
1 change: 0 additions & 1 deletion virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def write_virtual_variable_to_icechunk(
# TODO it would be nice if we could assign directly to the .attrs property
for k, v in var.attrs.items():
arr.attrs[k] = encode_zarr_attr_value(v)
arr.attrs["_ARRAY_DIMENSIONS"] = encode_zarr_attr_value(var.dims)

_encoding_keys = {"_FillValue", "missing_value", "scale_factor", "add_offset"}
for k, v in var.encoding.items():
Expand Down

0 comments on commit 944779b

Please sign in to comment.