From 772c5800b30507dac37c083b0afa119442e995e3 Mon Sep 17 00:00:00 2001
From: sharkinsspatial
Date: Wed, 23 Oct 2024 20:59:35 -0400
Subject: [PATCH] Add option to explicitly specify use of an experimental hdf backend.

---
 virtualizarr/backend.py                     | 13 ++-
 virtualizarr/tests/test_backend.py          | 85 +++++++++++++------
 virtualizarr/tests/test_integration.py      | 30 +++++--
 .../test_readers/test_hdf_integration.py    | 20 +++--
 virtualizarr/tests/test_xarray.py           | 36 +++++---
 5 files changed, 126 insertions(+), 58 deletions(-)

diff --git a/virtualizarr/backend.py b/virtualizarr/backend.py
index 19aebfdd..3ab76d1f 100644
--- a/virtualizarr/backend.py
+++ b/virtualizarr/backend.py
@@ -13,12 +13,13 @@
 from virtualizarr.readers import (
     DMRPPVirtualBackend,
     FITSVirtualBackend,
-    HDFVirtualBackend,
+    HDF5VirtualBackend,
     KerchunkVirtualBackend,
     NetCDF3VirtualBackend,
     TIFFVirtualBackend,
     ZarrV3VirtualBackend,
 )
+from virtualizarr.readers.common import VirtualBackend
 from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions

 # TODO add entrypoint to allow external libraries to add to this mapping
@@ -26,9 +27,9 @@
     "kerchunk": KerchunkVirtualBackend,
     "zarr_v3": ZarrV3VirtualBackend,
     "dmrpp": DMRPPVirtualBackend,
-    "hdf5": HDFVirtualBackend,
-    "netcdf4": HDFVirtualBackend,  # note this is the same as for hdf5
     # all the below call one of the kerchunk backends internally (https://fsspec.github.io/kerchunk/reference.html#file-format-backends)
+    "hdf5": HDF5VirtualBackend,
+    "netcdf4": HDF5VirtualBackend,  # note this is the same as for hdf5
     "netcdf3": NetCDF3VirtualBackend,
     "tiff": TIFFVirtualBackend,
     "fits": FITSVirtualBackend,
@@ -113,6 +114,7 @@ def open_virtual_dataset(
     indexes: Mapping[str, Index] | None = None,
     virtual_array_class=ManifestArray,
     reader_options: Optional[dict] = None,
+    backend: Optional[VirtualBackend] = None,
 ) -> Dataset:
     """
     Open a file or store as an xarray Dataset wrapping virtualized zarr arrays.
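The new `backend` keyword takes a VirtualBackend class (not an instance) and, as the dispatch change in the next hunk shows, an explicitly supplied backend takes precedence over the filetype-based lookup in VIRTUAL_BACKENDS. A minimal usage sketch, assuming the experimental reader is importable as virtualizarr.readers.hdf.HDFVirtualBackend (as the test changes below do) and using an illustrative local file name:

    from virtualizarr import open_virtual_dataset
    from virtualizarr.readers.hdf import HDFVirtualBackend

    # Default path: the filetype is auto-detected and dispatched through
    # VIRTUAL_BACKENDS, so HDF5/netCDF4 files use the kerchunk-based HDF5VirtualBackend.
    vds = open_virtual_dataset("air.nc", indexes={})

    # Opt in to the experimental in-tree HDF reader by passing the class explicitly;
    # when `backend` is supplied it bypasses the filetype lookup entirely.
    vds = open_virtual_dataset("air.nc", indexes={}, backend=HDFVirtualBackend)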
@@ -182,7 +184,10 @@ def open_virtual_dataset(
         filepath=filepath, reader_options=reader_options
     )

-    backend_cls = VIRTUAL_BACKENDS.get(filetype.name.lower())
+    if backend:
+        backend_cls = backend
+    else:
+        backend_cls = VIRTUAL_BACKENDS.get(filetype.name.lower())

     if backend_cls is None:
         raise NotImplementedError(f"Unsupported file type: {filetype.name}")
diff --git a/virtualizarr/tests/test_backend.py b/virtualizarr/tests/test_backend.py
index 43a6bbd8..2368848a 100644
--- a/virtualizarr/tests/test_backend.py
+++ b/virtualizarr/tests/test_backend.py
@@ -11,6 +11,7 @@
 from virtualizarr import open_virtual_dataset
 from virtualizarr.backend import FileType, automatically_determine_filetype
 from virtualizarr.manifests import ManifestArray
+from virtualizarr.readers.hdf import HDFVirtualBackend
 from virtualizarr.tests import (
     has_astropy,
     has_tifffile,
@@ -82,14 +83,15 @@ def test_FileType():


 @requires_kerchunk
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestOpenVirtualDatasetIndexes:
-    def test_no_indexes(self, netcdf4_file):
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+    def test_no_indexes(self, netcdf4_file, hdf_backend):
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)
         assert vds.indexes == {}

-    def test_create_default_indexes(self, netcdf4_file):
+    def test_create_default_indexes(self, netcdf4_file, hdf_backend):
         with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
-            vds = open_virtual_dataset(netcdf4_file, indexes=None)
+            vds = open_virtual_dataset(netcdf4_file, indexes=None, backend=hdf_backend)
         ds = open_dataset(netcdf4_file, decode_times=True)

         # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812
@@ -113,7 +115,8 @@ def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, I


 @requires_kerchunk
-def test_cftime_index(tmpdir):
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+def test_cftime_index(tmpdir, hdf_backend):
     """Ensure a virtual dataset contains the same indexes as an Xarray dataset"""
     # Note: Test was created to debug: https://github.com/zarr-developers/VirtualiZarr/issues/168
     ds = xr.Dataset(
@@ -129,7 +132,10 @@
     )
     ds.to_netcdf(f"{tmpdir}/tmp.nc")
     vds = open_virtual_dataset(
-        f"{tmpdir}/tmp.nc", loadable_variables=["time", "lat", "lon"], indexes={}
+        f"{tmpdir}/tmp.nc",
+        loadable_variables=["time", "lat", "lon"],
+        indexes={},
+        backend=hdf_backend,
     )
     # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812
     assert index_mappings_equal(vds.xindexes, ds.xindexes)
@@ -139,15 +145,16 @@


 @requires_kerchunk
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestOpenVirtualDatasetAttrs:
-    def test_drop_array_dimensions(self, netcdf4_file):
+    def test_drop_array_dimensions(self, netcdf4_file, hdf_backend):
         # regression test for GH issue #150
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)
         assert "_ARRAY_DIMENSIONS" not in vds["air"].attrs

-    def test_coordinate_variable_attrs_preserved(self, netcdf4_file):
+    def test_coordinate_variable_attrs_preserved(self, netcdf4_file, hdf_backend):
         # regression test for GH issue #155
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)
         assert vds["lat"].attrs == {
             "standard_name": "latitude",
             "long_name": "Latitude",
@@ -165,7 +172,8 @@ class TestReadFromS3:
     @pytest.mark.parametrize(
         "indexes", [None, {}], ids=["None index", "empty dict index"]
     )
-    def test_anon_read_s3(self, filetype, indexes):
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_anon_read_s3(self, filetype, indexes, hdf_backend):
         """Parameterized tests for empty vs supplied indexes and filetypes."""
         # TODO: Switch away from this s3 url after minIO is implemented.
         fpath = "s3://carbonplan-share/virtualizarr/local.nc"
@@ -174,6 +182,7 @@ def test_anon_read_s3(self, filetype, indexes):
             filetype=filetype,
             indexes=indexes,
             reader_options={"storage_options": {"anon": True}},
+            backend=hdf_backend,
         )

         assert vds.dims == {"time": 2920, "lat": 25, "lon": 53}
@@ -182,6 +191,7 @@


 @network
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestReadFromURL:
     @pytest.mark.parametrize(
         "filetype, url",
@@ -228,10 +238,15 @@ class TestReadFromURL:
             ),
         ],
     )
-    def test_read_from_url(self, filetype, url):
+    def test_read_from_url(self, hdf_backend, filetype, url):
         if filetype in ["grib", "jpg", "hdf4"]:
             with pytest.raises(NotImplementedError):
-                vds = open_virtual_dataset(url, reader_options={}, indexes={})
+                vds = open_virtual_dataset(
+                    url,
+                    reader_options={},
+                    indexes={},
+                    backend=hdf_backend,
+                )
         elif filetype == "hdf5":
             vds = open_virtual_dataset(
                 url,
@@ -239,13 +254,14 @@ class TestReadFromURL:
                 drop_variables=["listOfCovarianceTerms", "listOfPolarizations"],
                 indexes={},
                 reader_options={},
+                backend=hdf_backend,
             )
             assert isinstance(vds, xr.Dataset)
         else:
-            vds = open_virtual_dataset(url, indexes={})
+            vds = open_virtual_dataset(url, indexes={}, backend=hdf_backend)
             assert isinstance(vds, xr.Dataset)

-    def test_virtualizarr_vs_local_nisar(self):
+    def test_virtualizarr_vs_local_nisar(self, hdf_backend):
         import fsspec

         # Open group directly from locally cached file with xarray
@@ -268,6 +284,7 @@ def test_virtualizarr_vs_local_nisar(self):
             group=hdf_group,
             indexes={},
             drop_variables=["listOfCovarianceTerms", "listOfPolarizations"],
+            backend=hdf_backend,
         )
         tmpref = "/tmp/cmip6.json"
         vds.virtualize.to_kerchunk(tmpref, format="json")
@@ -279,10 +296,14 @@ def test_virtualizarr_vs_local_nisar(self):

 @requires_kerchunk
 class TestLoadVirtualDataset:
-    def test_loadable_variables(self, netcdf4_file):
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_loadable_variables(self, netcdf4_file, hdf_backend):
         vars_to_load = ["air", "time"]
         vds = open_virtual_dataset(
-            netcdf4_file, loadable_variables=vars_to_load, indexes={}
+            netcdf4_file,
+            loadable_variables=vars_to_load,
+            indexes={},
+            backend=hdf_backend,
         )

         for name in vds.variables:
@@ -304,11 +325,20 @@ def test_explicit_filetype(self, netcdf4_file):
         with pytest.raises(NotImplementedError):
             open_virtual_dataset(netcdf4_file, filetype="grib")

-    def test_group_kwarg(self, hdf5_groups_file):
-        with pytest.raises(ValueError, match="Multiple HDF Groups found"):
-            open_virtual_dataset(hdf5_groups_file)
-        with pytest.raises(ValueError, match="not found in"):
-            open_virtual_dataset(hdf5_groups_file, group="doesnt_exist")
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_group_kwarg(self, hdf5_groups_file, hdf_backend):
+        if hdf_backend:
+            with pytest.raises(NotImplementedError, match="Nested groups"):
+                open_virtual_dataset(hdf5_groups_file, backend=hdf_backend)
+            with pytest.raises(KeyError, match="doesn't exist"):
+                open_virtual_dataset(
+                    hdf5_groups_file, group="doesnt_exist", backend=hdf_backend
+                )
+        else:
+            with pytest.raises(ValueError, match="Multiple HDF Groups found"):
+                open_virtual_dataset(hdf5_groups_file)
+            with pytest.raises(ValueError, match="not found in"):
+                open_virtual_dataset(hdf5_groups_file, group="doesnt_exist")

         vars_to_load = ["air", "time"]
         vds = open_virtual_dataset(
@@ -316,6 +346,7 @@ def test_group_kwarg(self, hdf5_groups_file):
             group="test/group",
             loadable_variables=vars_to_load,
             indexes={},
+            backend=hdf_backend,
         )
         full_ds = xr.open_dataset(
             hdf5_groups_file,
@@ -340,13 +371,15 @@ def test_open_virtual_dataset_passes_expected_args(
         }
         mock_read_kerchunk.assert_called_once_with(**args)

-    def test_open_dataset_with_empty(self, hdf5_empty, tmpdir):
-        vds = open_virtual_dataset(hdf5_empty)
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_open_dataset_with_empty(self, hdf5_empty, tmpdir, hdf_backend):
+        vds = open_virtual_dataset(hdf5_empty, backend=hdf_backend)
         assert vds.empty.dims == ()
         assert vds.empty.attrs == {"empty": "true"}

-    def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir):
-        vds = open_virtual_dataset(hdf5_scalar)
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir, hdf_backend):
+        vds = open_virtual_dataset(hdf5_scalar, backend=hdf_backend)
         assert vds.scalar.dims == ()
         assert vds.scalar.attrs == {"scalar": "true"}
diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py
index 63158777..0a39eb3d 100644
--- a/virtualizarr/tests/test_integration.py
+++ b/virtualizarr/tests/test_integration.py
@@ -5,6 +5,7 @@

 from virtualizarr import open_virtual_dataset
 from virtualizarr.manifests import ChunkManifest, ManifestArray
+from virtualizarr.readers.hdf import HDFVirtualBackend
 from virtualizarr.tests import requires_kerchunk
 from virtualizarr.translators.kerchunk import (
     dataset_from_kerchunk_refs,
@@ -63,8 +64,9 @@ def test_no_duplicates_find_var_names():
         ),
     ],
 )
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 def test_numpy_arrays_to_inlined_kerchunk_refs(
-    netcdf4_file, inline_threshold, vars_to_inline
+    netcdf4_file, inline_threshold, vars_to_inline, hdf_backend
 ):
     from kerchunk.hdf import SingleHdf5ToZarr

@@ -75,7 +77,7 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(

     # loading the variables should produce same result as inlining them using kerchunk
     vds = open_virtual_dataset(
-        netcdf4_file, loadable_variables=vars_to_inline, indexes={}
+        netcdf4_file, loadable_variables=vars_to_inline, indexes={}, backend=hdf_backend
     )
     refs = vds.virtualize.to_kerchunk(format="dict")

@@ -90,7 +92,8 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(
 @requires_kerchunk
 @pytest.mark.parametrize("format", ["dict", "json", "parquet"])
 class TestKerchunkRoundtrip:
-    def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend):
         # set up example xarray dataset
         ds = xr.tutorial.open_dataset("air_temperature", decode_times=False)

@@ -98,7 +101,7 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
         ds.to_netcdf(f"{tmpdir}/air.nc")

         # use open_dataset_via_kerchunk to read it as references
-        vds = open_virtual_dataset(f"{tmpdir}/air.nc", indexes={})
+        vds = open_virtual_dataset(f"{tmpdir}/air.nc", indexes={}, backend=hdf_backend)

         if format == "dict":
             # write those references to an in-memory kerchunk-formatted references dictionary
@@ -122,8 +125,11 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
         for coord in ds.coords:
             assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs

+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
     @pytest.mark.parametrize("decode_times,time_vars", [(False, []), (True, ["time"])])
-    def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars):
+    def test_kerchunk_roundtrip_concat(
+        self, tmpdir, format, hdf_backend, decode_times, time_vars
+    ):
         # set up example xarray dataset
         ds = xr.tutorial.open_dataset("air_temperature", decode_times=decode_times)

@@ -139,11 +145,13 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars
             f"{tmpdir}/air1.nc",
             indexes={},
             loadable_variables=time_vars,
+            backend=hdf_backend,
         )
         vds2 = open_virtual_dataset(
             f"{tmpdir}/air2.nc",
             indexes={},
             loadable_variables=time_vars,
+            backend=hdf_backend,
         )

         if decode_times is False:
@@ -187,7 +195,8 @@ def test_kerchunk_roundtrip_concat(self, tmpdir, format, decode_times, time_vars
             assert roundtrip.time.encoding["units"] == ds.time.encoding["units"]
             assert roundtrip.time.encoding["calendar"] == ds.time.encoding["calendar"]

-    def test_non_dimension_coordinates(self, tmpdir, format):
+    @pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+    def test_non_dimension_coordinates(self, tmpdir, format, hdf_backend):
         # regression test for GH issue #105

         # set up example xarray dataset containing non-dimension coordinate variables
@@ -196,7 +205,9 @@ def test_non_dimension_coordinates(self, tmpdir, format):
         # save it to disk as netCDF (in temporary directory)
         ds.to_netcdf(f"{tmpdir}/non_dim_coords.nc")

-        vds = open_virtual_dataset(f"{tmpdir}/non_dim_coords.nc", indexes={})
+        vds = open_virtual_dataset(
+            f"{tmpdir}/non_dim_coords.nc", indexes={}, backend=hdf_backend
+        )

         assert "lat" in vds.coords
         assert "coordinates" not in vds.attrs
@@ -269,11 +280,12 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format):


 @requires_kerchunk
-def test_open_scalar_variable(tmpdir):
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
+def test_open_scalar_variable(tmpdir, hdf_backend):
     # regression test for GH issue #100
     ds = xr.Dataset(data_vars={"a": 0})
     ds.to_netcdf(f"{tmpdir}/scalar.nc")

-    vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={})
+    vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={}, backend=hdf_backend)

     assert vds["a"].shape == ()
diff --git a/virtualizarr/tests/test_readers/test_hdf_integration.py b/virtualizarr/tests/test_readers/test_hdf_integration.py
index f73292ee..716d1f28 100644
--- a/virtualizarr/tests/test_readers/test_hdf_integration.py
+++ b/virtualizarr/tests/test_readers/test_hdf_integration.py
@@ -4,18 +4,21 @@

 import virtualizarr
 from virtualizarr.backend import FileType
+from virtualizarr.readers.hdf import HDFVirtualBackend


+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestIntegration:
     @pytest.mark.xfail(reason="0 time start is being interpreted as fillvalue")
     def test_filters_h5netcdf_roundtrip(
-        self, tmpdir, filter_encoded_roundtrip_hdf5_file
+        self, tmpdir, filter_encoded_roundtrip_hdf5_file, hdf_backend
     ):
         ds = xr.open_dataset(filter_encoded_roundtrip_hdf5_file, decode_times=True)
         vds = virtualizarr.open_virtual_dataset(
             filter_encoded_roundtrip_hdf5_file,
             loadable_variables=["time"],
             cftime_variables=["time"],
+            backend=hdf_backend,
         )
         kerchunk_file = f"{tmpdir}/kerchunk.json"
         vds.virtualize.to_kerchunk(kerchunk_file, format="json")
@@ -23,19 +26,26 @@ def test_filters_h5netcdf_roundtrip(
         xrt.assert_allclose(ds, roundtrip)

     def test_filters_netcdf4_roundtrip(
-        self, tmpdir, filter_encoded_roundtrip_netcdf4_file
+        self, tmpdir, filter_encoded_roundtrip_netcdf4_file, hdf_backend
     ):
         filepath = filter_encoded_roundtrip_netcdf4_file["filepath"]
         ds = xr.open_dataset(filepath)
-        vds = virtualizarr.open_virtual_dataset(filepath, filetype=FileType("netcdf4"))
+        vds = virtualizarr.open_virtual_dataset(
+            filepath, filetype=FileType("netcdf4"), backend=hdf_backend
+        )
         kerchunk_file = f"{tmpdir}/kerchunk.json"
         vds.virtualize.to_kerchunk(kerchunk_file, format="json")
         roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
         xrt.assert_equal(ds, roundtrip)

-    def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file):
+    def test_filter_and_cf_roundtrip(
+        self, tmpdir, filter_and_cf_roundtrip_hdf5_file, hdf_backend
+    ):
         ds = xr.open_dataset(filter_and_cf_roundtrip_hdf5_file)
-        vds = virtualizarr.open_virtual_dataset(filter_and_cf_roundtrip_hdf5_file)
+        vds = virtualizarr.open_virtual_dataset(
+            filter_and_cf_roundtrip_hdf5_file,
+            backend=hdf_backend,
+        )
         kerchunk_file = f"{tmpdir}/filter_cf_kerchunk.json"
         vds.virtualize.to_kerchunk(kerchunk_file, format="json")
         roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
index 062eda5f..12f6fadf 100644
--- a/virtualizarr/tests/test_xarray.py
+++ b/virtualizarr/tests/test_xarray.py
@@ -4,6 +4,7 @@

 from virtualizarr import open_virtual_dataset
 from virtualizarr.manifests import ChunkManifest, ManifestArray
+from virtualizarr.readers.hdf import HDFVirtualBackend
 from virtualizarr.tests import requires_kerchunk
 from virtualizarr.zarr import ZArray

@@ -224,14 +225,15 @@ def test_concat_dim_coords_along_existing_dim(self):


 @requires_kerchunk
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestCombineUsingIndexes:
-    def test_combine_by_coords(self, netcdf4_files):
+    def test_combine_by_coords(self, netcdf4_files, hdf_backend):
         filepath1, filepath2 = netcdf4_files

         with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
-            vds1 = open_virtual_dataset(filepath1)
+            vds1 = open_virtual_dataset(filepath1, backend=hdf_backend)
         with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
-            vds2 = open_virtual_dataset(filepath2)
+            vds2 = open_virtual_dataset(filepath2, backend=hdf_backend)

         combined_vds = xr.combine_by_coords(
             [vds2, vds1],
@@ -240,13 +242,13 @@ def test_combine_by_coords(self, netcdf4_files):
         assert combined_vds.xindexes["time"].to_pandas_index().is_monotonic_increasing

     @pytest.mark.xfail(reason="Not yet implemented, see issue #18")
-    def test_combine_by_coords_keeping_manifestarrays(self, netcdf4_files):
+    def test_combine_by_coords_keeping_manifestarrays(self, netcdf4_files, hdf_backend):
         filepath1, filepath2 = netcdf4_files

         with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
-            vds1 = open_virtual_dataset(filepath1)
+            vds1 = open_virtual_dataset(filepath1, backend=hdf_backend)
         with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
-            vds2 = open_virtual_dataset(filepath2)
+            vds2 = open_virtual_dataset(filepath2, backend=hdf_backend)

         combined_vds = xr.combine_by_coords(
             [vds2, vds1],
@@ -258,17 +260,18 @@ def test_combine_by_coords_keeping_manifestarrays(self, netcdf4_files):


 @requires_kerchunk
+@pytest.mark.parametrize("hdf_backend", [None, HDFVirtualBackend])
 class TestRenamePaths:
-    def test_rename_to_str(self, netcdf4_file):
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+    def test_rename_to_str(self, netcdf4_file, hdf_backend):
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)
         renamed_vds = vds.virtualize.rename_paths("s3://bucket/air.nc")
         assert (
             renamed_vds["air"].data.manifest.dict()["0.0.0"]["path"]
             == "s3://bucket/air.nc"
         )

-    def test_rename_using_function(self, netcdf4_file):
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+    def test_rename_using_function(self, netcdf4_file, hdf_backend):
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)

         def local_to_s3_url(old_local_path: str) -> str:
             from pathlib import Path
@@ -284,15 +287,20 @@ def local_to_s3_url(old_local_path: str) -> str:
             == "s3://bucket/air.nc"
         )

-    def test_invalid_type(self, netcdf4_file):
-        vds = open_virtual_dataset(netcdf4_file, indexes={})
+    def test_invalid_type(self, netcdf4_file, hdf_backend):
+        vds = open_virtual_dataset(netcdf4_file, indexes={}, backend=hdf_backend)

         with pytest.raises(TypeError):
             vds.virtualize.rename_paths(["file1.nc", "file2.nc"])

-    def test_mixture_of_manifestarrays_and_numpy_arrays(self, netcdf4_file):
+    def test_mixture_of_manifestarrays_and_numpy_arrays(
+        self, netcdf4_file, hdf_backend
+    ):
         vds = open_virtual_dataset(
-            netcdf4_file, indexes={}, loadable_variables=["lat", "lon"]
+            netcdf4_file,
+            indexes={},
+            loadable_variables=["lat", "lon"],
+            backend=hdf_backend,
         )
         renamed_vds = vds.virtualize.rename_paths("s3://bucket/air.nc")
         assert (
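The integration-style tests above all exercise the same write/read-back flow; condensed into one sketch with the new keyword (file names here are illustrative, not taken from the patch):

    import xarray as xr
    import virtualizarr
    from virtualizarr.readers.hdf import HDFVirtualBackend

    # Build virtual references with the experimental reader, persist them as
    # kerchunk JSON, then read the references back through xarray's kerchunk
    # engine, mirroring what the parametrized roundtrip tests do.
    vds = virtualizarr.open_virtual_dataset(
        "data.nc", indexes={}, backend=HDFVirtualBackend
    )
    vds.virtualize.to_kerchunk("refs.json", format="json")
    roundtrip = xr.open_dataset("refs.json", engine="kerchunk")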