diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py
index cbafc40f..d6238ca5 100644
--- a/virtualizarr/tests/test_readers/test_dmrpp.py
+++ b/virtualizarr/tests/test_readers/test_dmrpp.py
@@ -20,153 +20,159 @@
# TODO: later add MUR, SWOT, TEMPO and others by using kerchunk JSON to read refs (rather than reading the whole netcdf file)
]
-
-@pytest.fixture
-def basic_dmrpp() -> DMRParser:
- xml_str = """\
-
-
-
-
-
-
-
-
- grid x-axis
-
-
-
-
-
-
-
-
- grid y-axis
-
-
-
-
-
-
-
-
- grid z-axis
-
-
-
-
-
-
-
-
-
- analysed sea surface temperature
-
-
- 1
- 2
- 3
-
-
- -32768
-
-
- 298.14999999999998
-
-
- 0.001
-
-
- x y z
-
-
- 360 720
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- mask
-
-
-
-
-
-
- CF-1.6
-
-
- Sample Dataset
-
-
- """
- return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str)))
-
-
-@pytest.fixture
-def nested_groups_dmrpp() -> DMRParser:
- xml_str = """\
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+DMRPP_XML_STRINGS = {
+ "basic": textwrap.dedent(
+ """\
+
+
+
-
-
- test
+
+
+ grid x-axis
-
-
- test
+
+
+ grid y-axis
-
+
+
+
+
+
+
+ grid z-axis
+
+
+
-
-
-
-
+
+
+
+
+ analysed sea surface temperature
+
+
+ 1
+ 2
+ 3
+
+
+ -32768
+
+
+ 298.14999999999998
+
+
+ 0.001
+
+
+ x y z
+
+
+ 360 720
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mask
+
+
+
+
+
+
+ CF-1.6
+
+
+ Sample Dataset
+
+
+ """
+ ),
+ "nested_groups": textwrap.dedent(
+ """\
+
+
+
+
+
+
-
-
-
- """
- return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str)))
+
+
+
+
+
+
+
+
+
+
+
+
+ test
+
+
+
+
+
+
+
+
+ test
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+ ),
+}
+
+
+def dmrparser(dmrpp_xml_str: str, tmp_path: Path, filename="test.nc") -> DMRParser:
+    """Build a DMRParser from an in-memory DMR++ XML string.
+
+    The XML text is parsed with ``ET.fromstring`` and passed as the parser
+    root. The data file path given to the parser is ``tmp_path / filename``
+    converted to a string; this helper does not create that file itself.
+    """
+    # TODO we should actually create a dmrpp file in a temporary directory
+    # this would avoid the need to pass tmp_path separately
+    xml_root = ET.fromstring(dmrpp_xml_str)
+    data_file = tmp_path / filename
+    return DMRParser(root=xml_root, data_filepath=str(data_file))
@network
@@ -179,36 +185,37 @@ def test_NASA_dmrpp(data_url, dmrpp_url):
@pytest.mark.parametrize(
- "dmrpp_fixture, fqn_path, expected_xpath",
+ "dmrpp_xml_str_key, fqn_path, expected_xpath",
[
- ("basic_dmrpp", "/", "."),
- ("basic_dmrpp", "/data", "./*[@name='data']"),
- ("basic_dmrpp", "/data/items", "./*[@name='data']/*[@name='items']"),
+ ("basic", "/", "."),
+ ("basic", "/data", "./*[@name='data']"),
+ ("basic", "/data/items", "./*[@name='data']/*[@name='items']"),
(
- "nested_groups_dmrpp",
+ "nested_groups",
"/group1/group2/area",
"./*[@name='group1']/*[@name='group2']/*[@name='area']",
),
],
)
-def test_find_node_fqn(request, dmrpp_fixture, fqn_path, expected_xpath):
- parser_instance = request.getfixturevalue(dmrpp_fixture)
+def test_find_node_fqn(tmp_path, dmrpp_xml_str_key, fqn_path, expected_xpath):
+ # dmrpp_xml_str_key selects an entry of DMRPP_XML_STRINGS; a parser is built
+ # from that XML for each case. The lookup of fqn_path via find_node_fqn must
+ # yield the same element as root.find() on the hand-written expected_xpath.
+ parser_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path)
result = parser_instance.find_node_fqn(fqn_path)
expected = parser_instance.root.find(expected_xpath, parser_instance._NS)
assert result == expected
@pytest.mark.parametrize(
- "dmrpp_fixture, group_path",
+ "dmrpp_xml_str_key, group_path",
[
- ("basic_dmrpp", "/"),
- ("nested_groups_dmrpp", "/"),
- ("nested_groups_dmrpp", "/group1"),
- ("nested_groups_dmrpp", "/group1/group2"),
+ ("basic", "/"),
+ ("nested_groups", "/"),
+ ("nested_groups", "/group1"),
+ ("nested_groups", "/group1/group2"),
],
)
-def test_split_groups(request, dmrpp_fixture, group_path):
- dmrpp_instance = request.getfixturevalue(dmrpp_fixture)
+def test_split_groups(tmp_path, dmrpp_xml_str_key, group_path):
+ dmrpp_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path)
+
# get all tags in a dataset (so all tags excluding nested groups)
dataset_tags = lambda x: [
d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group"
@@ -221,21 +228,30 @@ def test_split_groups(request, dmrpp_fixture, group_path):
assert result_tags == expected_tags
-def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp):
+def test_parse_dataset(tmp_path):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
vds = basic_dmrpp.parse_dataset()
assert vds.sizes == {"x": 720, "y": 1440, "z": 3}
assert vds.data_vars.keys() == {"data", "mask"}
assert vds.data_vars["data"].dims == ("x", "y")
assert vds.attrs == {"Conventions": "CF-1.6", "title": "Sample Dataset"}
assert vds.coords.keys() == {"x", "y", "z"}
+
+ nested_groups_dmrpp = dmrparser(
+ DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path
+ )
+
vds_root_implicit = nested_groups_dmrpp.parse_dataset()
vds_root = nested_groups_dmrpp.parse_dataset(group="/")
xrt.assert_identical(vds_root_implicit, vds_root)
assert vds_root.sizes == {"a": 10, "b": 10}
assert vds_root.coords.keys() == {"a", "b"}
+
vds_g1 = nested_groups_dmrpp.parse_dataset(group="/group1")
assert vds_g1.sizes == {"x": 720, "y": 1440}
assert vds_g1.coords.keys() == {"x", "y"}
+
vds_g2 = nested_groups_dmrpp.parse_dataset(group="/group1/group2")
assert vds_g2.sizes == {"x": 720, "y": 1440}
assert vds_g2.data_vars.keys() == {"area"}
@@ -249,13 +265,19 @@ def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp):
("/group1/x", {"x": 720}),
],
)
-def test_parse_dim(nested_groups_dmrpp, dim_path, expected):
+def test_parse_dim(tmp_path, dim_path, expected):
+ nested_groups_dmrpp = dmrparser(
+ DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path
+ )
+
result = nested_groups_dmrpp._parse_dim(nested_groups_dmrpp.find_node_fqn(dim_path))
assert result == expected
@pytest.mark.parametrize("dim_path", ["/", "/mask"])
-def test_find_dimension_tags(basic_dmrpp, dim_path):
+def test_find_dimension_tags(tmp_path, dim_path):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
# Check that Dimension tags match Dimension tags from the root
# Check that Dim tags reference the same Dimension tags from the root
assert basic_dmrpp._find_dimension_tags(
@@ -263,7 +285,9 @@ def test_find_dimension_tags(basic_dmrpp, dim_path):
) == basic_dmrpp.root.findall("dap:Dimension", basic_dmrpp._NS)
-def test_parse_variable(basic_dmrpp):
+def test_parse_variable(tmp_path):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data"))
assert var.dtype == "float32"
assert var.dims == ("x", "y")
@@ -288,7 +312,9 @@ def test_parse_variable(basic_dmrpp):
("data/_FillValue", {"_FillValue": -32768}),
],
)
-def test_parse_attribute(basic_dmrpp, attr_path, expected):
+def test_parse_attribute(tmp_path, attr_path, expected):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
result = basic_dmrpp._parse_attribute(basic_dmrpp.find_node_fqn(attr_path))
assert result == expected
@@ -311,7 +337,9 @@ def test_parse_attribute(basic_dmrpp, attr_path, expected):
),
],
)
-def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters):
+def test_parse_filters(tmp_path, var_path, dtype, expected_filters):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
chunks_tag = basic_dmrpp.find_node_fqn(var_path).find(
"dmrpp:chunks", basic_dmrpp._NS
)
@@ -320,36 +348,44 @@ def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters):
@pytest.mark.parametrize(
- "var_path, chunk_shape, expected_lengths, expected_offsets, expected_paths",
+ "var_path, chunk_shape, chunk_grid_shape, expected_lengths, expected_offsets",
[
(
"/data",
(360, 720),
+ (3, 3),
np.full((3, 3), 4083, dtype=np.uint64),
(np.arange(9, dtype=np.uint64) * 4083 + 40762).reshape(3, 3),
- np.full((3, 3), "test.dmrpp", dtype=np.dtypes.StringDType),
),
(
"/mask",
(720, 1440),
+ (1,),
np.array([4], dtype=np.uint64),
np.array([41276], dtype=np.uint64),
- np.array(["test.dmrpp"], dtype=np.dtypes.StringDType),
),
],
)
def test_parse_chunks(
- basic_dmrpp,
+ tmp_path,
var_path,
chunk_shape,
+ chunk_grid_shape,
expected_lengths,
expected_offsets,
- expected_paths,
):
+ basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path)
+
chunks_tag = basic_dmrpp.find_node_fqn(var_path).find(
"dmrpp:chunks", basic_dmrpp._NS
)
result = basic_dmrpp._parse_chunks(chunks_tag, chunk_shape)
+
+ expected_paths = np.full(
+ shape=chunk_grid_shape,
+ fill_value=str(tmp_path / "test.nc"),
+ dtype=np.dtypes.StringDType,
+ )
expected = ChunkManifest.from_arrays(
lengths=expected_lengths, offsets=expected_offsets, paths=expected_paths
)