From 2bfeee39497620121a63a3a4a25b317aa6057653 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 3 Dec 2024 13:26:47 -0700 Subject: [PATCH] Refactor dmrpp tests to expose data file path (#323) * rewrite tests to use a new dmrparser_factory * rewrite using global dict of XML strings * fix final test by explicitly passing in tmp_path instead of using a fixture which requests tmp_path * fix bug with not converting Path objects to strings --- virtualizarr/tests/test_readers/test_dmrpp.py | 346 ++++++++++-------- 1 file changed, 191 insertions(+), 155 deletions(-) diff --git a/virtualizarr/tests/test_readers/test_dmrpp.py b/virtualizarr/tests/test_readers/test_dmrpp.py index cbafc40f..d6238ca5 100644 --- a/virtualizarr/tests/test_readers/test_dmrpp.py +++ b/virtualizarr/tests/test_readers/test_dmrpp.py @@ -20,153 +20,159 @@ # TODO: later add MUR, SWOT, TEMPO and others by using kerchunk JSON to read refs (rather than reading the whole netcdf file) ] - -@pytest.fixture -def basic_dmrpp() -> DMRParser: - xml_str = """\ - - - - - - - - - grid x-axis - - - - - - - - - grid y-axis - - - - - - - - - grid z-axis - - - - - - - - - - analysed sea surface temperature - - - 1 - 2 - 3 - - - -32768 - - - 298.14999999999998 - - - 0.001 - - - x y z - - - 360 720 - - - - - - - - - - - - - - - - - mask - - - - - - - CF-1.6 - - - Sample Dataset - - - """ - return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str))) - - -@pytest.fixture -def nested_groups_dmrpp() -> DMRParser: - xml_str = """\ - - - - - - - - - - - - - - - - - +DMRPP_XML_STRINGS = { + "basic": textwrap.dedent( + """\ + + + - - - test + + + grid x-axis - - - test + + + grid y-axis - + + + + + + + grid z-axis + + + - - - - + + + + + analysed sea surface temperature + + + 1 + 2 + 3 + + + -32768 + + + 298.14999999999998 + + + 0.001 + + + x y z + + + 360 720 + + + + + + + + + + + + + + + + + mask + + + + + + + CF-1.6 + + + Sample Dataset + + + """ + ), + "nested_groups": textwrap.dedent( + """\ + + + + + + - - - - """ - return DMRParser(root=ET.fromstring(textwrap.dedent(xml_str))) + + + + + + + + + + + + + test + + + + + + + + + test + + + + + + + + + + + + + + + + + """ + ), +} + + +def dmrparser(dmrpp_xml_str: str, tmp_path: Path, filename="test.nc") -> DMRParser: + # TODO we should actually create a dmrpp file in a temporary directory + # this would avoid the need to pass tmp_path separately + + return DMRParser( + root=ET.fromstring(dmrpp_xml_str), data_filepath=str(tmp_path / filename) + ) @network @@ -179,36 +185,37 @@ def test_NASA_dmrpp(data_url, dmrpp_url): @pytest.mark.parametrize( - "dmrpp_fixture, fqn_path, expected_xpath", + "dmrpp_xml_str_key, fqn_path, expected_xpath", [ - ("basic_dmrpp", "/", "."), - ("basic_dmrpp", "/data", "./*[@name='data']"), - ("basic_dmrpp", "/data/items", "./*[@name='data']/*[@name='items']"), + ("basic", "/", "."), + ("basic", "/data", "./*[@name='data']"), + ("basic", "/data/items", "./*[@name='data']/*[@name='items']"), ( - "nested_groups_dmrpp", + "nested_groups", "/group1/group2/area", "./*[@name='group1']/*[@name='group2']/*[@name='area']", ), ], ) -def test_find_node_fqn(request, dmrpp_fixture, fqn_path, expected_xpath): - parser_instance = request.getfixturevalue(dmrpp_fixture) +def test_find_node_fqn(tmp_path, dmrpp_xml_str_key, fqn_path, expected_xpath): + parser_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path) result = parser_instance.find_node_fqn(fqn_path) expected = parser_instance.root.find(expected_xpath, parser_instance._NS) assert result == expected @pytest.mark.parametrize( - "dmrpp_fixture, group_path", + "dmrpp_xml_str_key, group_path", [ - ("basic_dmrpp", "/"), - ("nested_groups_dmrpp", "/"), - ("nested_groups_dmrpp", "/group1"), - ("nested_groups_dmrpp", "/group1/group2"), + ("basic", "/"), + ("nested_groups", "/"), + ("nested_groups", "/group1"), + ("nested_groups", "/group1/group2"), ], ) -def test_split_groups(request, dmrpp_fixture, group_path): - dmrpp_instance = request.getfixturevalue(dmrpp_fixture) +def test_split_groups(tmp_path, dmrpp_xml_str_key, group_path): + dmrpp_instance = dmrparser(DMRPP_XML_STRINGS[dmrpp_xml_str_key], tmp_path=tmp_path) + # get all tags in a dataset (so all tags excluding nested groups) dataset_tags = lambda x: [ d for d in x if d.tag != "{" + dmrpp_instance._NS["dap"] + "}" + "Group" @@ -221,21 +228,30 @@ def test_split_groups(request, dmrpp_fixture, group_path): assert result_tags == expected_tags -def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp): +def test_parse_dataset(tmp_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + vds = basic_dmrpp.parse_dataset() assert vds.sizes == {"x": 720, "y": 1440, "z": 3} assert vds.data_vars.keys() == {"data", "mask"} assert vds.data_vars["data"].dims == ("x", "y") assert vds.attrs == {"Conventions": "CF-1.6", "title": "Sample Dataset"} assert vds.coords.keys() == {"x", "y", "z"} + + nested_groups_dmrpp = dmrparser( + DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path + ) + vds_root_implicit = nested_groups_dmrpp.parse_dataset() vds_root = nested_groups_dmrpp.parse_dataset(group="/") xrt.assert_identical(vds_root_implicit, vds_root) assert vds_root.sizes == {"a": 10, "b": 10} assert vds_root.coords.keys() == {"a", "b"} + vds_g1 = nested_groups_dmrpp.parse_dataset(group="/group1") assert vds_g1.sizes == {"x": 720, "y": 1440} assert vds_g1.coords.keys() == {"x", "y"} + vds_g2 = nested_groups_dmrpp.parse_dataset(group="/group1/group2") assert vds_g2.sizes == {"x": 720, "y": 1440} assert vds_g2.data_vars.keys() == {"area"} @@ -249,13 +265,19 @@ def test_parse_dataset(basic_dmrpp, nested_groups_dmrpp): ("/group1/x", {"x": 720}), ], ) -def test_parse_dim(nested_groups_dmrpp, dim_path, expected): +def test_parse_dim(tmp_path, dim_path, expected): + nested_groups_dmrpp = dmrparser( + DMRPP_XML_STRINGS["nested_groups"], tmp_path=tmp_path + ) + result = nested_groups_dmrpp._parse_dim(nested_groups_dmrpp.find_node_fqn(dim_path)) assert result == expected @pytest.mark.parametrize("dim_path", ["/", "/mask"]) -def test_find_dimension_tags(basic_dmrpp, dim_path): +def test_find_dimension_tags(tmp_path, dim_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + # Check that Dimension tags match Dimension tags from the root # Check that Dim tags reference the same Dimension tags from the root assert basic_dmrpp._find_dimension_tags( @@ -263,7 +285,9 @@ def test_find_dimension_tags(basic_dmrpp, dim_path): ) == basic_dmrpp.root.findall("dap:Dimension", basic_dmrpp._NS) -def test_parse_variable(basic_dmrpp): +def test_parse_variable(tmp_path): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data")) assert var.dtype == "float32" assert var.dims == ("x", "y") @@ -288,7 +312,9 @@ def test_parse_variable(basic_dmrpp): ("data/_FillValue", {"_FillValue": -32768}), ], ) -def test_parse_attribute(basic_dmrpp, attr_path, expected): +def test_parse_attribute(tmp_path, attr_path, expected): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + result = basic_dmrpp._parse_attribute(basic_dmrpp.find_node_fqn(attr_path)) assert result == expected @@ -311,7 +337,9 @@ def test_parse_attribute(basic_dmrpp, attr_path, expected): ), ], ) -def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters): +def test_parse_filters(tmp_path, var_path, dtype, expected_filters): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS ) @@ -320,36 +348,44 @@ def test_parse_filters(basic_dmrpp, var_path, dtype, expected_filters): @pytest.mark.parametrize( - "var_path, chunk_shape, expected_lengths, expected_offsets, expected_paths", + "var_path, chunk_shape, chunk_grid_shape, expected_lengths, expected_offsets", [ ( "/data", (360, 720), + (3, 3), np.full((3, 3), 4083, dtype=np.uint64), (np.arange(9, dtype=np.uint64) * 4083 + 40762).reshape(3, 3), - np.full((3, 3), "test.dmrpp", dtype=np.dtypes.StringDType), ), ( "/mask", (720, 1440), + (1,), np.array([4], dtype=np.uint64), np.array([41276], dtype=np.uint64), - np.array(["test.dmrpp"], dtype=np.dtypes.StringDType), ), ], ) def test_parse_chunks( - basic_dmrpp, + tmp_path, var_path, chunk_shape, + chunk_grid_shape, expected_lengths, expected_offsets, - expected_paths, ): + basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) + chunks_tag = basic_dmrpp.find_node_fqn(var_path).find( "dmrpp:chunks", basic_dmrpp._NS ) result = basic_dmrpp._parse_chunks(chunks_tag, chunk_shape) + + expected_paths = np.full( + shape=chunk_grid_shape, + fill_value=str(tmp_path / "test.nc"), + dtype=np.dtypes.StringDType, + ) expected = ChunkManifest.from_arrays( lengths=expected_lengths, offsets=expected_offsets, paths=expected_paths )