Skip to content

Commit

Permalink
- Updated tests (covered missing lines introduced by refactor found by
Browse files Browse the repository at this point in the history
  sentry)
- Updated cmip6.yaml, as Cordex experiments require a different translator
  from the main CMIP6 experiments.
  • Loading branch information
charles-turner-1 committed Oct 14, 2024
1 parent b3da577 commit b303840
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 86 deletions.
6 changes: 1 addition & 5 deletions config/cmip6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,4 @@ sources:

- metadata_yaml: /g/data/xp65/admin/access-nri-intake-catalog/config/metadata_sources/cmip6-oi10/metadata.yaml
path:
- /g/data/oi10/catalog/v2/esm/catalog.json

- metadata_yaml: /g/data/xp65/admin/intake/metadata/cmip6_ig45/metadata.yaml
path:
- /g/data/ig45/catalog/v2/esm/catalog.json
- /g/data/oi10/catalog/v2/esm/catalog.json
26 changes: 0 additions & 26 deletions config/experiments/cmip6_ig45/metadata.yaml

This file was deleted.

1 change: 0 additions & 1 deletion config/metadata_sources/cordex-ig45/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ license:
url: https://geonetwork.nci.org.au/geonetwork/srv/eng/catalog.search#/metadata/f7465_8388_5100_7022
parent_experiment:
related_experiments:
-
notes:
keywords:
- cmip
133 changes: 84 additions & 49 deletions src/access_nri_intake/catalog/translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,31 @@ def _unique_values(series):

return df[self.columns] # Preserve ordering

def set_dispatch(
    self, core_colname: str, func: Callable, input_name: Optional[str] = None
):
    """
    Register the translation function used for one core column.

    Typically only needed when either:

    1. ``core_colname != input_name``
    2. A custom translation function (``func``) is required.

    Parameters
    ----------
    core_colname: str
        The core column name to translate to. Must be one of 'model',
        'realm', 'frequency' or 'variable'.
    func: callable
        The function to translate the column. Stored in ``self._dispatch``
        under ``core_colname``.
    input_name: str, optional
        The name of the column in the source. Recorded on
        ``self._dispatch_keys`` as the attribute named ``core_colname``.
        Defaults to None.

    Raises
    ------
    TranslatorError
        If ``core_colname`` is not one of the four permitted core columns.
    """
    permitted = ("model", "realm", "frequency", "variable")

    if core_colname in permitted:
        # Both registries are keyed by the core column name.
        self._dispatch[core_colname] = func
        setattr(self._dispatch_keys, core_colname, input_name)
        return

    raise TranslatorError(
        f"'core_colname' must be one of 'model', 'realm', 'frequency', 'variable', not {core_colname}"
    )

def _realm_translator(self) -> pd.Series:
"""
Return realm, fixing a few issues
Expand Down Expand Up @@ -240,16 +265,21 @@ def __init__(self, source, columns):
"""

super().__init__(source, columns)
self._dispatch["model"] = self._model_translator
self._dispatch["realm"] = self._realm_translator
self._dispatch["frequency"] = self._frequency_translator
self._dispatch["variable"] = self._variable_translator

self._dispatch_keys = _DispatchKeys(
model="source_id",
realm="realm",
frequency="frequency",
variable="variable_id",
self.set_dispatch(
input_name="source_id", core_colname="model", func=super()._model_translator
)
self.set_dispatch(
input_name="realm", core_colname="realm", func=super()._realm_translator
)
self.set_dispatch(
input_name="frequency",
core_colname="frequency",
func=super()._frequency_translator,
)
self.set_dispatch(
input_name="variable_id",
core_colname="variable",
func=super()._variable_translator,
)


Expand All @@ -271,16 +301,21 @@ def __init__(self, source, columns):
"""

super().__init__(source, columns)
self._dispatch["model"] = self._model_translator
self._dispatch["realm"] = self._realm_translator
self._dispatch["frequency"] = self._frequency_translator
self._dispatch["variable"] = self._variable_translator

self._dispatch_keys = _DispatchKeys(
model="model",
realm="realm",
frequency="frequency",
variable="variable",
self.set_dispatch(
input_name="model", core_colname="model", func=super()._model_translator
)
self.set_dispatch(
input_name="realm", core_colname="realm", func=super()._realm_translator
)
self.set_dispatch(
input_name="frequency",
core_colname="frequency",
func=super()._frequency_translator,
)
self.set_dispatch(
input_name="variable",
core_colname="variable",
func=super()._variable_translator,
)


Expand All @@ -302,17 +337,11 @@ def __init__(self, source, columns):
"""

super().__init__(source, columns)
self._dispatch["variable"] = self._variable_translator
self._dispatch_keys = _DispatchKeys(variable="variable")

def _realm_translator(self) -> pd.Series:
raise AttributeError(
f"{self.__class__.__name__}: 'realm' does not require translation"
)

def _frequency_translator(self) -> pd.Series:
raise AttributeError(
f"{self.__class__.__name__}: 'data' does not require translation"
self.set_dispatch(
input_name="variable",
core_colname="variable",
func=super()._variable_translator,
)


Expand All @@ -334,15 +363,21 @@ def __init__(self, source, columns):
"""

super().__init__(source, columns)
self._dispatch["model"] = self._model_translator
self._dispatch["realm"] = self._realm_translator
self._dispatch["frequency"] = self._frequency_translator
self._dispatch["variable"] = self._variable_translator
self._dispatch_keys = _DispatchKeys(
model="source_id",
realm="realm",
variable="variable_id",
frequency="freq",
self.set_dispatch(
input_name="source_id", core_colname="model", func=super()._model_translator
)
self.set_dispatch(
input_name="realm", core_colname="realm", func=self._realm_translator
)
self.set_dispatch(
input_name="freq",
core_colname="frequency",
func=super()._frequency_translator,
)
self.set_dispatch(
input_name="variable_id",
core_colname="variable",
func=super()._variable_translator,
)

def _realm_translator(self):
Expand Down Expand Up @@ -370,16 +405,16 @@ def __init__(self, source, columns):
"""

super().__init__(source, columns)
self._dispatch["model"] = self._model_translator
self._dispatch["frequency"] = self._frequency_translator
self._dispatch["variable"] = self._variable_translator
self._dispatch["realm"] = self._variable_translator

self._dispatch_keys = _DispatchKeys(
model="source_id",
frequency="frequency",
variable="variable_id",
realm="realm",
self.set_dispatch(
input_name="source_id", core_colname="model", func=super()._model_translator
)
self.set_dispatch(
input_name="variable_id",
core_colname="variable",
func=super()._variable_translator,
)
self.set_dispatch(
input_name="realm", core_colname="realm", func=self._realm_translator
)

def _realm_translator(self):
Expand Down
52 changes: 47 additions & 5 deletions tests/test_translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
TranslatorError,
_cmip_realm_translator,
_to_tuple,
tuplify_series,
)


Expand Down Expand Up @@ -189,6 +190,29 @@ def test_DefaultTranslator_error(test_data):
assert "Could not translate" in str(excinfo.value)


@pytest.mark.parametrize(
    "colname, should_raise",
    [
        ("model", False),
        ("realm", False),
        ("frequency", False),
        ("variable", False),
        ("random_string", True),
    ],
)
def test_DefaultTranslator_set_dispatch(test_data, colname, should_raise):
    """
    Test that only valid translation setups are allowed.

    For each permitted core column name, ``set_dispatch`` must register both
    the translation function and the source column name. Any other column
    name must raise a ``TranslatorError``.
    """
    esmds = intake.open_esm_datastore(test_data / "esm_datastore/cmip5-al33.json")
    dtrans = DefaultTranslator(esmds, CORE_COLUMNS)
    if should_raise:
        with pytest.raises(TranslatorError) as excinfo:
            dtrans.set_dispatch(colname, dtrans._model_translator, "model")
        # Checked after the ``with`` block: statements following the raising
        # call inside the block would never execute.
        assert "'core_colname' must be one of" in str(excinfo.value)
    else:
        dtrans.set_dispatch(colname, dtrans._model_translator, colname)
        assert dtrans._dispatch[colname] == dtrans._model_translator
        # set_dispatch also records the source column name on _dispatch_keys;
        # verify that second side effect as well as the registered function.
        assert getattr(dtrans._dispatch_keys, colname) == colname


@pytest.mark.parametrize(
"groupby, n_entries",
[
Expand Down Expand Up @@ -271,11 +295,7 @@ def test_BarpaTranslator(test_data, groupby, n_entries):

@pytest.mark.parametrize(
"groupby, n_entries",
[
(None, 5),
(["variable"], 4),
(["frequency"], 2),
],
[(None, 5), (["variable"], 4), (["frequency"], 2), (["realm"], 1)],
)
def test_CordexTranslator(test_data, groupby, n_entries):
"""Test CORDEX datastore translator"""
Expand All @@ -284,3 +304,25 @@ def test_CordexTranslator(test_data, groupby, n_entries):
esmds.description = "description"
df = CordexTranslator(esmds, CORE_COLUMNS).translate(groupby)
assert len(df) == n_entries


@pytest.mark.parametrize(
    "input_series, expected_output",
    [
        (pd.Series([1, 2, 3]), pd.Series([(1,), (2,), (3,)])),
    ],
)
def test_tuplify_series(input_series, expected_output):
    """
    Test the ``tuplify_series`` decorator on a plain function and on a method.

    Both decorated callables simply return the series they are given, so the
    result shows only what the decorator does to the returned series.
    """

    @tuplify_series
    def identity_func(series):
        return series

    class _SeriesHolder:
        @tuplify_series
        def identity_method(self, series):
            return series

    # Decorated free function.
    from_function = identity_func(input_series)
    assert all(from_function == expected_output)

    # Decorated instance method (first positional argument is ``self``).
    from_method = _SeriesHolder().identity_method(input_series)
    assert all(from_method == expected_output)

0 comments on commit b303840

Please sign in to comment.