From 95e043761314b7757a58e55ae5b9ed52549308c9 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:46:10 -0400 Subject: [PATCH 01/31] Preliminary documentation for `gempyor.parameters` First draft of documentation for the `gempyor.parameters` module, most importantly containing the `Parameters` class. Also added `__all__` dunder to regulate exports and converted TODO comments to GitHub issues, see GH-274, GH-275. --- .../gempyor_pkg/src/gempyor/parameters.py | 168 ++++++++++++++---- 1 file changed, 133 insertions(+), 35 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index b01fb52ab..da41fc394 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -1,30 +1,45 @@ -import numpy as np -import pandas as pd -import pyarrow as pa -import pyarrow.parquet as pq +""" +Abstractions for interacting with the parameters configurations. + +This module contains abstractions for interacting with the parameters section of given +config files. Namely it contains the `Parameters` class. +""" + +__all__ = ["Parameters"] + + import copy -import confuse -from numpy import ndarray -import logging -from . import model_info, NPI, utils import datetime -import xarray as xr +import logging import os -logger = logging.getLogger(__name__) +import confuse +import numpy as np +from numpy import ndarray +import pandas as pd -# TODO: it should work like -# import xarray as xr -# xr.DataArray(p_draw, dims=["parameter", "date", "subpop"], -# coords={"parameter":modinf.parameters.pnames, -# "date":pd.date_range(modinf.ti, modinf.tf, freq="D"), -# "subpop":modinf.subpop_struct.subpop_names}).sel(parameter="gamma_0").plot() +from . 
import NPI, utils -## TODO: ideally here path_prefix should not be used and all files loaded from modinf +logger = logging.getLogger(__name__) class Parameters: + """ + Encapsulates logic for loading, parsing, and summarizing parameter configurations. + + Attributes: + npar: The number of parameters contained within the given configuration. + pconfig: A view subsetting to the parameters section of a given config file. + pdata: A dictionary containing a processed and reformatted view of the `pconfig` + attribute. + pnames: The names of the parameters given. + pnames2index: A mapping parameter names to their location in the `pnames` + attribute. + stacked_modifier_method: A mapping of modifier method to the parameters to which + that modifier method is relevant for. + """ + # Minimal object to be easily picklable for // runs def __init__( self, @@ -35,6 +50,26 @@ def __init__( subpop_names: list, path_prefix: str = ".", ): + """ + Initialize a `Parameters` instance from a parameter config view. + + Args: + parameter_config: A view subsetting to the parameters section of a given + config file. + ti: An initial date. + tf: A final date. + subpop_names: A list of subpopulation names. + path_prefix: A file path prefix to use when reading in parameter values from + a dataframe like file. + + Raises: + ValueError: The parameter names for the SEIR model are not unique. + ValueError: The dataframe file found for a given parameter contains an + insufficient number of columns for the subpopulations being considered. + ValueError: The dataframe file found for a given parameter does not have + enough date entries to cover the time span being considered by the given + `ti` and `tf`. 
+ """ self.pconfig = parameter_config self.pnames = [] self.npar = len(self.pnames) @@ -118,22 +153,59 @@ def __init__( logging.debug(f"NPI overlap operation is {self.stacked_modifier_method} ") def picklable_lamda_alpha(self): - """These two functions were lambda in __init__ before, it was more elegant. but as the object needs to be pickable, - we cannot use second order function, hence these ugly definitions""" + """ + Read the `alpha_val` attribute. + + This defunct method returns the `alpha_val` attribute of this class which is + never set by this class. If this method is called and the `alpha_val` attribute + is not set an AttributeError will be raised. + + Returns: + The `alpha_val` attribute. + """ return self.alpha_val def picklable_lamda_sigma(self): + """ + Read the `sigma_val` attribute. + + This defunct method returns the `sigma_val` attribute of this class which is + never set by this class. If this method is called and the `sigma_val` attribute + is not set an AttributeError will be raised. + + Returns: + The `sigma_val` attribute. + """ return self.sigma_val def get_pnames2pindex(self) -> dict: + """ + Read the `pnames2pindex` attribute. + + This redundant method returns the `pnames2pindex` attribute of this class. + + Returns: + A mapping parameter names to their location in the `pnames` attribute. + """ return self.pnames2pindex def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: """ - Returns all parameter in an array. These are drawn based on the seir::parameters section of the config, passed in as p_config. - :param n_days: number of time interval - :param nsubpops: number of spatial nodes - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time (same value) + Format all parameters as a numpy array including sampling. + + The entries in the output array are filled based on the input given in the + parameters section of a yaml config file. 
If the given parameter is pulled from + a distribution rather than fixed the values will be pulled from that + distribution. If an appropriate value cannot be found for an entry then a + `np.nan` is returned. + + Args: + n_days: The number of days to generate an array for. + nsubpops: The number of subpopulations to generate an array for. + + Returns: + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + corresponds to the `npar` attribute of this class. """ param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently @@ -148,12 +220,22 @@ def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> ndarray: """ - drop-in equivalent to param_quick_draw() that take a file as written parameter_write() - :param fname: - :param n_days: - :param nsubpops: - :param extension: - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time. + Format all parameters as a numpy array including sampling and overrides. + + This method serves largely the same purpose as the `parameters_quick_draw`, but + has the ability to override the parameter specifications contained by this class + with a given dataframe. + + Args: + param_df: A dataframe containing the columns 'parameter' and 'value'. If + more than one entry for a given parameter is given then only the first + value will be taken. + n_days: The number of days to generate an array for. + nsubpops: The number of subpopulations to generate an array for. + + Returns: + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + corresponds to the `npar` attribute of this class. 
""" param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently @@ -173,9 +255,19 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: """ - return parameters generated by parameters_quick_draw() as dataframe, just the first value as they are all similar. - :param p_draw: - :return: The dataframe (to be written to disk, or not) + Serialize a parameter draw as a pandas `DataFrame`. + + This method only considers distribution parameters and will pull the first + sample from the `p_draw` given. + + Args: + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + returned by `parameters_quick_draw`. + + Returns: + A pandas `DataFrame` with the columns 'parameter' and 'value' corresponding + to the parameter name and value as well as an index containing the parameter + name. """ # we don't write to disk time series parameters. out_df = pd.DataFrame( @@ -190,9 +282,15 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: """ Params reduced according to the NPI provided. - :param p_draw: array of shape (nparam, n_days, nsubpops) from p_draw - :param npi: NPI object with the reduction - :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time, reduced + + Args: + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + returned by `parameters_quick_draw`. + npi: An NPI object describing the parameter reduction to perform. + + Returns: + An array the same shape as `p_draw` with the prescribed reductions + performed. 
""" p_reduced = copy.deepcopy(p_draw) if npi is not None: From b914481db0d70a5c8056657a9c8ed89ccd43642a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:00:35 -0400 Subject: [PATCH 02/31] Create unit testing utils for `confuse` package Wrote, documented, and unit tested `create_confuse_subview_from_dict` and `create_confuse_rootview_from_dict` functions to easily create confuse package objects from dicts for use in parametrized unit tests. --- flepimop/gempyor_pkg/src/gempyor/testing.py | 76 +++++++++++++++++++ .../test_create_confuse_rootview_from_dict.py | 32 ++++++++ .../test_create_confuse_subview_from_dict.py | 33 ++++++++ 3 files changed, 141 insertions(+) create mode 100644 flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py create mode 100644 flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index 233ad45c5..bc8ab2343 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -5,10 +5,18 @@ the optional test dependencies must be installed. """ +__all__ = [ + "change_directory_to_temp_directory", + "create_confuse_rootview_from_dict", + "create_confuse_subview_from_dict", +] + from collections.abc import Generator import os from tempfile import TemporaryDirectory +from typing import Any +import confuse import pytest @@ -30,3 +38,71 @@ def change_directory_to_temp_directory() -> Generator[None, None, None]: yield os.chdir(current_dir) temp_dir.cleanup() + + +def create_confuse_rootview_from_dict(data: dict[str, Any]) -> confuse.RootView: + """ + Create a RootView from a dictionary for unit testing confuse parameters. + + Args: + data: The data to populate the confuse root view with. + + Returns: + A confuse root view. + + Examples: + >>> data = { + ... "foo": "bar", + ... 
"fizz": 123, + ... "alphabet": ["a", "b", "c"], + ... "mapping": {"x": 1, "y": 2}, + ... } + >>> rv = create_confuse_rootview_from_dict(data) + >>> rv + + >>> rv.keys() + ['foo', 'fizz', 'alphabet', 'mapping'] + >>> rv.get() + {'foo': 'bar', 'fizz': 123, 'alphabet': ['a', 'b', 'c'], 'mapping': {'x': 1, 'y': 2}} + >>> rv == rv.root() + True + >>> rv.name + 'root' + """ + return confuse.RootView([confuse.ConfigSource.of(data)]) + + +def create_confuse_subview_from_dict( + name: str, data: dict[str, Any] +) -> confuse.Subview: + """ + Create a Subview from a dictionary for unit testing confuse parameters. + + Args: + name: The name of the subview being created. + data: The data to populate the confuse subview with. + + Returns: + A confuse subview. + + Examples: + >>> data = { + ... "foo": "bar", + ... "fizz": 123, + ... "alphabet": ["a", "b", "c"], + ... "mapping": {"x": 1, "y": 2}, + ... } + >>> sv = create_confuse_subview_from_dict("params", data) + >>> sv + + >>> sv.keys() + ['foo', 'fizz', 'alphabet', 'mapping'] + >>> sv.get() + {'foo': 'bar', 'fizz': 123, 'alphabet': ['a', 'b', 'c'], 'mapping': {'x': 1, 'y': 2}} + >>> sv == sv.root() + False + >>> sv.name + 'params' + """ + root_view = create_confuse_rootview_from_dict({name: data}) + return root_view[name] diff --git a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py new file mode 100644 index 000000000..8a889fa6a --- /dev/null +++ b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py @@ -0,0 +1,32 @@ +from datetime import date +from typing import Any + +import confuse +import pytest + +from gempyor.testing import create_confuse_rootview_from_dict + + +class TestCreateConfuseRootviewFromDict: + @pytest.mark.parametrize( + "data", + [ + ({}), + ({"foo": "bar"}), + ({"a": "b", "c": 1}), + ( + { + "alphabet": ["a", "b", "c", "d", "e"], + "integers": [1, 2, 3, 4, 5], + "floats": [1.2, 
2.3, 3.4, 4.5, 5.6], + } + ), + ({"as_of_date": date(2024, 1, 1)}), + ], + ) + def test_output_validation(self, data: dict[str, Any]) -> None: + root_view = create_confuse_rootview_from_dict(data) + assert isinstance(root_view, confuse.RootView) + assert root_view == root_view.root() + assert root_view.name == "root" + assert root_view.get() == data diff --git a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py new file mode 100644 index 000000000..20de677a3 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py @@ -0,0 +1,33 @@ +from datetime import date +from typing import Any + +import confuse +import pytest + +from gempyor.testing import create_confuse_subview_from_dict + + +class TestCreateConfuseSubviewFromDict: + @pytest.mark.parametrize( + "name,data", + [ + ("nil", {}), + ("basic", {"foo": "bar"}), + ("small", {"a": "b", "c": 1}), + ( + "big", + { + "alphabet": ["a", "b", "c", "d", "e"], + "integers": [1, 2, 3, 4, 5], + "floats": [1.2, 2.3, 3.4, 4.5, 5.6], + }, + ), + ("date_data_type", {"as_of_date": date(2024, 1, 1)}), + ], + ) + def test_output_validation(self, name: str, data: dict[str, Any]) -> None: + root_view = create_confuse_subview_from_dict(name, data) + assert isinstance(root_view, confuse.Subview) + assert root_view != root_view.root() + assert root_view.name == name + assert root_view.get() == data From 500b212f709a69feffabaa04d79877f6968ba7cd Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:00:06 -0400 Subject: [PATCH 03/31] Unit test `Parameters` exceptions in init Initialized the unit tests for the `gempyor.parameters.Parameters` class with fixtures for the exceptions raised by the class' constructor. 
Left a note about the inability to reach one of the `ValueError`s, will have to revisit, either with a way to reach that exception or refactor the dead code. --- .../tests/parameters/test_parameters_class.py | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py new file mode 100644 index 000000000..f4c033e4a --- /dev/null +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -0,0 +1,112 @@ +from datetime import date + +import pandas as pd +import pytest +import re +from tempfile import NamedTemporaryFile + +from gempyor.parameters import Parameters +from gempyor.testing import create_confuse_subview_from_dict + + +class TestParameters: + # Taken straight from `config_sample_2pop.yml` + valid_parameters_subview = create_confuse_subview_from_dict( + "parameters", + {"sigma": {"value": 0.25}, "gamma": {"value": 0.2}, "Ro": {"value": 2.5}}, + ) + + def test_nonunique_parameter_names_value_error(self) -> None: + duplicated_parameters = create_confuse_subview_from_dict( + "parameters", + {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "GAMMA": {"value": 0.3}}, + ) + with pytest.raises( + ValueError, + match=( + r"Parameters of the SEIR model have the same name " + r"\(remember that case is not sufficient\!\)" + ), + ): + Parameters( + duplicated_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 12, 31), + subpop_names=["1", "2"], + ) + + def test_timeseries_parameter_has_insufficient_columns_value_error(self) -> None: + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: + param_df.to_csv(temp_file.name, index=False) + invalid_timeseries_parameters = 
create_confuse_subview_from_dict( + "parameters", {"sigma": {"timeseries": temp_file.name}} + ) + with pytest.raises( + ValueError, + match=( + rf"ERROR loading file {temp_file.name} for parameter sigma\: " + rf"the number of non 'date'\s+columns are 2, expected 3 " + rf"\(the number of subpops\) or one\." + ), + ): + Parameters( + invalid_timeseries_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2", "3"], + ) + + def test_timeseries_parameter_has_insufficient_dates_value_error(self) -> None: + # First way to get at this error, purely a length difference + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: + param_df.to_csv(temp_file.name, index=False) + invalid_timeseries_parameters = create_confuse_subview_from_dict( + "parameters", {"sigma": {"timeseries": temp_file.name}} + ) + with pytest.raises( + ValueError, + match=( + rf"ERROR loading file {temp_file.name} for parameter sigma\:\s+" + rf"the \'date\' entries of the provided file do not include all the" + rf" days specified to be modeled by\s+the config\. the provided " + rf"file includes 5 days between 2024-01-01( 00\:00\:00)? to " + rf"2024-01-05( 00\:00\:00)?,\s+while there are 6 days in the config" + rf" time span of 2024-01-01->2024-01-06\. The file must contain " + rf"entries for the\s+the exact start and end dates from the " + rf"config\. " + ), + ): + Parameters( + invalid_timeseries_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 6), + subpop_names=["1", "2"], + ) + + # TODO: I'm not sure how to get to the second pathway to this error message. 
+ # 1) We subset the read in dataframe to `ti` to `tf` so if the dataframe goes + # from 2024-01-01 through 2024-01-05 and the given date range is 2024-01-02 + # through 2024-01-06 the dataframe's date range will be subsetted to 2024-01-02 + # through 2024-01-05 which is a repeat of the above. + # 2) Because of the subsetting you can't provide anything except a monotonic + # increasing sequence of dates, pandas only allows subsetting on ordered date + # indexes so you'll get a different error. + # 3) If you provide a monotonic increasing sequence of dates but 'reverse' `ti` + # and `tf` you get no errors (which I think is also bad) because the slice + # operation returns an empty dataframe with the right columns & index and the + # `pd.date_range` function only creates monotonic increasing sequences and + # 0 == 0. From d21d80857a0e627c6958c5f235061930ec23f27f Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:51:08 -0400 Subject: [PATCH 04/31] Test fixture for `Parameters` attributes Wrote a unit test fixture for correctly instantiating the `Parameters` class as well as checking its documented attributes. Fixed typo in attributes documentation. --- .../gempyor_pkg/src/gempyor/parameters.py | 2 +- .../tests/parameters/test_parameters_class.py | 74 +++++++++++++++++-- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index da41fc394..bec04efcd 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -34,7 +34,7 @@ class Parameters: pdata: A dictionary containing a processed and reformatted view of the `pconfig` attribute. pnames: The names of the parameters given. - pnames2index: A mapping parameter names to their location in the `pnames` + pnames2pindex: A mapping parameter names to their location in the `pnames` attribute. 
stacked_modifier_method: A mapping of modifier method to the parameters to which that modifier method is relevant for. diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index f4c033e4a..dc942adef 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -1,8 +1,9 @@ from datetime import date +from functools import partial +import numpy as np import pandas as pd import pytest -import re from tempfile import NamedTemporaryFile from gempyor.parameters import Parameters @@ -10,12 +11,6 @@ class TestParameters: - # Taken straight from `config_sample_2pop.yml` - valid_parameters_subview = create_confuse_subview_from_dict( - "parameters", - {"sigma": {"value": 0.25}, "gamma": {"value": 0.2}, "Ro": {"value": 2.5}}, - ) - def test_nonunique_parameter_names_value_error(self) -> None: duplicated_parameters = create_confuse_subview_from_dict( "parameters", @@ -110,3 +105,68 @@ def test_timeseries_parameter_has_insufficient_dates_value_error(self) -> None: # operation returns an empty dataframe with the right columns & index and the # `pd.date_range` function only creates monotonic increasing sequences and # 0 == 0. 
+ + def test_parameters_instance_attributes(self) -> None: + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: + param_df.to_csv(temp_file.name, index=False) + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "sigma": {"timeseries": temp_file.name}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": { + "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + assert params.npar == 3 + assert params.pconfig == valid_parameters + assert set(params.pdata.keys()) == {"sigma", "gamma", "Ro"} + assert set(params.pdata["sigma"].keys()) == { + "idx", + "ts", + "stacked_modifier_method", + } + assert params.pdata["sigma"]["idx"] == 0 + assert params.pdata["sigma"]["ts"].equals(param_df.set_index("date")) + assert params.pdata["sigma"]["stacked_modifier_method"] == "product" + assert set(params.pdata["gamma"].keys()) == { + "idx", + "dist", + "stacked_modifier_method", + } + assert params.pdata["gamma"]["idx"] == 1 + assert isinstance(params.pdata["gamma"]["dist"], partial) + assert params.pdata["gamma"]["dist"].func == np.random.uniform + assert params.pdata["gamma"]["dist"].args == (0.1234, 0.1234) + assert params.pdata["gamma"]["stacked_modifier_method"] == "sum" + assert set(params.pdata["Ro"].keys()) == { + "idx", + "dist", + "stacked_modifier_method", + } + assert params.pdata["Ro"]["idx"] == 2 + assert isinstance(params.pdata["Ro"]["dist"], partial) + assert params.pdata["Ro"]["dist"].func == np.random.uniform + assert params.pdata["Ro"]["dist"].args == (1.0, 2.0) + assert params.pdata["Ro"]["stacked_modifier_method"] == "product" + assert params.pnames == ["sigma", "gamma", "Ro"] + assert params.pnames2pindex 
== {"sigma": 0, "gamma": 1, "Ro": 2} + assert params.stacked_modifier_method == { + "sum": ["gamma"], + "product": ["sigma", "ro"], + "reduction_product": [], + } From b3e58d00751637db544bddc29c251085fec3cb32 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:11:38 -0400 Subject: [PATCH 05/31] Added `partials_are_similar` convenience function Added `partials_are_similar` to `gempyor.testing` for testing if `functools.partial` objects are similar enough for unit testing purposes along with corresponding documentation and unit tests. --- flepimop/gempyor_pkg/src/gempyor/testing.py | 48 ++++++++ .../testing/test_partials_are_similar.py | 112 ++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 flepimop/gempyor_pkg/tests/testing/test_partials_are_similar.py diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index bc8ab2343..8c015937e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -12,6 +12,7 @@ ] from collections.abc import Generator +import functools import os from tempfile import TemporaryDirectory from typing import Any @@ -106,3 +107,50 @@ def create_confuse_subview_from_dict( """ root_view = create_confuse_rootview_from_dict({name: data}) return root_view[name] + + +def partials_are_similar( + f: functools.partial, + g: functools.partial, + check_func: bool = True, + check_args: bool = True, + check_keywords: bool = True, +) -> bool: + """ + Check if two partials are 'similar' enough to be equal. + + For most unit testing purposes python's default `__eq__` method does not have the + desired behavior for `functools.partial`. For unit testing purposes it is usually + sufficient that two partials are similar enough. See python/cpython#47814 for more + details on why `__eq__` is tricky for `functools.partial`. + + Args: + f: A partial function to test. 
+ g: A partial function to test. + check_func: If the `func` attributes of `f` and `g` should be checked for + equality. + check_args: If the `args` attributes of `f` and `g` should be checked for + equality. + check_keywords: If the `keywords` attributes of `f` and `g` should be checked + for equality. + + Returns: + A boolean indicating if `f` and `g` are similar. + + Examples: + >>> from functools import partial + >>> a = lambda x, y: x + y + >>> b = partial(a, 1) + >>> c = partial(a, 1.) + >>> b == c + False + >>> partials_are_similar(b, c) + True + """ + if check_func and f.func != g.func: + return False + elif check_args and f.args != g.args: + return False + elif check_keywords and f.keywords != g.keywords: + return False + return True diff --git a/flepimop/gempyor_pkg/tests/testing/test_partials_are_similar.py b/flepimop/gempyor_pkg/tests/testing/test_partials_are_similar.py new file mode 100644 index 000000000..5693fa5b1 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/testing/test_partials_are_similar.py @@ -0,0 +1,112 @@ +from functools import partial +from typing import Literal + +import pytest + +from gempyor.testing import partials_are_similar + + +def add_two_numbers(x: int | float, y: int | float) -> float: + return float(x) + float(y) + + +def combine_two_numbers( + x: int | float, y: int | float, how: Literal["sum", "product"] = "sum" +) -> float: + if how == "sum": + return float(x) + float(y) + return float(x) * float(y) + + +class TestPartialsAreSimilar: + @pytest.mark.parametrize( + "f,g,check_func,check_args,check_keywords", + [ + ( + partial(add_two_numbers, 2), + partial(add_two_numbers, 2), + True, + True, + True, + ), + ( + partial(add_two_numbers, 2), + partial(add_two_numbers, 2.0), + True, + True, + True, + ), + ( + partial(add_two_numbers, 2), + partial(add_two_numbers, 3), + True, + False, + True, + ), + ( + partial(add_two_numbers, 2), + partial(add_two_numbers, 3.0), + True, + False, + True, + ), + ( + partial(add_two_numbers, 2.0), 
+ partial(combine_two_numbers, 2.0), + False, + True, + True, + ), + ( + partial(add_two_numbers, 2.0), + partial(combine_two_numbers, 3.0), + False, + False, + True, + ), + ( + partial(add_two_numbers, 2.0), + partial(combine_two_numbers, 2.0, how="product"), + False, + True, + False, + ), + ( + partial(combine_two_numbers, 2, how="sum"), + partial(combine_two_numbers, 2, how="product"), + True, + True, + False, + ), + ( + partial(combine_two_numbers, 2, how="sum"), + partial(combine_two_numbers, 2.0, how="product"), + True, + True, + False, + ), + ( + partial(combine_two_numbers, 2), + partial(combine_two_numbers, 2, how="sum"), + True, + True, + False, + ), + ], + ) + def test_output_validation( + self, + f: partial, + g: partial, + check_func: bool, + check_args: bool, + check_keywords: bool, + ) -> None: + assert f != g + assert partials_are_similar( + f, + g, + check_func=check_func, + check_args=check_args, + check_keywords=check_keywords, + ) From a375df3c0724a92e6a58171308bcbd86bd2023c4 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:21:18 -0400 Subject: [PATCH 06/31] `partials_are_similar` in `Parameters` unit tests Switch from manually testing partials to using `partials_are_similar` in the `gempyor.parameters.Parameters` unit tests. --- flepimop/gempyor_pkg/src/gempyor/parameters.py | 1 - .../tests/parameters/test_parameters_class.py | 13 ++++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index bec04efcd..5fea04272 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -40,7 +40,6 @@ class Parameters: that modifier method is relevant for.
""" - # Minimal object to be easily picklable for // runs def __init__( self, parameter_config: confuse.ConfigView, diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index dc942adef..2665837b3 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -7,7 +7,7 @@ from tempfile import NamedTemporaryFile from gempyor.parameters import Parameters -from gempyor.testing import create_confuse_subview_from_dict +from gempyor.testing import create_confuse_subview_from_dict, partials_are_similar class TestParameters: @@ -150,8 +150,10 @@ def test_parameters_instance_attributes(self) -> None: } assert params.pdata["gamma"]["idx"] == 1 assert isinstance(params.pdata["gamma"]["dist"], partial) - assert params.pdata["gamma"]["dist"].func == np.random.uniform - assert params.pdata["gamma"]["dist"].args == (0.1234, 0.1234) + assert partials_are_similar( + params.pdata["gamma"]["dist"], + partial(np.random.uniform, 0.1234, 0.1234), + ) assert params.pdata["gamma"]["stacked_modifier_method"] == "sum" assert set(params.pdata["Ro"].keys()) == { "idx", @@ -160,8 +162,9 @@ def test_parameters_instance_attributes(self) -> None: } assert params.pdata["Ro"]["idx"] == 2 assert isinstance(params.pdata["Ro"]["dist"], partial) - assert params.pdata["Ro"]["dist"].func == np.random.uniform - assert params.pdata["Ro"]["dist"].args == (1.0, 2.0) + assert partials_are_similar( + params.pdata["Ro"]["dist"], partial(np.random.uniform, 1.0, 2.0) + ) assert params.pdata["Ro"]["stacked_modifier_method"] == "product" assert params.pnames == ["sigma", "gamma", "Ro"] assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "Ro": 2} From f2be834cd7257c80383e9717a137721fd0bfe145 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:34:10 -0400 Subject: [PATCH 
07/31] Test fixtures for `picklable_lamda_alpha/sigma` Added test fixtures for the `picklable_lamda_alpha/sigma` methods of `gempyor.parameters.Parameters`. Also added some light inline comments for the unit tests. --- .../tests/parameters/test_parameters_class.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 2665837b3..1a970e5c1 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -107,6 +107,7 @@ def test_timeseries_parameter_has_insufficient_dates_value_error(self) -> None: # 0 == 0. def test_parameters_instance_attributes(self) -> None: + # Setup param_df = pd.DataFrame( data={ "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), @@ -132,8 +133,14 @@ def test_parameters_instance_attributes(self) -> None: tf=date(2024, 1, 5), subpop_names=["1", "2"], ) + + # The `npar` attribute assert params.npar == 3 + + # The `pconfig` attribute assert params.pconfig == valid_parameters + + # The `pdata` attribute assert set(params.pdata.keys()) == {"sigma", "gamma", "Ro"} assert set(params.pdata["sigma"].keys()) == { "idx", @@ -166,10 +173,56 @@ def test_parameters_instance_attributes(self) -> None: params.pdata["Ro"]["dist"], partial(np.random.uniform, 1.0, 2.0) ) assert params.pdata["Ro"]["stacked_modifier_method"] == "product" + + # The `pnames` attribute assert params.pnames == ["sigma", "gamma", "Ro"] + + # The `pnames2pindex` attribute assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "Ro": 2} + + # The `stacked_modifier_method` attribute assert params.stacked_modifier_method == { "sum": ["gamma"], "product": ["sigma", "ro"], "reduction_product": [], } + + def test_picklable_lamda_alpha_method(self) -> None: + # Setup + simple_parameters = create_confuse_subview_from_dict( + "parameters", {"sigma": {"value": 
0.1}} + ) + params = Parameters( + simple_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 10), + subpop_names=["1", "2"], + ) + + # Attribute error if `alpha_val` is not set + with pytest.raises(AttributeError): + params.picklable_lamda_alpha() + + # We get the expected value when `alpha_val` is set + params.alpha_val = None + assert params.picklable_lamda_alpha() == None + + def test_picklable_lamda_sigma_method(self) -> None: + # Setup + simple_parameters = create_confuse_subview_from_dict( + "parameters", {"sigma": {"value": 0.1}} + ) + params = Parameters( + simple_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 10), + subpop_names=["1", "2"], + ) + + # Attribute error if `sigma_val` is not set + with pytest.raises(AttributeError): + params.picklable_lamda_sigma() + + # We get the expected value when `sigma_val` is set + params.sigma_val = None + assert params.picklable_lamda_sigma() == None From 25f583c2b921ce3d90da65164cd89596808041c9 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:54:41 -0400 Subject: [PATCH 08/31] Added test fixture for `get_pnames2pindex` Added a test fixture for the `get_pnames2pindex` method of `gempyor.parameters.Parameters`. 
--- .../tests/parameters/test_parameters_class.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 1a970e5c1..908a04c06 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -226,3 +226,17 @@ def test_picklable_lamda_sigma_method(self) -> None: # We get the expected value when `sigma_val` is set params.sigma_val = None assert params.picklable_lamda_sigma() == None + + def test_get_pnames2pindex(self) -> None: + simple_parameters = create_confuse_subview_from_dict( + "parameters", + {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "eta": {"value": 0.3}}, + ) + params = Parameters( + simple_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 10), + subpop_names=["1", "2"], + ) + assert params.get_pnames2pindex() == params.pnames2pindex + assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "eta": 2} From 5dec9dd2974d1dd40554977034aa93394e502551 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 11:31:33 -0400 Subject: [PATCH 09/31] Test fixture for `parameters_quick_draw` Added a test fixture for `parameters_quick_draw` including coverage for the shape issues with time series parameters. Added a note to the documentation for `Parameters.parameters_quick_draw` about time series parameters. 
--- .../gempyor_pkg/src/gempyor/parameters.py | 6 ++ .../tests/parameters/test_parameters_class.py | 94 ++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index 5fea04272..22b119d3f 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -205,6 +205,12 @@ def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: Returns: A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` corresponds to the `npar` attribute of this class. + + Note: + If any of the parameters are 'timeseries' type parameters then `n_days` and + `nsubpops` must be equal to the number of days between `ti` and `tf` given + when initializing this class and the number of subpopulations given to this + class via `subpop_names`. """ param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 908a04c06..30cb67c4a 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -187,7 +187,7 @@ def test_parameters_instance_attributes(self) -> None: "reduction_product": [], } - def test_picklable_lamda_alpha_method(self) -> None: + def test_picklable_lamda_alpha(self) -> None: # Setup simple_parameters = create_confuse_subview_from_dict( "parameters", {"sigma": {"value": 0.1}} @@ -207,7 +207,7 @@ def test_picklable_lamda_alpha_method(self) -> None: params.alpha_val = None assert params.picklable_lamda_alpha() == None - def test_picklable_lamda_sigma_method(self) -> None: + def test_picklable_lamda_sigma(self) -> None: # Setup simple_parameters = create_confuse_subview_from_dict( "parameters", 
{"sigma": {"value": 0.1}} @@ -240,3 +240,93 @@ def test_get_pnames2pindex(self) -> None: ) assert params.get_pnames2pindex() == params.pnames2pindex assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "eta": 2} + + def test_parameters_quick_draw(self) -> None: + # First with a time series param, fixed size draws + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: + param_df.to_csv(temp_file.name, index=False) + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "sigma": {"timeseries": temp_file.name}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": { + "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + + # Test the exception + with pytest.raises( + ValueError, + match=( + r"could not broadcast input array from shape " + r"\(5\,2\) into shape \(4\,2\)" + ), + ): + params.parameters_quick_draw(4, 2) + + # Test our result + p_draw = params.parameters_quick_draw(5, 2) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (3, 5, 2) + assert np.allclose( + p_draw[0, :, :], + np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), + ) + assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((5, 2))) + assert np.greater_equal(p_draw[2, :, :], 1.0).all() + assert np.less(p_draw[2, :, :], 2.0).all() + + # Second without a time series param, arbitrary sized draws + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "eta": {"value": 2.2}, + "nu": { + "value": { + "distribution": "truncnorm", + "mean": 0.0, + "sd": 2.0, + "a": -2.0, + "b": 2.0, + } + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), 
+ tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + + p_draw = params.parameters_quick_draw(5, 2) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (2, 5, 2) + assert np.allclose(p_draw[0, :, :], 2.2) + assert np.greater_equal(p_draw[1, :, :], -2.0).all() + assert np.less_equal(p_draw[1, :, :], 2.0).all() + + p_draw = params.parameters_quick_draw(4, 3) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (2, 4, 3) + assert np.allclose(p_draw[0, :, :], 2.2) + assert np.greater_equal(p_draw[1, :, :], -2.0).all() + assert np.less_equal(p_draw[1, :, :], 2.0).all() From 896023fa5cc7493801002275c04f924b3d6136a8 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:42:03 -0400 Subject: [PATCH 10/31] Test fixture for `Parameters.parameters_load` Added a test fixture for `Parameters.parameters_load` as well as add some to the fixture for `parameters_quick_draw`. Light documentation updates to the corresponding method to clarify conforming sizes with time series parameters. --- .../gempyor_pkg/src/gempyor/parameters.py | 8 +- .../tests/parameters/test_parameters_class.py | 128 ++++++++++++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index 22b119d3f..feeb480cb 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -232,7 +232,7 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> with a given dataframe. Args: - param_df: A dataframe containing the columns 'parameter' and 'value'. If + param_df: A DataFrame containing the columns 'parameter' and 'value'. If more than one entry for a given parameter is given then only the first value will be taken. n_days: The number of days to generate an array for. 
@@ -241,6 +241,12 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> Returns: A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` corresponds to the `npar` attribute of this class. + + Note: + If any of the parameters are 'timeseries' type parameters and are not being + overridden then `n_days` and `nsubpops` must be equal to the number of days + between `ti` and `tf` given when initializing this class and the number of + subpopulations given to this class via `subpop_names`. """ param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 30cb67c4a..0035237f1 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -291,6 +291,7 @@ def test_parameters_quick_draw(self) -> None: assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((5, 2))) assert np.greater_equal(p_draw[2, :, :], 1.0).all() assert np.less(p_draw[2, :, :], 2.0).all() + assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) # Second without a time series param, arbitrary sized draws valid_parameters = create_confuse_subview_from_dict( @@ -322,6 +323,7 @@ def test_parameters_quick_draw(self) -> None: assert np.allclose(p_draw[0, :, :], 2.2) assert np.greater_equal(p_draw[1, :, :], -2.0).all() assert np.less_equal(p_draw[1, :, :], 2.0).all() + assert np.allclose(p_draw[1, :, :], p_draw[1, 0, 0]) p_draw = params.parameters_quick_draw(4, 3) assert isinstance(p_draw, np.ndarray) @@ -330,3 +332,129 @@ def test_parameters_quick_draw(self) -> None: assert np.allclose(p_draw[0, :, :], 2.2) assert np.greater_equal(p_draw[1, :, :], -2.0).all() assert np.less_equal(p_draw[1, :, :], 2.0).all() + assert np.allclose(p_draw[1, :, :], p_draw[1, 0, 0]) + + def 
test_parameters_load(self) -> None: + # Setup + param_overrides_df = pd.DataFrame( + {"parameter": ["nu", "gamma", "nu"], "value": [0.1, 0.2, 0.3]} + ) + param_empty_df = pd.DataFrame({"parameter": [], "value": []}) + + # With time series + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: + param_df.to_csv(temp_file.name, index=False) + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "sigma": {"timeseries": temp_file.name}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": { + "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + + # Test the exception + with pytest.raises( + ValueError, + match=( + r"could not broadcast input array from shape " + r"\(5\,2\) into shape \(4\,2\)" + ), + ): + params.parameters_load(param_empty_df, 4, 2) + + # Empty overrides + p_draw = params.parameters_load(param_empty_df, 5, 2) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (3, 5, 2) + assert np.allclose( + p_draw[0, :, :], + np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), + ) + assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((5, 2))) + assert np.greater_equal(p_draw[2, :, :], 1.0).all() + assert np.less(p_draw[2, :, :], 2.0).all() + assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) + + # But if we override time series no exception + p_draw = params.parameters_load( + pd.DataFrame({"parameter": ["sigma"], "value": [12.34]}), 4, 2 + ) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (3, 4, 2) + assert np.allclose(p_draw[0, :, :], 12.34) + assert np.allclose(p_draw[1, :, :], 0.1234 * 
np.ones((4, 2))) + assert np.greater_equal(p_draw[2, :, :], 1.0).all() + assert np.less(p_draw[2, :, :], 2.0).all() + assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) + + # If not overriding time series then must conform + p_draw = params.parameters_load(param_overrides_df, 5, 2) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (3, 5, 2) + assert np.allclose( + p_draw[0, :, :], + np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), + ) + assert np.allclose(p_draw[1, :, :], 0.2 * np.ones((5, 2))) + assert np.greater_equal(p_draw[2, :, :], 1.0).all() + assert np.less(p_draw[2, :, :], 2.0).all() + assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) + + # Without time series + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "eta": {"value": 2.2}, + "nu": { + "value": { + "distribution": "truncnorm", + "mean": 0.0, + "sd": 2.0, + "a": -2.0, + "b": 2.0, + } + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + + # Takes an 'empty' DataFrame + p_draw = params.parameters_load(param_empty_df, 5, 2) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (2, 5, 2) + assert np.allclose(p_draw[0, :, :], 2.2) + assert np.greater_equal(p_draw[1, :, :], -2.0).all() + assert np.less_equal(p_draw[1, :, :], 2.0).all() + + # Takes a DataFrame with values, only takes the first + p_draw = params.parameters_load(param_overrides_df, 4, 3) + assert isinstance(p_draw, np.ndarray) + assert p_draw.dtype == np.float64 + assert p_draw.shape == (2, 4, 3) + assert np.allclose(p_draw[0, :, :], 2.2) + assert np.allclose(p_draw[1, :, :], 0.1) From 0fec2da18617324bbcc9b24fbd59ed71f4f9968b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 08:48:14 -0400 Subject: [PATCH 11/31] Test fixture for 
`Parameters.getParameterDF` Added a test fixture for the `getParameterDF` method of `gempyor.parameters.Parameters`. --- .../gempyor_pkg/src/gempyor/parameters.py | 5 +-- .../tests/parameters/test_parameters_class.py | 43 +++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index feeb480cb..8a304066b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -268,8 +268,8 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: """ Serialize a parameter draw as a pandas `DataFrame`. - This method only considers distribution parameters and will pull the first - sample from the `p_draw` given. + This method only considers distribution parameters, which does include fixed + parameters. Args: p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that @@ -287,7 +287,6 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: index=[pn for idx, pn in enumerate(self.pnames) if "dist" in self.pdata[pn]], ) out_df["parameter"] = out_df.index - return out_df def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 0035237f1..dba3f61fa 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -458,3 +458,46 @@ def test_parameters_load(self) -> None: assert p_draw.shape == (2, 4, 3) assert np.allclose(p_draw[0, :, :], 2.2) assert np.allclose(p_draw[1, :, :], 0.1) + + def test_getParameterDF(self) -> None: + param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + with NamedTemporaryFile(suffix=".csv") as temp_file: 
+ param_df.to_csv(temp_file.name, index=False) + valid_parameters = create_confuse_subview_from_dict( + "parameters", + { + "sigma": {"timeseries": temp_file.name}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": { + "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} + }, + }, + ) + params = Parameters( + valid_parameters, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2"], + ) + + # Create a quick sample + p_draw = params.parameters_quick_draw(5, 2) + df = params.getParameterDF(p_draw) + assert isinstance(df, pd.DataFrame) + assert df.shape == (2, 2) + assert df.columns.to_list() == ["value", "parameter"] + assert df["parameter"].to_list() == ["gamma", "Ro"] + values = df["value"].to_list() + assert values[0] == 0.1234 + assert values[1] >= 1.0 + assert values[1] < 2.0 + assert (df.index.to_series() == df["parameter"]).all() + + # Make clear that 'sigma' is not present because it's a time series + assert "sigma" not in df["parameter"].to_list() From 893fac286367d31fbb4378e5b8e325a9ef9d2c68 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 09:15:44 -0400 Subject: [PATCH 12/31] Add note for `parameters_reduce` test fixture Did not implement a test fixture for the `parameters_reduce` method of `gempyor.parameters.Parameters` for now. Left a note explaining the blocker of getting a handle on the `NPI` module. 
--- .../gempyor_pkg/tests/parameters/test_parameters_class.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index dba3f61fa..66a1b1c6a 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -501,3 +501,8 @@ def test_getParameterDF(self) -> None: # Make clear that 'sigma' is not present because it's a time series assert "sigma" not in df["parameter"].to_list() + + def test_parameters_reduce(self) -> None: + # TODO: Come back and unit test this method after getting a better handle on + # these NPI objects. + pass From fe830b1675d3f9d1ca26a115b290b966a0b93622 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 5 Aug 2024 10:55:31 -0400 Subject: [PATCH 13/31] Parameterized insufficient dates test fixture Parameterized `test_timeseries_parameter_has_insufficient_dates_value_error` test fixture by moving custom DataFrame to shared `MockData` class. 
--- .../tests/parameters/test_parameters_class.py | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 66a1b1c6a..e9ab0cc0f 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -10,6 +10,16 @@ from gempyor.testing import create_confuse_subview_from_dict, partials_are_similar +class MockData: + simple_timeseries_param_df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + + class TestParameters: def test_nonunique_parameter_names_value_error(self) -> None: duplicated_parameters = create_confuse_subview_from_dict( @@ -58,38 +68,40 @@ def test_timeseries_parameter_has_insufficient_columns_value_error(self) -> None subpop_names=["1", "2", "3"], ) - def test_timeseries_parameter_has_insufficient_dates_value_error(self) -> None: + @pytest.mark.parametrize( + "start_date,end_date,timeseries_df", + [(date(2024, 1, 1), date(2024, 1, 6), MockData.simple_timeseries_param_df)], + ) + def test_timeseries_parameter_has_insufficient_dates_value_error( + self, start_date: date, end_date: date, timeseries_df: pd.DataFrame + ) -> None: # First way to get at this error, purely a length difference - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } - ) with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) + timeseries_df.to_csv(temp_file.name, index=False) invalid_timeseries_parameters = create_confuse_subview_from_dict( "parameters", {"sigma": {"timeseries": temp_file.name}} ) + timeseries_start_date = timeseries_df["date"].dt.date.min() + 
timeseries_end_date = timeseries_df["date"].dt.date.max() + subpop_names = [c for c in timeseries_df.columns.to_list() if c != "date"] with pytest.raises( ValueError, match=( rf"ERROR loading file {temp_file.name} for parameter sigma\:\s+" rf"the \'date\' entries of the provided file do not include all the" rf" days specified to be modeled by\s+the config\. the provided " - rf"file includes 5 days between 2024-01-01( 00\:00\:00)? to " - rf"2024-01-05( 00\:00\:00)?,\s+while there are 6 days in the config" - rf" time span of 2024-01-01->2024-01-06\. The file must contain " - rf"entries for the\s+the exact start and end dates from the " - rf"config\. " + rf"file includes 5 days between {timeseries_start_date}" + rf"( 00\:00\:00)? to {timeseries_end_date}( 00\:00\:00)?,\s+while " + rf"there are 6 days in the config time span of {start_date}->" + rf"{end_date}\. The file must contain entries for the\s+the exact " + rf"start and end dates from the config\. " ), ): Parameters( invalid_timeseries_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 6), - subpop_names=["1", "2"], + ti=start_date, + tf=end_date, + subpop_names=subpop_names, ) # TODO: I'm not sure how to get to the second pathway to this error message. From c9a64be27bfab646497f91c5928b4d094d0c2a72 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:52:34 -0400 Subject: [PATCH 14/31] Add `partials_are_similar` to `__all__` Export the `partials_are_similar` function from the `gempyor.testing` module using the `__all__` dunder. 
--- flepimop/gempyor_pkg/src/gempyor/testing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index 8c015937e..ae6a0088c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -9,6 +9,7 @@ "change_directory_to_temp_directory", "create_confuse_rootview_from_dict", "create_confuse_subview_from_dict", + "partials_are_similar", ] from collections.abc import Generator From 2aa7267700eabadb0ef88820f85cfddd72dea797 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 5 Aug 2024 16:51:23 -0400 Subject: [PATCH 15/31] Added `random_distribution_sampler` to `utils` Added a function with the same core logic as `gempyor.utils.as_random_distribution` that is decoupled from the `confuse.ConfigView` class along with corresponding documentation and tests. --- flepimop/gempyor_pkg/src/gempyor/utils.py | 77 +++++++++++++++++- .../utils/test_random_distribution_sampler.py | 80 +++++++++++++++++++ 2 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 flepimop/gempyor_pkg/tests/utils/test_random_distribution_sampler.py diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py index 6131b5c52..f5062c190 100644 --- a/flepimop/gempyor_pkg/src/gempyor/utils.py +++ b/flepimop/gempyor_pkg/src/gempyor/utils.py @@ -7,7 +7,7 @@ import shutil import subprocess import time -from typing import List, Dict, Literal +from typing import Any, Callable, Dict, List, Literal import confuse import numpy as np @@ -299,6 +299,81 @@ def get_log_normal( return scipy.stats.lognorm(s=sdlog, scale=np.exp(meanlog), loc=0) +def random_distribution_sampler( + distribution: Literal[ + "fixed", "uniform", "poisson", "binomial", "truncnorm", "lognorm" + ], + **kwargs: dict[str, Any] +) -> Callable[[], float | int]: + """ + Create function to sample from a random 
distribution. + + Args: + distribution: The type of distribution to generate a sampling function for. + **kwargs: Further parameters that are passed to the underlying function for the + given distribution. + + Notes: + The further args expected by each distribution type are: + - fixed: value, + - uniform: low, high, + - poisson: lam, + - binomial: n, p, + - truncnorm: mean, sd, a, b, + - lognorm: meanlog, sdlog. + + Returns: + A function that can be called to sample from that distribution. + + Raises: + ValueError: If `distribution` is 'binomial' the given `p` must be in (0,1). + NotImplementedError: If `distribution` is not one of the type hinted options. + + Examples: + >>> import numpy as np + >>> np.random.seed(123) + >>> uniform_sampler = random_distribution_sampler("uniform", low=0.0, high=3.0) + >>> uniform_sampler() + 2.089407556793585 + >>> uniform_sampler() + 0.8584180048511384 + """ + if distribution == "fixed": + # Fixed value is the same as uniform on [a, a) + return functools.partial( + np.random.uniform, + kwargs.get("value"), + kwargs.get("value"), + ) + elif distribution == "uniform": + # Uniform on [low, high) + return functools.partial( + np.random.uniform, + kwargs.get("low"), + kwargs.get("high"), + ) + elif distribution == "poisson": + # Poisson with mean lambda + return functools.partial(np.random.poisson, kwargs.get("lam")) + elif distribution == "binomial": + p = kwargs.get("p") + if not (0 < p < 1): + raise ValueError(f"p value {p} is out of range [0,1]") + return functools.partial(np.random.binomial, kwargs.get("n"), p) + elif distribution == "truncnorm": + # Truncated normal with mean, sd on interval [a, b] + return get_truncated_normal( + mean=kwargs.get("mean"), + sd=kwargs.get("sd"), + a=kwargs.get("a"), + b=kwargs.get("b"), + ).rvs + elif distribution == "lognorm": + # Lognormal distribution with meanlog, sdlog + return get_log_normal(kwargs.get("meanlog"), kwargs.get("sdlog")).rvs + raise NotImplementedError(f"unknown distribution 
[got: {distribution}]") + + @add_method(confuse.ConfigView) def as_random_distribution(self): "Constructs a random distribution object from a distribution config key" diff --git a/flepimop/gempyor_pkg/tests/utils/test_random_distribution_sampler.py b/flepimop/gempyor_pkg/tests/utils/test_random_distribution_sampler.py new file mode 100644 index 000000000..64efc98b1 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/utils/test_random_distribution_sampler.py @@ -0,0 +1,80 @@ +from functools import partial +import inspect +from typing import Any + +import numpy as np +import pytest + +from gempyor.testing import partials_are_similar +from gempyor.utils import random_distribution_sampler + + +class TestRandomDistributionSampler: + @pytest.mark.parametrize("distribution", [("abc"), ("def"), ("ghi")]) + def test_not_implemented_error_exception(self, distribution: str) -> None: + with pytest.raises( + NotImplementedError, + match=rf"^unknown distribution \[got\: {distribution}\]$", + ): + random_distribution_sampler(distribution) + + @pytest.mark.parametrize("p", [(-0.5), (1.2), (0.0), (1.0)]) + def test_binomial_p_value_error(self, p: float) -> None: + with pytest.raises( + ValueError, + match=rf"^p value {p} is out of range \[0\,1\]$", + ): + random_distribution_sampler("binomial", n=100, p=p) + + @pytest.mark.parametrize( + "distribution,kwargs", + [ + ("fixed", {"value": 0.12}), + ("fixed", {"value": -3.45}), + ("fixed", {"value": 0.0}), + ("uniform", {"low": 0.0, "high": 1.0}), + ("uniform", {"low": 50.0, "high": 200.0}), + ("uniform", {"low": -1.0, "high": 1.0}), + ("uniform", {"low": 1.0, "high": -1.0}), + ("poisson", {"lam": 0.1}), + ("poisson", {"lam": 1.23}), + ("poisson", {"lam": -0.1}), + ("binomial", {"n": 10, "p": 0.1}), + ("binomial", {"n": -10, "p": 0.1}), + ("binomial", {"n": 50, "p": 0.67}), + ("truncnorm", {"mean": 0.0, "sd": 1.0, "a": -2.0, "b": 2.0}), + ("truncnorm", {"mean": 1.4, "sd": 0.34, "a": -0.3, "b": 22.8}), + ("lognorm", {"sdlog": 1.0, "meanlog": 
1.0}), + ("lognorm", {"sdlog": 3.4, "meanlog": -0.56}), + ], + ) + def test_output_validation(self, distribution: str, kwargs: dict[str, Any]) -> None: + actual = random_distribution_sampler(distribution, **kwargs) + if distribution == "fixed": + expected = partial( + np.random.uniform, kwargs.get("value"), kwargs.get("value") + ) + assert partials_are_similar(actual, expected) + elif distribution == "uniform": + expected = partial(np.random.uniform, kwargs.get("low"), kwargs.get("high")) + assert partials_are_similar(actual, expected) + elif distribution == "poisson": + expected = partial(np.random.poisson, kwargs.get("lam")) + assert partials_are_similar(actual, expected) + elif distribution == "binomial": + expected = partial(np.random.binomial, kwargs.get("n"), kwargs.get("p")) + assert partials_are_similar(actual, expected) + elif distribution == "truncnorm": + assert inspect.ismethod(actual) + assert actual.__self__.kwds.get("loc") == kwargs.get("mean") + assert actual.__self__.kwds.get("scale") == kwargs.get("sd") + assert actual.__self__.a == ( + kwargs.get("a") - kwargs.get("mean") + ) / kwargs.get("sd") + assert actual.__self__.b == ( + kwargs.get("b") - kwargs.get("mean") + ) / kwargs.get("sd") + elif distribution == "lognorm": + assert inspect.ismethod(actual) + assert actual.__self__.kwds.get("s") == kwargs.get("sdlog") + assert actual.__self__.kwds.get("scale") == np.exp(kwargs.get("meanlog")) From 7822ce9d9669b2ff2c24986ed84f9908dbf41fbc Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 5 Aug 2024 16:52:58 -0400 Subject: [PATCH 16/31] Parameterized `Parameters` attributes test fixture Decoupled the `test_parameters_instance_attributes` test fixture from a specific set of inputs and generalized it to accept arbitrary factories that generate valid input sets for `Parameters`. 
--- .../tests/parameters/test_parameters_class.py | 209 +++++++++++------- 1 file changed, 135 insertions(+), 74 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index e9ab0cc0f..8a6c553a9 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -1,6 +1,9 @@ from datetime import date from functools import partial +import pathlib +from typing import Any, Callable +import confuse import numpy as np import pandas as pd import pytest @@ -8,6 +11,7 @@ from gempyor.parameters import Parameters from gempyor.testing import create_confuse_subview_from_dict, partials_are_similar +from gempyor.utils import random_distribution_sampler class MockData: @@ -20,6 +24,37 @@ class MockData: ) +def valid_parameters_factory( + tmp_path: pathlib.Path, +) -> tuple[dict[str, pd.DataFrame], dict[str, dict[str, Any]]]: + """ + Factory for creating small and valid set of parameters. + + Creates the configuration for three parameters: + - 'sigma': A time series, + - 'gamma': A fixed value of 0.1234 with a sum stacked modifier, + - 'Ro': A uniform distribution between 1 and 2. + + Args: + tmp_path: A temporary file path, typically provided by pytest's `tmp_path` + fixture. + + Returns: + A tuple of a dictionary of pandas DataFrames where the keys are the parameter + names and the values are time series values and a dictionary of configuration + values that can be converted to a confuse subview. 
+ """ + tmp_file = tmp_path / "valid_parameters_factory_df.csv" + df = MockData.simple_timeseries_param_df.copy() + df.to_csv(tmp_file, index=False) + params = { + "sigma": {"timeseries": str(tmp_file.absolute())}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": {"value": {"distribution": "uniform", "low": 1.0, "high": 2.0}}, + } + return [{"sigma": df}, params] + + class TestParameters: def test_nonunique_parameter_names_value_error(self) -> None: duplicated_parameters = create_confuse_subview_from_dict( @@ -118,86 +153,112 @@ def test_timeseries_parameter_has_insufficient_dates_value_error( # `pd.date_range` function only creates monotonic increasing sequences and # 0 == 0. - def test_parameters_instance_attributes(self) -> None: + @pytest.mark.parametrize("factory", [(valid_parameters_factory)]) + def test_parameters_instance_attributes( + self, + tmp_path: pathlib.Path, + factory: Callable[ + [pathlib.Path], tuple[dict[str, pd.DataFrame], dict[str, dict[str, Any]]] + ], + ) -> None: # Setup - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } + timeseries_dfs, param_config = factory(tmp_path) + if timeseries_dfs: + start_date = None + end_date = None + subpop_names = [] + for _, df in timeseries_dfs.items(): + df_start_date = df["date"].dt.date.min() + if start_date is None or df_start_date < start_date: + start_date = df_start_date + df_end_date = df["date"].dt.date.max() + if end_date is None or df_end_date > end_date: + end_date = df_end_date + if df.shape[1] > 2: + subpop_names += [c for c in df.columns.to_list() if c != "date"] + if not subpop_names: + subpop_names = ["1", "2"] # filler value if all time series are 1 value + else: + start_date = date(2024, 1, 1) + end_date = date(2024, 1, 5) + subpop_names = ["1", "2"] + + valid_parameters = create_confuse_subview_from_dict("parameters", param_config) + params = 
Parameters( + valid_parameters, + ti=start_date, + tf=end_date, + subpop_names=subpop_names, ) - with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - "sigma": {"timeseries": temp_file.name}, - "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, - "Ro": { - "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} - }, - }, - ) - params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) - - # The `npar` attribute - assert params.npar == 3 - - # The `pconfig` attribute - assert params.pconfig == valid_parameters - # The `pdata` attribute - assert set(params.pdata.keys()) == {"sigma", "gamma", "Ro"} - assert set(params.pdata["sigma"].keys()) == { - "idx", - "ts", - "stacked_modifier_method", - } - assert params.pdata["sigma"]["idx"] == 0 - assert params.pdata["sigma"]["ts"].equals(param_df.set_index("date")) - assert params.pdata["sigma"]["stacked_modifier_method"] == "product" - assert set(params.pdata["gamma"].keys()) == { - "idx", - "dist", - "stacked_modifier_method", - } - assert params.pdata["gamma"]["idx"] == 1 - assert isinstance(params.pdata["gamma"]["dist"], partial) - assert partials_are_similar( - params.pdata["gamma"]["dist"], - partial(np.random.uniform, 0.1234, 0.1234), - ) - assert params.pdata["gamma"]["stacked_modifier_method"] == "sum" - assert set(params.pdata["Ro"].keys()) == { - "idx", - "dist", - "stacked_modifier_method", - } - assert params.pdata["Ro"]["idx"] == 2 - assert isinstance(params.pdata["Ro"]["dist"], partial) - assert partials_are_similar( - params.pdata["Ro"]["dist"], partial(np.random.uniform, 1.0, 2.0) - ) - assert params.pdata["Ro"]["stacked_modifier_method"] == "product" + # The `npar` attribute + assert params.npar == len(param_config) + + # The `pconfig` attribute + assert params.pconfig == valid_parameters + + # The `pdata` 
attribute + assert set(params.pdata.keys()) == set(param_config.keys()) + for param_name, param_conf in param_config.items(): + assert params.pdata[param_name]["idx"] == params.pnames2pindex[param_name] + assert params.pdata[param_name][ + "stacked_modifier_method" + ] == param_conf.get("stacked_modifier_method", "product") + if "timeseries" in param_conf: + assert params.pdata[param_name]["ts"].equals( + timeseries_dfs[param_name].set_index("date") + ) + elif isinstance(params.pdata[param_name]["dist"], partial): + if isinstance(param_conf.get("value"), float): + expected = random_distribution_sampler( + "fixed", value=param_conf.get("value") + ) + else: + expected = random_distribution_sampler( + param_conf.get("value").get("distribution"), + **{ + k: v + for k, v in param_conf.get("value").items() + if k != "distribution" + }, + ) + assert partials_are_similar(params.pdata[param_name]["dist"], expected) + else: + expected = random_distribution_sampler( + param_conf.get("value").get("distribution"), + **{ + k: v + for k, v in param_conf.get("value").items() + if k != "distribution" + }, + ) + assert ( + params.pdata[param_name]["dist"].__self__.kwds + == expected.__self__.kwds + ) + assert ( + params.pdata[param_name]["dist"].__self__.support() + == expected.__self__.support() + ) - # The `pnames` attribute - assert params.pnames == ["sigma", "gamma", "Ro"] + # The `pnames` attribute + assert params.pnames == list(param_config.keys()) - # The `pnames2pindex` attribute - assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "Ro": 2} + # The `pnames2pindex` attribute + assert params.pnames2pindex == { + key: idx for idx, key in enumerate(param_config.keys()) + } - # The `stacked_modifier_method` attribute - assert params.stacked_modifier_method == { - "sum": ["gamma"], - "product": ["sigma", "ro"], - "reduction_product": [], - } + # # The `stacked_modifier_method` attribute + expected_stacked_modifier_method = { + "sum": [], + "product": [], + 
"reduction_product": [], + } + for param_name, param_conf in param_config.items(): + modifier_type = param_conf.get("stacked_modifier_method", "product") + expected_stacked_modifier_method[modifier_type].append(param_name.lower()) + assert params.stacked_modifier_method == expected_stacked_modifier_method def test_picklable_lamda_alpha(self) -> None: # Setup From cb95001ef282d7ee4b08ef63c941de8ef62c5baf Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:04:36 -0400 Subject: [PATCH 17/31] Parametrize accessor methods of `Parameters` Parametrized the `test_picklable_lamda_alpha`, `test_picklable_lamda_sigma`, and `test_get_pnames2pindex` test fixtures to accept arbitrary valid inputs. --- .../tests/parameters/test_parameters_class.py | 82 +++++++++++-------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 8a6c553a9..98342c5f4 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -23,6 +23,25 @@ class MockData: } ) + simple_inputs = { + "parameter_config": create_confuse_subview_from_dict( + "parameters", {"sigma": {"value": 0.1}} + ), + "ti": date(2024, 1, 1), + "tf": date(2024, 1, 10), + "subpop_names": ["1", "2", "3"], + } + + small_inputs = { + "parameter_config": create_confuse_subview_from_dict( + "parameters", + {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "eta": {"value": 0.3}}, + ), + "ti": date(2024, 1, 1), + "tf": date(2024, 1, 31), + "subpop_names": ["1", "2"], + } + def valid_parameters_factory( tmp_path: pathlib.Path, @@ -260,59 +279,50 @@ def test_parameters_instance_attributes( expected_stacked_modifier_method[modifier_type].append(param_name.lower()) assert params.stacked_modifier_method == expected_stacked_modifier_method - def 
test_picklable_lamda_alpha(self) -> None: + @pytest.mark.parametrize( + "parameters_inputs,alpha_val", [(MockData.simple_inputs, None)] + ) + def test_picklable_lamda_alpha( + self, parameters_inputs: dict[str, Any], alpha_val: Any + ) -> None: # Setup - simple_parameters = create_confuse_subview_from_dict( - "parameters", {"sigma": {"value": 0.1}} - ) - params = Parameters( - simple_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 10), - subpop_names=["1", "2"], - ) + params = Parameters(**parameters_inputs) # Attribute error if `alpha_val` is not set with pytest.raises(AttributeError): params.picklable_lamda_alpha() # We get the expected value when `alpha_val` is set - params.alpha_val = None - assert params.picklable_lamda_alpha() == None + params.alpha_val = alpha_val + assert params.picklable_lamda_alpha() == alpha_val - def test_picklable_lamda_sigma(self) -> None: + @pytest.mark.parametrize( + "parameters_inputs,sigma_val", [(MockData.simple_inputs, None)] + ) + def test_picklable_lamda_sigma( + self, parameters_inputs: dict[str, Any], sigma_val: Any + ) -> None: # Setup - simple_parameters = create_confuse_subview_from_dict( - "parameters", {"sigma": {"value": 0.1}} - ) - params = Parameters( - simple_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 10), - subpop_names=["1", "2"], - ) + params = Parameters(**parameters_inputs) # Attribute error if `sigma_val` is not set with pytest.raises(AttributeError): params.picklable_lamda_sigma() # We get the expected value when `sigma_val` is set - params.sigma_val = None - assert params.picklable_lamda_sigma() == None + params.sigma_val = sigma_val + assert params.picklable_lamda_sigma() == sigma_val - def test_get_pnames2pindex(self) -> None: - simple_parameters = create_confuse_subview_from_dict( - "parameters", - {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "eta": {"value": 0.3}}, - ) - params = Parameters( - simple_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 10), - subpop_names=["1",
"2"], - ) + @pytest.mark.parametrize( + "parameters_inputs", [(MockData.simple_inputs), (MockData.small_inputs)] + ) + def test_get_pnames2pindex(self, parameters_inputs: dict[str, Any]) -> None: + params = Parameters(**parameters_inputs) assert params.get_pnames2pindex() == params.pnames2pindex - assert params.pnames2pindex == {"sigma": 0, "gamma": 1, "eta": 2} + assert params.pnames2pindex == { + key: idx + for idx, key in enumerate(parameters_inputs["parameter_config"].keys()) + } def test_parameters_quick_draw(self) -> None: # First with a time series param, fixed size draws From 7e88353d2f934d8637585542ba3c16325860b3d7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:51:20 -0400 Subject: [PATCH 18/31] Added `sample_fits_distribution` testing util Added a function to `gempyor.testing` to determine if a given value is supported by a given distribution and its parameters. It does not test if said sample is plausible or not.
--- flepimop/gempyor_pkg/src/gempyor/testing.py | 77 ++++++++++++++++++- .../testing/test_sample_fits_distribution.py | 77 +++++++++++++++++++ 2 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 flepimop/gempyor_pkg/tests/testing/test_sample_fits_distribution.py diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index ae6a0088c..56f92c3eb 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -10,15 +10,17 @@ "create_confuse_rootview_from_dict", "create_confuse_subview_from_dict", "partials_are_similar", + "sample_fits_distribution", ] from collections.abc import Generator import functools import os from tempfile import TemporaryDirectory -from typing import Any +from typing import Any, Literal import confuse +import numpy as np import pytest @@ -155,3 +157,76 @@ def partials_are_similar( elif check_keywords and f.keywords != g.keywords: return False return True + + +def sample_fits_distribution( + sample: float | int, + distribution: Literal[ + "fixed", "uniform", "poisson", "binomial", "truncnorm", "lognorm" + ], + **kwargs: dict[str, Any], +) -> bool: + """ + Test if a sample fits a distribution with a given set of parameters. + + This function tests if the given `sample` could possibly be drawn from the + distribution given with its parameters, but it does not test if it could reasonably + be drawn from that distribution. + + Args: + sample: The value to test. + distribution: The name of the distribution to test against. + **kwargs: Further arguments to specify the parameters of a distribution. + + Returns: + A boolean indicating if the sample given could be from the distribution. 
+ + See Also: + gempyor.utils.random_distribution_sampler + + Examples: + >>> sample_fits_distribution(0.0, "fixed", value=0.0) + True + >>> sample_fits_distribution(0.0, "fixed", value=0.5) + False + >>> sample_fits_distribution(0.5, "poisson", lam=3.0) + False + >>> sample_fits_distribution( + ... -3.5, "truncnorm", a=-5.5, b=3.4, mean=-1.4, sd=1.1 + ... ) + True + >>> sample_fits_distribution(100000000, "lognorm", meanlog=1.0, sdlog=1.0) + True + """ + # Poisson and binomial only have support on a subset of the integers + if distribution in ["poisson", "binomial"] and not ( + isinstance(sample, int) or (isinstance(sample, float) and sample.is_integer()) + ): + return False + # Now check distribution constraints + if distribution == "fixed": + return bool(np.isclose(sample, kwargs.get("value"))) + elif distribution == "uniform": + # Uniform is on [low,high), but want uniform to match fixed when low == high. + return bool( + ( + np.isclose(kwargs.get("high"), kwargs.get("low")) + and np.isclose(sample, kwargs.get("low")) + ) + or ( + np.greater_equal(sample, kwargs.get("low")) + and np.less(sample, kwargs.get("high")) + ) + ) + elif distribution == "poisson": + return bool(np.greater_equal(sample, 0.0)) + elif distribution == "binomial": + return bool( + np.greater_equal(sample, 0.0) and np.less_equal(sample, kwargs.get("n")) + ) + elif distribution == "truncnorm": + return bool( + np.greater(sample, kwargs.get("a")) and np.less(sample, kwargs.get("b")) + ) + elif distribution == "lognorm": + return bool(np.greater(sample, 0.0)) diff --git a/flepimop/gempyor_pkg/tests/testing/test_sample_fits_distribution.py b/flepimop/gempyor_pkg/tests/testing/test_sample_fits_distribution.py new file mode 100644 index 000000000..e88867e5f --- /dev/null +++ b/flepimop/gempyor_pkg/tests/testing/test_sample_fits_distribution.py @@ -0,0 +1,77 @@ +from typing import Any, Literal + +import pytest + +from gempyor.testing import sample_fits_distribution + + +class 
TestSampleFitsDistribution: + @pytest.mark.parametrize( + "sample,distribution,kwargs,expected", + [ + # Fixed distribution + (0.5, "fixed", {"value": 0.5}, True), + (0.5, "fixed", {"value": 0.6}, False), + (1, "fixed", {"value": 1}, True), + (1.0, "fixed", {"value": 1.0}, True), + (1, "fixed", {"value": 1.0}, True), + (1.0, "fixed", {"value": 1}, True), + (0.0000001, "fixed", {"value": 0.0}, False), + (0.00000001, "fixed", {"value": 0.0}, True), + # Uniform distribution + (0.5, "uniform", {"low": 0.5, "high": 0.5}, True), + (0.5, "uniform", {"low": 0.0, "high": 1.0}, True), + (0.0, "uniform", {"low": 0.0, "high": 1.0}, True), + (1.0, "uniform", {"low": 0.0, "high": 1.0}, False), + (-0.1, "uniform", {"low": 0.0, "high": 1.0}, False), + # Poisson distribution + (0.5, "poisson", {"lam": 1.0}, False), + (1.0, "poisson", {"lam": 1.5}, True), + (1, "poisson", {"lam": 1.5}, True), + (-1.0, "poisson", {"lam": 1.0}, False), + (-1, "poisson", {"lam": 1.0}, False), + (9999.0, "poisson", {"lam": 0.1}, True), # Extremely unlikely + # Binomial distribution + (0.5, "binomial", {"n": 10, "p": 0.5}, False), + (1.0, "binomial", {"n": 10, "p": 0.5}, True), + (1, "binomial", {"n": 10, "p": 0.5}, True), + (-1.0, "binomial", {"n": 5, "p": 0.75}, False), + (-1, "binomial", {"n": 5, "p": 0.75}, False), + (0, "binomial", {"n": 45, "p": 0.1}, True), + (0.0, "binomial", {"n": 45, "p": 0.1}, True), + (1000.0, "binomial", {"n": 1000, "p": 0.001}, True), # Extremely unlikely + (0, "binomial", {"n": 1000, "p": 0.999}, True), + # Truncated normal distribution + (-0.5, "truncnorm", {"a": -3.0, "b": 3.0, "mean": 0.0, "sd": 1.0}, True), + (-3.5, "truncnorm", {"a": -3.0, "b": 3.0, "mean": 0.0, "sd": 1.0}, False), + (3.1, "truncnorm", {"a": -3.0, "b": 3.0, "mean": 0.0, "sd": 1.0}, False), + ( # Extremely unlikely + 99.9, + "truncnorm", + {"a": -100.0, "b": 100.0, "mean": 0.0, "sd": 1.0}, + True, + ), + # Log-normal distribution + (1.1, "lognorm", {"meanlog": 1.0, "sdlog": 1.0}, True), + (-0.5, 
"lognorm", {"meanlog": 2.0, "sdlog": 2.0}, False), + (-7.8, "lognorm", {"meanlog": 3.4, "sdlog": 5.6}, False), + (0.0, "lognorm", {"meanlog": 1.2, "sdlog": 3.4}, False), + (0.0000001, "lognorm", {"meanlog": 1.2, "sdlog": 3.4}, True), + ( # Extremely unlikely + 99999.9, + "lognorm", + {"meanlog": 1.2, "sdlog": 3.4}, + True, + ), + ], + ) + def test_output_validation( + self, + sample: float | int, + distribution: Literal[ + "fixed", "uniform", "poisson", "binomial", "truncnorm", "lognorm" + ], + kwargs: dict[str, Any], + expected: bool, + ) -> None: + assert sample_fits_distribution(sample, distribution, **kwargs) == expected From 706f5f3e5f3260b047b16ea0989a24517cbfdad1 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:54:15 -0400 Subject: [PATCH 19/31] Parametrize `parameters_quick_draw` test fixture Parametrized the test fixture for the `Parameters.parameters_quick_draw` method. Also developed a generic `MockParametersInput` class that can be used to easily facilitate the parameterization of `gempyor.parameters.Parameters` unit tests. 
--- .../tests/parameters/test_parameters_class.py | 220 +++++++++++------- 1 file changed, 136 insertions(+), 84 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 98342c5f4..cbe2379a1 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -3,14 +3,17 @@ import pathlib from typing import Any, Callable -import confuse import numpy as np import pandas as pd import pytest from tempfile import NamedTemporaryFile from gempyor.parameters import Parameters -from gempyor.testing import create_confuse_subview_from_dict, partials_are_similar +from gempyor.testing import ( + create_confuse_subview_from_dict, + partials_are_similar, + sample_fits_distribution, +) from gempyor.utils import random_distribution_sampler @@ -43,6 +46,87 @@ class MockData: } +class MockParametersInput: + def __init__( + self, + config: dict[str, Any], + ti: date, + tf: date, + subpop_names: list[str], + path_prefix: str = ".", + ) -> None: + self.config = config + self.ti = ti + self.tf = tf + self.subpop_names = subpop_names + self.path_prefix = path_prefix + self._timeseries_dfs = {} + + def create_parameters_instance(self) -> Parameters: + return Parameters( + parameter_config=create_confuse_subview_from_dict( + "parameters", self.config + ), + ti=self.ti, + tf=self.tf, + subpop_names=self.subpop_names, + path_prefix=self.path_prefix, + ) + + def number_of_subpops(self) -> int: + return len(self.subpop_names) + + def number_of_days(self) -> int: + return (self.tf - self.ti).days + + def number_of_parameters(self) -> int: + return len(self.config) + + def has_timeseries_parameter(self) -> bool: + for _, v in self.config.items(): + if "timeseries" in v: + return True + return False + + def get_timeseries_df(self, param_name: str) -> pd.DataFrame: + df = self._timeseries_dfs.get(param_name) + if df is 
not None: + return df.copy() + conf = self.config.get(param_name, {}) + df_file = conf.get("timeseries") + if df_file is None: + raise ValueError( + f"The given param '{param_name}' does not have a timeseries dataframe." + ) + df = pd.read_csv(df_file, index_col="date") + self._timeseries_dfs[param_name] = df + return df.copy() + + +def fixed_three_valid_parameter_factory(tmp_path: pathlib.Path) -> MockParametersInput: + return MockParametersInput( + config={"sigma": {"value": 0.1}, "eta": {"value": 0.2}, "nu": {"value": 0.3}}, + ti=date(2024, 1, 1), + tf=date(2024, 1, 31), + subpop_names=["1", "2", "3"], + ) + + +def distribution_three_valid_parameter_factory( + tmp_path: pathlib.Path, +) -> MockParametersInput: + return MockParametersInput( + config={ + "sigma": {"value": {"distribution": "uniform", "low": 1.0, "high": 2.0}}, + "eta": {"value": {"distribution": "binomial", "n": 20, "p": 0.5}}, + "nu": {"value": {"distribution": "lognorm", "meanlog": 1.0, "sdlog": 1.0}}, + }, + ti=date(2024, 1, 1), + tf=date(2024, 1, 31), + subpop_names=["1", "2", "3"], + ) + + def valid_parameters_factory( tmp_path: pathlib.Path, ) -> tuple[dict[str, pd.DataFrame], dict[str, dict[str, Any]]]: @@ -324,98 +408,66 @@ def test_get_pnames2pindex(self, parameters_inputs: dict[str, Any]) -> None: for idx, key in enumerate(parameters_inputs["parameter_config"].keys()) } - def test_parameters_quick_draw(self) -> None: - # First with a time series param, fixed size draws - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } - ) - with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - "sigma": {"timeseries": temp_file.name}, - "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, - "Ro": { - "value": {"distribution": "uniform", "low": 1.0, "high": 
2.0} - }, - }, - ) - params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) + @pytest.mark.parametrize( + "factory,n_days,nsubpops", + [ + (fixed_three_valid_parameter_factory, 4, 2), + (distribution_three_valid_parameter_factory, 5, 2), + ], + ) + def test_parameters_quick_draw( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + n_days: int, + nsubpops: int, + ) -> None: + # Setup + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() - # Test the exception + if mock_inputs.has_timeseries_parameter() and ( + (n_days_expected := mock_inputs.number_of_days()) != n_days + or (nsubpops_expected := mock_inputs.number_of_subpops()) != nsubpops + ): + # Incompatible shapes with pytest.raises( ValueError, match=( - r"could not broadcast input array from shape " - r"\(5\,2\) into shape \(4\,2\)" + rf"could not broadcast input array from shape \({n_days}\," + rf"{nsubpops}\) into shape \({n_days_expected}\," + rf"{nsubpops_expected}\)" ), ): - params.parameters_quick_draw(4, 2) - - # Test our result - p_draw = params.parameters_quick_draw(5, 2) + params.parameters_quick_draw(n_days, nsubpops) + else: + # Compatible shapes + p_draw = params.parameters_quick_draw(n_days, nsubpops) assert isinstance(p_draw, np.ndarray) assert p_draw.dtype == np.float64 - assert p_draw.shape == (3, 5, 2) - assert np.allclose( - p_draw[0, :, :], - np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), + assert p_draw.shape == ( + mock_inputs.number_of_parameters(), + n_days, + nsubpops, ) - assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((5, 2))) - assert np.greater_equal(p_draw[2, :, :], 1.0).all() - assert np.less(p_draw[2, :, :], 2.0).all() - assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) - # Second without a time series param, arbitrary sized draws - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - 
"eta": {"value": 2.2}, - "nu": { - "value": { - "distribution": "truncnorm", - "mean": 0.0, - "sd": 2.0, - "a": -2.0, - "b": 2.0, - } - }, - }, - ) - params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) - - p_draw = params.parameters_quick_draw(5, 2) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (2, 5, 2) - assert np.allclose(p_draw[0, :, :], 2.2) - assert np.greater_equal(p_draw[1, :, :], -2.0).all() - assert np.less_equal(p_draw[1, :, :], 2.0).all() - assert np.allclose(p_draw[1, :, :], p_draw[1, 0, 0]) - - p_draw = params.parameters_quick_draw(4, 3) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (2, 4, 3) - assert np.allclose(p_draw[0, :, :], 2.2) - assert np.greater_equal(p_draw[1, :, :], -2.0).all() - assert np.less_equal(p_draw[1, :, :], 2.0).all() - assert np.allclose(p_draw[1, :, :], p_draw[1, 0, 0]) + # Loop over each param and check it individually + for param_name, conf in mock_inputs.config.items(): + i = params.pnames.index(param_name) + if "timeseries" in conf: + # Check if the values in p_draw[i, :, :] match timeseries + timeseries_df = mock_inputs.get_timeseries_df(param_name) + assert np.allclose(p_draw[i, :, :], timeseries_df.values) + elif isinstance((fixed_value := conf.get("value")), float): + # Check if all the values in p_draw[i, :, :] match a const + assert np.allclose(p_draw[i, :, :], fixed_value) + else: + # Check if the values in p_draw[i, :, :] match the distribution + assert np.allclose(p_draw[i, :, :], p_draw[i, 0, 0]) + value = float(p_draw[i, 0, 0]) + assert sample_fits_distribution( + value, **{k: v for k, v in conf.get("value").items()} + ) def test_parameters_load(self) -> None: # Setup From 1571710d87a0b7418760548d0435afdffdd4d55b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:03:06 
-0400 Subject: [PATCH 20/31] Parameterize `parameters_load` test fixture Parameterize test fixture for `parameters_load` method of `gempyor.parameters.Parameters` class. Started with simple set of input parameters, will have to expand later. --- .../tests/parameters/test_parameters_class.py | 210 ++++++++---------- 1 file changed, 96 insertions(+), 114 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index cbe2379a1..30328b3fd 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -45,6 +45,8 @@ class MockData: "subpop_names": ["1", "2"], } + empty_param_overrides_df = pd.DataFrame(data={"parameter": [], "value": []}) + class MockParametersInput: def __init__( @@ -82,6 +84,9 @@ def number_of_days(self) -> int: def number_of_parameters(self) -> int: return len(self.config) + def get_timeseries_parameters(self) -> list[str]: + return [k for k, v in self.config.items() if "timeseries" in v] + def has_timeseries_parameter(self) -> bool: for _, v in self.config.items(): if "timeseries" in v: @@ -411,7 +416,9 @@ def test_get_pnames2pindex(self, parameters_inputs: dict[str, Any]) -> None: @pytest.mark.parametrize( "factory,n_days,nsubpops", [ + (fixed_three_valid_parameter_factory, None, None), (fixed_three_valid_parameter_factory, 4, 2), + (distribution_three_valid_parameter_factory, None, None), (distribution_three_valid_parameter_factory, 5, 2), ], ) @@ -419,12 +426,14 @@ def test_parameters_quick_draw( self, tmp_path: pathlib.Path, factory: Callable[[pathlib.Path], MockParametersInput], - n_days: int, - nsubpops: int, + n_days: None | int, + nsubpops: None | int, ) -> None: # Setup mock_inputs = factory(tmp_path) params = mock_inputs.create_parameters_instance() + n_days = mock_inputs.number_of_days() if n_days is None else n_days + nsubpops = 
mock_inputs.number_of_subpops() if nsubpops is None else nsubpops if mock_inputs.has_timeseries_parameter() and ( (n_days_expected := mock_inputs.number_of_days()) != n_days @@ -469,130 +478,103 @@ def test_parameters_quick_draw( value, **{k: v for k, v in conf.get("value").items()} ) - def test_parameters_load(self) -> None: + @pytest.mark.parametrize( + "factory,param_df,n_days,nsubpops", + [ + ( + fixed_three_valid_parameter_factory, + MockData.empty_param_overrides_df, + None, + None, + ), + ( + fixed_three_valid_parameter_factory, + MockData.empty_param_overrides_df, + 4, + 2, + ), + ( + distribution_three_valid_parameter_factory, + MockData.empty_param_overrides_df, + None, + None, + ), + ( + distribution_three_valid_parameter_factory, + MockData.empty_param_overrides_df, + 5, + 2, + ), + ], + ) + def test_parameters_load( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + param_df: pd.DataFrame, + n_days: None | int, + nsubpops: None | int, + ) -> None: # Setup - param_overrides_df = pd.DataFrame( - {"parameter": ["nu", "gamma", "nu"], "value": [0.1, 0.2, 0.3]} - ) - param_empty_df = pd.DataFrame({"parameter": [], "value": []}) + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() + n_days = mock_inputs.number_of_days() if n_days is None else n_days + nsubpops = mock_inputs.number_of_subpops() if nsubpops is None else nsubpops - # With time series - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } - ) - with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - "sigma": {"timeseries": temp_file.name}, - "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, - "Ro": { - "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} - }, - }, - ) - 
params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) + timeseries_parameters = set(mock_inputs.get_timeseries_parameters()) + override_parameters = set(param_df["parameter"].unique()) + timeseries_not_overridden = timeseries_parameters - override_parameters - # Test the exception + if len(timeseries_not_overridden) and ( + (n_days_expected := mock_inputs.number_of_days()) != n_days + or (nsubpops_expected := mock_inputs.number_of_subpops()) != nsubpops + ): + # Incompatible shapes with pytest.raises( ValueError, match=( - r"could not broadcast input array from shape " - r"\(5\,2\) into shape \(4\,2\)" + rf"could not broadcast input array from shape \({n_days}\," + rf"{nsubpops}\) into shape \({n_days_expected}\," + rf"{nsubpops_expected}\)" ), ): - params.parameters_load(param_empty_df, 4, 2) - - # Empty overrides - p_draw = params.parameters_load(param_empty_df, 5, 2) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (3, 5, 2) - assert np.allclose( - p_draw[0, :, :], - np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), - ) - assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((5, 2))) - assert np.greater_equal(p_draw[2, :, :], 1.0).all() - assert np.less(p_draw[2, :, :], 2.0).all() - assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) - - # But if we override time series no exception - p_draw = params.parameters_load( - pd.DataFrame({"parameter": ["sigma"], "value": [12.34]}), 4, 2 - ) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (3, 4, 2) - assert np.allclose(p_draw[0, :, :], 12.34) - assert np.allclose(p_draw[1, :, :], 0.1234 * np.ones((4, 2))) - assert np.greater_equal(p_draw[2, :, :], 1.0).all() - assert np.less(p_draw[2, :, :], 2.0).all() - assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) - - # If not overriding time series then must conform - p_draw = 
params.parameters_load(param_overrides_df, 5, 2) + params.parameters_load(param_df, n_days, nsubpops) + else: + # Compatible shapes + p_draw = params.parameters_load(param_df, n_days, nsubpops) assert isinstance(p_draw, np.ndarray) assert p_draw.dtype == np.float64 - assert p_draw.shape == (3, 5, 2) - assert np.allclose( - p_draw[0, :, :], - np.array([[1.2, 2.3], [2.3, 3.4], [3.4, 4.5], [4.5, 5.6], [5.6, 6.7]]), + assert p_draw.shape == ( + mock_inputs.number_of_parameters(), + n_days, + nsubpops, ) - assert np.allclose(p_draw[1, :, :], 0.2 * np.ones((5, 2))) - assert np.greater_equal(p_draw[2, :, :], 1.0).all() - assert np.less(p_draw[2, :, :], 2.0).all() - assert np.allclose(p_draw[2, :, :], p_draw[2, 0, 0]) - # Without time series - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - "eta": {"value": 2.2}, - "nu": { - "value": { - "distribution": "truncnorm", - "mean": 0.0, - "sd": 2.0, - "a": -2.0, - "b": 2.0, - } - }, - }, - ) - params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) - - # Takes an 'empty' DataFrame - p_draw = params.parameters_load(param_empty_df, 5, 2) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (2, 5, 2) - assert np.allclose(p_draw[0, :, :], 2.2) - assert np.greater_equal(p_draw[1, :, :], -2.0).all() - assert np.less_equal(p_draw[1, :, :], 2.0).all() - - # Takes a DataFrame with values, only takes the first - p_draw = params.parameters_load(param_overrides_df, 4, 3) - assert isinstance(p_draw, np.ndarray) - assert p_draw.dtype == np.float64 - assert p_draw.shape == (2, 4, 3) - assert np.allclose(p_draw[0, :, :], 2.2) - assert np.allclose(p_draw[1, :, :], 0.1) + # Loop over each param and check it individually + for param_name, conf in mock_inputs.config.items(): + i = params.pnames.index(param_name) + if param_name in param_df["parameter"].values: + # Check that the values in p_draw[i, :, :] match 
override + assert np.allclose( + p_draw[i, :, :], + param_df[param_df["parameter"] == param_name] + .get("value") + .item(), + ) + elif "timeseries" in conf: + # Check if the values in p_draw[i, :, :] match timeseries + timeseries_df = mock_inputs.get_timeseries_df(param_name) + assert np.allclose(p_draw[i, :, :], timeseries_df.values) + elif isinstance((fixed_value := conf.get("value")), float): + # Check if all the values in p_draw[i, :, :] match a const + assert np.allclose(p_draw[i, :, :], fixed_value) + else: + # Check if the values in p_draw[i, :, :] match the distribution + assert np.allclose(p_draw[i, :, :], p_draw[i, 0, 0]) + value = float(p_draw[i, 0, 0]) + assert sample_fits_distribution( + value, **{k: v for k, v in conf.get("value").items()} + ) def test_getParameterDF(self) -> None: param_df = pd.DataFrame( From 0284e56acf728d22a92ff3d39fdb099723b2042d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:41:48 -0400 Subject: [PATCH 21/31] Parameterize `getParameterDF` test fixture Parameterized the test fixture for the `getParameterDF` method of `gempyor.parameters.Parameters`. Added `get_nontimeseries_parameters` and `number_of_nontimeseries_parameters` methods to the `MockParametersInput` class to assist. 
--- .../tests/parameters/test_parameters_class.py | 83 ++++++++++--------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 30328b3fd..be8eacf5d 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -87,6 +87,12 @@ def number_of_parameters(self) -> int: def get_timeseries_parameters(self) -> list[str]: return [k for k, v in self.config.items() if "timeseries" in v] + def get_nontimeseries_parameters(self) -> list[str]: + return [k for k, v in self.config.items() if "timeseries" not in v] + + def number_of_nontimeseries_parameters(self) -> int: + return len(self.get_nontimeseries_parameters()) + def has_timeseries_parameter(self) -> bool: for _, v in self.config.items(): if "timeseries" in v: @@ -576,48 +582,43 @@ def test_parameters_load( value, **{k: v for k, v in conf.get("value").items()} ) - def test_getParameterDF(self) -> None: - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } - ) - with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) - valid_parameters = create_confuse_subview_from_dict( - "parameters", - { - "sigma": {"timeseries": temp_file.name}, - "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, - "Ro": { - "value": {"distribution": "uniform", "low": 1.0, "high": 2.0} - }, - }, - ) - params = Parameters( - valid_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2"], - ) + @pytest.mark.parametrize( + "factory,n_days,nsubpops", + [ + (fixed_three_valid_parameter_factory, None, None), + (fixed_three_valid_parameter_factory, 4, 2), + (distribution_three_valid_parameter_factory, None, None), + 
(distribution_three_valid_parameter_factory, 5, 2), + ], + ) + def test_getParameterDF( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + n_days: None | int, + nsubpops: None | int, + ) -> None: + # Setup + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() + n_days = mock_inputs.number_of_days() if n_days is None else n_days + nsubpops = mock_inputs.number_of_subpops() if nsubpops is None else nsubpops - # Create a quick sample - p_draw = params.parameters_quick_draw(5, 2) - df = params.getParameterDF(p_draw) - assert isinstance(df, pd.DataFrame) - assert df.shape == (2, 2) - assert df.columns.to_list() == ["value", "parameter"] - assert df["parameter"].to_list() == ["gamma", "Ro"] - values = df["value"].to_list() - assert values[0] == 0.1234 - assert values[1] >= 1.0 - assert values[1] < 2.0 - assert (df.index.to_series() == df["parameter"]).all() - - # Make clear that 'sigma' is not present because it's a time series - assert "sigma" not in df["parameter"].to_list() + p_draw = params.parameters_quick_draw(n_days, nsubpops) + df = params.getParameterDF(p_draw) + + # Go through assertions on the structure of the DataFrame + assert isinstance(df, pd.DataFrame) + assert df.shape == (mock_inputs.number_of_nontimeseries_parameters(), 2) + assert df.columns.to_list() == ["value", "parameter"] + assert (df.index.to_series() == df["parameter"]).all() + assert not df["parameter"].duplicated().any() + assert set(df["parameter"].to_list()) == set( + mock_inputs.get_nontimeseries_parameters() + ) + for row in df.itertuples(index=False): + i = params.pnames.index(row.parameter) + assert np.isclose(row.value, p_draw[i, 0, 0]) def test_parameters_reduce(self) -> None: # TODO: Come back and unit test this method after getting a better handle on From 5ccd9cd19f0bdcad11c049163e322b5ce7dbfea6 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 
2024 14:38:37 -0400 Subject: [PATCH 22/31] `Parameters` attrs fixture `MockParametersInputs` Reparametrize the `Parameters` attributes test fixture to accept a factory that creates a `MockParametersInputs` object to be compatible with the other test fixtures for `gempyor.parameters.Parameters`. --- .../tests/parameters/test_parameters_class.py | 106 ++++++------------ 1 file changed, 36 insertions(+), 70 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index be8eacf5d..5670a8375 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -2,7 +2,9 @@ from functools import partial import pathlib from typing import Any, Callable +from uuid import uuid4 +import confuse import numpy as np import pandas as pd import pytest @@ -63,12 +65,18 @@ def __init__( self.subpop_names = subpop_names self.path_prefix = path_prefix self._timeseries_dfs = {} + self._confuse_subview = None + + def create_confuse_subview(self) -> confuse.Subview: + if self._confuse_subview is None: + self._confuse_subview = create_confuse_subview_from_dict( + "parameters", self.config + ) + return self._confuse_subview def create_parameters_instance(self) -> Parameters: return Parameters( - parameter_config=create_confuse_subview_from_dict( - "parameters", self.config - ), + parameter_config=self.create_confuse_subview(), ti=self.ti, tf=self.tf, subpop_names=self.subpop_names, @@ -109,7 +117,8 @@ def get_timeseries_df(self, param_name: str) -> pd.DataFrame: raise ValueError( f"The given param '{param_name}' does not have a timeseries dataframe." 
) - df = pd.read_csv(df_file, index_col="date") + df = pd.read_csv(df_file, index_col=None) + df["date"] = pd.to_datetime(df["date"]) self._timeseries_dfs[param_name] = df return df.copy() @@ -138,35 +147,20 @@ def distribution_three_valid_parameter_factory( ) -def valid_parameters_factory( - tmp_path: pathlib.Path, -) -> tuple[dict[str, pd.DataFrame], dict[str, dict[str, Any]]]: - """ - Factory for creating small and valid set of parameters. - - Creates the configuration for three parameters: - - 'sigma': A time series, - - 'gamma': A fixed value of 0.1234 with a sum stacked modifier, - - 'Ro': A uniform distribution between 1 and 2. - - Args: - tmp_path: A temporary file path, typically provided by pytest's `tmp_path` - fixture. - - Returns: - A tuple of a dictionary of pandas DataFrames where the keys are the parameter - names and the values are time series values and a dictionary of configuration - values that can be converted to a confuse subview. - """ - tmp_file = tmp_path / "valid_parameters_factory_df.csv" +def valid_parameters_factory(tmp_path: pathlib.Path) -> MockParametersInput: + tmp_file = tmp_path / f"{uuid4().hex}.csv" df = MockData.simple_timeseries_param_df.copy() df.to_csv(tmp_file, index=False) - params = { - "sigma": {"timeseries": str(tmp_file.absolute())}, - "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, - "Ro": {"value": {"distribution": "uniform", "low": 1.0, "high": 2.0}}, - } - return [{"sigma": df}, params] + return MockParametersInput( + config={ + "sigma": {"timeseries": str(tmp_file.absolute())}, + "gamma": {"value": 0.1234, "stacked_modifier_method": "sum"}, + "Ro": {"value": {"distribution": "uniform", "low": 1.0, "high": 2.0}}, + }, + ti=df["date"].dt.date.min(), + tf=df["date"].dt.date.max(), + subpop_names=[c for c in df.columns.to_list() if c != "date"], + ) class TestParameters: @@ -271,56 +265,28 @@ def test_timeseries_parameter_has_insufficient_dates_value_error( def test_parameters_instance_attributes( self, 
tmp_path: pathlib.Path, - factory: Callable[ - [pathlib.Path], tuple[dict[str, pd.DataFrame], dict[str, dict[str, Any]]] - ], + factory: Callable[[pathlib.Path], MockParametersInput], ) -> None: # Setup - timeseries_dfs, param_config = factory(tmp_path) - if timeseries_dfs: - start_date = None - end_date = None - subpop_names = [] - for _, df in timeseries_dfs.items(): - df_start_date = df["date"].dt.date.min() - if start_date is None or df_start_date < start_date: - start_date = df_start_date - df_end_date = df["date"].dt.date.max() - if end_date is None or df_end_date > end_date: - end_date = df_end_date - if df.shape[1] > 2: - subpop_names += [c for c in df.columns.to_list() if c != "date"] - if not subpop_names: - subpop_names = ["1", "2"] # filler value if all time series are 1 value - else: - start_date = date(2024, 1, 1) - end_date = date(2024, 1, 5) - subpop_names = ["1", "2"] - - valid_parameters = create_confuse_subview_from_dict("parameters", param_config) - params = Parameters( - valid_parameters, - ti=start_date, - tf=end_date, - subpop_names=subpop_names, - ) + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() # The `npar` attribute - assert params.npar == len(param_config) + assert params.npar == mock_inputs.number_of_parameters() # The `pconfig` attribute - assert params.pconfig == valid_parameters + assert params.pconfig == mock_inputs.create_confuse_subview() # The `pdata` attribute - assert set(params.pdata.keys()) == set(param_config.keys()) - for param_name, param_conf in param_config.items(): + assert set(params.pdata.keys()) == set(mock_inputs.config.keys()) + for param_name, param_conf in mock_inputs.config.items(): assert params.pdata[param_name]["idx"] == params.pnames2pindex[param_name] assert params.pdata[param_name][ "stacked_modifier_method" ] == param_conf.get("stacked_modifier_method", "product") if "timeseries" in param_conf: assert params.pdata[param_name]["ts"].equals( - 
timeseries_dfs[param_name].set_index("date") + mock_inputs.get_timeseries_df(param_name).set_index("date") ) elif isinstance(params.pdata[param_name]["dist"], partial): if isinstance(param_conf.get("value"), float): @@ -356,11 +322,11 @@ def test_parameters_instance_attributes( ) # The `pnames` attribute - assert params.pnames == list(param_config.keys()) + assert params.pnames == list(mock_inputs.config.keys()) # The `pnames2pindex` attribute assert params.pnames2pindex == { - key: idx for idx, key in enumerate(param_config.keys()) + key: idx for idx, key in enumerate(mock_inputs.config.keys()) } # # The `stacked_modifier_method` attribute @@ -369,7 +335,7 @@ def test_parameters_instance_attributes( "product": [], "reduction_product": [], } - for param_name, param_conf in param_config.items(): + for param_name, param_conf in mock_inputs.config.items(): modifier_type = param_conf.get("stacked_modifier_method", "product") expected_stacked_modifier_method[modifier_type].append(param_name.lower()) assert params.stacked_modifier_method == expected_stacked_modifier_method From 50583bc04fe34f2d84d7f85b7d3eeb6c8244ab4a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 15:34:33 -0400 Subject: [PATCH 23/31] Expanded parameters for test fixtures Expanded the parameters for the `test_parameters_instance_attributes`, `test_parameters_quick_draw`, `test_parameters_load`, and `test_getParameterDF` test fixtures. 
--- .../tests/parameters/test_parameters_class.py | 54 +++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 5670a8375..1a582a044 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -87,7 +87,7 @@ def number_of_subpops(self) -> int: return len(self.subpop_names) def number_of_days(self) -> int: - return (self.tf - self.ti).days + return (self.tf - self.ti).days + 1 def number_of_parameters(self) -> int: return len(self.config) @@ -119,6 +119,8 @@ def get_timeseries_df(self, param_name: str) -> pd.DataFrame: ) df = pd.read_csv(df_file, index_col=None) df["date"] = pd.to_datetime(df["date"]) + df = df.set_index("date") + df = df[self.subpop_names] self._timeseries_dfs[param_name] = df return df.copy() @@ -261,7 +263,14 @@ def test_timeseries_parameter_has_insufficient_dates_value_error( # `pd.date_range` function only creates monotonic increasing sequences and # 0 == 0. 
- @pytest.mark.parametrize("factory", [(valid_parameters_factory)]) + @pytest.mark.parametrize( + "factory", + [ + (fixed_three_valid_parameter_factory), + (distribution_three_valid_parameter_factory), + (valid_parameters_factory), + ], + ) def test_parameters_instance_attributes( self, tmp_path: pathlib.Path, @@ -286,7 +295,7 @@ def test_parameters_instance_attributes( ] == param_conf.get("stacked_modifier_method", "product") if "timeseries" in param_conf: assert params.pdata[param_name]["ts"].equals( - mock_inputs.get_timeseries_df(param_name).set_index("date") + mock_inputs.get_timeseries_df(param_name) ) elif isinstance(params.pdata[param_name]["dist"], partial): if isinstance(param_conf.get("value"), float): @@ -392,6 +401,8 @@ def test_get_pnames2pindex(self, parameters_inputs: dict[str, Any]) -> None: (fixed_three_valid_parameter_factory, 4, 2), (distribution_three_valid_parameter_factory, None, None), (distribution_three_valid_parameter_factory, 5, 2), + (valid_parameters_factory, None, None), + (valid_parameters_factory, 13, 3), ], ) def test_parameters_quick_draw( @@ -404,20 +415,20 @@ def test_parameters_quick_draw( # Setup mock_inputs = factory(tmp_path) params = mock_inputs.create_parameters_instance() + n_days_expected = mock_inputs.number_of_days() + nsubpops_expected = mock_inputs.number_of_subpops() n_days = mock_inputs.number_of_days() if n_days is None else n_days nsubpops = mock_inputs.number_of_subpops() if nsubpops is None else nsubpops if mock_inputs.has_timeseries_parameter() and ( - (n_days_expected := mock_inputs.number_of_days()) != n_days - or (nsubpops_expected := mock_inputs.number_of_subpops()) != nsubpops + n_days_expected != n_days or nsubpops_expected != nsubpops ): # Incompatible shapes with pytest.raises( ValueError, match=( - rf"could not broadcast input array from shape \({n_days}\," - rf"{nsubpops}\) into shape \({n_days_expected}\," - rf"{nsubpops_expected}\)" + rf"^could not broadcast input array from shape 
\({n_days_expected}" + rf"\,{nsubpops_expected}\) into shape \({n_days}\,{nsubpops}\)$" ), ): params.parameters_quick_draw(n_days, nsubpops) @@ -477,6 +488,18 @@ def test_parameters_quick_draw( 5, 2, ), + ( + valid_parameters_factory, + MockData.empty_param_overrides_df, + None, + None, + ), + ( + valid_parameters_factory, + MockData.empty_param_overrides_df, + 13, + 2, + ), ], ) def test_parameters_load( @@ -490,24 +513,24 @@ def test_parameters_load( # Setup mock_inputs = factory(tmp_path) params = mock_inputs.create_parameters_instance() - n_days = mock_inputs.number_of_days() if n_days is None else n_days - nsubpops = mock_inputs.number_of_subpops() if nsubpops is None else nsubpops + n_days_expected = mock_inputs.number_of_days() + nsubpops_expected = mock_inputs.number_of_subpops() + n_days = n_days_expected if n_days is None else n_days + nsubpops = nsubpops_expected if nsubpops is None else nsubpops timeseries_parameters = set(mock_inputs.get_timeseries_parameters()) override_parameters = set(param_df["parameter"].unique()) timeseries_not_overridden = timeseries_parameters - override_parameters if len(timeseries_not_overridden) and ( - (n_days_expected := mock_inputs.number_of_days()) != n_days - or (nsubpops_expected := mock_inputs.number_of_subpops()) != nsubpops + n_days_expected != n_days or nsubpops_expected != nsubpops ): # Incompatible shapes with pytest.raises( ValueError, match=( - rf"could not broadcast input array from shape \({n_days}\," - rf"{nsubpops}\) into shape \({n_days_expected}\," - rf"{nsubpops_expected}\)" + rf"^could not broadcast input array from shape \({n_days_expected}" + rf"\,{nsubpops_expected}\) into shape \({n_days}\,{nsubpops}\)$" ), ): params.parameters_load(param_df, n_days, nsubpops) @@ -555,6 +578,7 @@ def test_parameters_load( (fixed_three_valid_parameter_factory, 4, 2), (distribution_three_valid_parameter_factory, None, None), (distribution_three_valid_parameter_factory, 5, 2), + (valid_parameters_factory, None, None), 
], ) def test_getParameterDF( From 9a381d5dbf0e1dccdd9c9562a0576b92860e2703 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 15:42:31 -0400 Subject: [PATCH 24/31] Changed parameterization style on test fixtures Changed parameterization for the `test_picklable_lamda_alpha`, `test_picklable_lamda_sigma`, and `test_get_pnames2pindex` test fixtures to be like others contained within this test file. --- .../tests/parameters/test_parameters_class.py | 52 +++++++++++++++---- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 1a582a044..89148f851 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -350,13 +350,22 @@ def test_parameters_instance_attributes( assert params.stacked_modifier_method == expected_stacked_modifier_method @pytest.mark.parametrize( - "parameters_inputs,alpha_val", [(MockData.simple_inputs, None)] + "factory,alpha_val", + [ + (fixed_three_valid_parameter_factory, None), + (fixed_three_valid_parameter_factory, 123), + (valid_parameters_factory, "abc"), + ], ) def test_picklable_lamda_alpha( - self, parameters_inputs: dict[str, Any], alpha_val: Any + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + alpha_val: Any, ) -> None: # Setup - params = Parameters(**parameters_inputs) + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() # Attribute error if `alpha_val` is not set with pytest.raises(AttributeError): @@ -367,13 +376,22 @@ def test_picklable_lamda_alpha( assert params.picklable_lamda_alpha() == alpha_val @pytest.mark.parametrize( - "parameters_inputs,sigma_val", [(MockData.simple_inputs, None)] + "factory,sigma_val", + [ + (fixed_three_valid_parameter_factory, None), + 
(fixed_three_valid_parameter_factory, 123), + (valid_parameters_factory, "abc"), + ], ) def test_picklable_lamda_sigma( - self, parameters_inputs: dict[str, Any], sigma_val: Any + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + sigma_val: Any, ) -> None: # Setup - params = Parameters(**parameters_inputs) + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() # Attribute error if `sigma_val` is not set with pytest.raises(AttributeError): @@ -384,14 +402,26 @@ def test_picklable_lamda_sigma( assert params.picklable_lamda_sigma() == sigma_val @pytest.mark.parametrize( - "parameters_inputs", [(MockData.simple_inputs), (MockData.small_inputs)] + "factory", + [ + (fixed_three_valid_parameter_factory), + (distribution_three_valid_parameter_factory), + (valid_parameters_factory), + ], ) - def test_get_pnames2pindex(self, parameters_inputs: dict[str, Any]) -> None: - params = Parameters(**parameters_inputs) + def test_get_pnames2pindex( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + ) -> None: + # Setup + mock_inputs = factory(tmp_path) + params = mock_inputs.create_parameters_instance() + + # Assertions assert params.get_pnames2pindex() == params.pnames2pindex assert params.pnames2pindex == { - key: idx - for idx, key in enumerate(parameters_inputs["parameter_config"].keys()) + key: idx for idx, key in enumerate(mock_inputs.config.keys()) } @pytest.mark.parametrize( From 33dbf175235afb894cc810d708005daf683a92ca Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:47:46 -0400 Subject: [PATCH 25/31] Parametrize exception test fixtures Parametrized `test_nonunique_parameter_names_value_error`, `test_timeseries_parameter_has_insufficient_columns_value_error`, and `test_timeseries_parameter_has_insufficient_dates_value_error` test fixtures. 
And moved long comment about dead code in the insufficient dates ValueError to the main issue. --- .../tests/parameters/test_parameters_class.py | 230 +++++++++++------- 1 file changed, 144 insertions(+), 86 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 89148f851..5b77355f0 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -107,10 +107,12 @@ def has_timeseries_parameter(self) -> bool: return True return False - def get_timeseries_df(self, param_name: str) -> pd.DataFrame: + def get_timeseries_df( + self, param_name: str, subset_by_subpops: bool = True + ) -> pd.DataFrame: df = self._timeseries_dfs.get(param_name) if df is not None: - return df.copy() + return df[self.subpop_names].copy() if subset_by_subpops else df.copy() conf = self.config.get(param_name, {}) df_file = conf.get("timeseries") if df_file is None: @@ -120,9 +122,8 @@ def get_timeseries_df(self, param_name: str) -> pd.DataFrame: df = pd.read_csv(df_file, index_col=None) df["date"] = pd.to_datetime(df["date"]) df = df.set_index("date") - df = df[self.subpop_names] self._timeseries_dfs[param_name] = df - return df.copy() + return df[self.subpop_names].copy() if subset_by_subpops else df.copy() def fixed_three_valid_parameter_factory(tmp_path: pathlib.Path) -> MockParametersInput: @@ -165,12 +166,65 @@ def valid_parameters_factory(tmp_path: pathlib.Path) -> MockParametersInput: ) +def nonunique_invalid_parameter_factory(tmp_path: pathlib.Path) -> MockParametersInput: + return MockParametersInput( + config={ + "sigma": {"value": 0.1}, + "eta": {"value": 0.2}, + "SIGMA": {"value": 0.3}, + }, + ti=date(2024, 1, 1), + tf=date(2024, 1, 3), + subpop_names=["1", "2", "3"], + ) + + +def insufficient_columns_parameter_factory( + tmp_path: pathlib.Path, +) -> MockParametersInput: + df = pd.DataFrame( + 
data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + tmp_file = tmp_path / f"{uuid4().hex}.csv" + df.to_csv(tmp_file, index=False) + return MockParametersInput( + config={"sigma": {"timeseries": str(tmp_file.absolute())}}, + ti=date(2024, 1, 1), + tf=date(2024, 1, 5), + subpop_names=["1", "2", "3"], + ) + + +def insufficient_dates_parameter_factory(tmp_path: pathlib.Path) -> MockParametersInput: + df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) + tmp_file = tmp_path / f"{uuid4().hex}.csv" + df.to_csv(tmp_file, index=False) + return MockParametersInput( + config={"sigma": {"timeseries": str(tmp_file.absolute())}}, + ti=date(2024, 1, 1), + tf=date(2024, 1, 6), + subpop_names=["1", "2"], + ) + + class TestParameters: - def test_nonunique_parameter_names_value_error(self) -> None: - duplicated_parameters = create_confuse_subview_from_dict( - "parameters", - {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "GAMMA": {"value": 0.3}}, - ) + @pytest.mark.parametrize("factory", [(nonunique_invalid_parameter_factory)]) + def test_nonunique_parameter_names_value_error( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + ) -> None: + mock_inputs = factory(tmp_path) with pytest.raises( ValueError, match=( @@ -178,90 +232,94 @@ def test_nonunique_parameter_names_value_error(self) -> None: r"\(remember that case is not sufficient\!\)" ), ): - Parameters( - duplicated_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 12, 31), - subpop_names=["1", "2"], - ) + mock_inputs.create_parameters_instance() - def test_timeseries_parameter_has_insufficient_columns_value_error(self) -> None: - param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 
5.6, 6.7], - } - ) - with NamedTemporaryFile(suffix=".csv") as temp_file: - param_df.to_csv(temp_file.name, index=False) - invalid_timeseries_parameters = create_confuse_subview_from_dict( - "parameters", {"sigma": {"timeseries": temp_file.name}} - ) - with pytest.raises( - ValueError, - match=( - rf"ERROR loading file {temp_file.name} for parameter sigma\: " - rf"the number of non 'date'\s+columns are 2, expected 3 " - rf"\(the number of subpops\) or one\." - ), - ): - Parameters( - invalid_timeseries_parameters, - ti=date(2024, 1, 1), - tf=date(2024, 1, 5), - subpop_names=["1", "2", "3"], + @pytest.mark.parametrize("factory", [(insufficient_columns_parameter_factory)]) + def test_timeseries_parameter_has_insufficient_columns_value_error( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], + ) -> None: + mock_inputs = factory(tmp_path) + tmp_file = None + for param_name, conf in mock_inputs.config.items(): + if "timeseries" in conf: + df = mock_inputs.get_timeseries_df(param_name, subset_by_subpops=False) + actual_columns = len(df.columns) + if ( + actual_columns != mock_inputs.number_of_subpops() + and actual_columns != 1 + ): + tmp_file = conf.get("timeseries") + break + if tmp_file is None: + raise RuntimeError( + ( + "The given factory does not produce a timeseries " + "with an insufficient number of columns." 
) + ) + with pytest.raises( + ValueError, + match=( + rf"^ERROR loading file {tmp_file} for parameter sigma\: the number of " + rf"non 'date'\s+columns are {actual_columns}, expected " + rf"{mock_inputs.number_of_subpops()} \(the number of subpops\) or " + rf"one\.$" + ), + ): + mock_inputs.create_parameters_instance() - @pytest.mark.parametrize( - "start_date,end_date,timeseries_df", - [(date(2024, 1, 1), date(2024, 1, 6), MockData.simple_timeseries_param_df)], - ) + @pytest.mark.parametrize("factory", [(insufficient_dates_parameter_factory)]) def test_timeseries_parameter_has_insufficient_dates_value_error( - self, start_date: date, end_date: date, timeseries_df: pd.DataFrame + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockParametersInput], ) -> None: - # First way to get at this error, purely a length difference - with NamedTemporaryFile(suffix=".csv") as temp_file: - timeseries_df.to_csv(temp_file.name, index=False) - invalid_timeseries_parameters = create_confuse_subview_from_dict( - "parameters", {"sigma": {"timeseries": temp_file.name}} - ) - timeseries_start_date = timeseries_df["date"].dt.date.min() - timeseries_end_date = timeseries_df["date"].dt.date.max() - subpop_names = [c for c in timeseries_df.columns.to_list() if c != "date"] - with pytest.raises( - ValueError, - match=( - rf"ERROR loading file {temp_file.name} for parameter sigma\:\s+" - rf"the \'date\' entries of the provided file do not include all the" - rf" days specified to be modeled by\s+the config\. the provided " - rf"file includes 5 days between {timeseries_start_date}" - rf"( 00\:00\:00)? to {timeseries_end_date}( 00\:00\:00)?,\s+while " - rf"there are 6 days in the config time span of {start_date}->" - rf"{end_date}\. The file must contain entries for the\s+the exact " - rf"start and end dates from the config\. 
" - ), - ): - Parameters( - invalid_timeseries_parameters, - ti=start_date, - tf=end_date, - subpop_names=subpop_names, + mock_inputs = factory(tmp_path) + + tmp_file = None + for param_name, conf in mock_inputs.config.items(): + if "timeseries" in conf: + df = mock_inputs.get_timeseries_df(param_name) + timeseries_start_date = df.index.to_series().dt.date.min() + timeseries_end_date = df.index.to_series().dt.date.max() + if ( + (timeseries_start_date > mock_inputs.ti) + or (timeseries_end_date < mock_inputs.tf) + or ( + not pd.date_range(mock_inputs.ti, mock_inputs.tf) + .isin(df.index) + .all() + ) + ): + tmp_file = conf.get("timeseries") + break + + if tmp_file is None: + raise RuntimeError( + ( + "The given factory does not produce a timeseries with an " + "insufficient date range." ) + ) - # TODO: I'm not sure how to get to the second pathway to this error message. - # 1) We subset the read in dataframe to `ti` to `tf` so if the dataframe goes - # from 2024-01-01 through 2024-01-05 and the given date range is 2024-01-02 - # through 2024-01-06 the dataframe's date range will be subsetted to 2024-01-02 - # through 2024-01-05 which is a repeat of the above. - # 2) Because of the subsetting you can't provide anything except a monotonic - # increasing sequence of dates, pandas only allows subsetting on ordered date - # indexes so you'll get a different error. - # 3) If you provide a monotonic increasing sequence of dates but 'reverse' `ti` - # and `tf` you get no errors (which I think is also bad) because the slice - # operation returns an empty dataframe with the right columns & index and the - # `pd.date_range` function only creates monotonic increasing sequences and - # 0 == 0. 
+ file_days = (timeseries_end_date - timeseries_start_date).days + 1 + with pytest.raises( + ValueError, + match=( + rf"^ERROR loading file {tmp_file} for parameter sigma\:\s+the \'date\' " + rf"entries of the provided file do not include all the days specified " + rf"to be modeled by\s+the config\. the provided file includes " + rf"{(timeseries_end_date - timeseries_start_date).days + 1} days " + rf"between {timeseries_start_date}( 00\:00\:00)? to " + rf"{timeseries_end_date}( 00\:00\:00)?,\s+while there are " + rf"{mock_inputs.number_of_days()} days in the config time span of " + rf"{mock_inputs.ti}->{mock_inputs.tf}\. The file must contain entries " + rf"for the\s+the exact start and end dates from the config\. $" + ), + ): + mock_inputs.create_parameters_instance() @pytest.mark.parametrize( "factory", From e19c0b31fb83182938cbe9f6e53cd08790da44c7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:01:32 -0400 Subject: [PATCH 26/31] Remove `MockData`, expand `parameters_load` test * Removed the no longer needed `MockData` class, has been replaced by the factory style parametrization. * Greatly expanded the parameters for the `parameters_load` test fixture to cover more cases. 
--- .../tests/parameters/test_parameters_class.py | 83 ++++++++++--------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 5b77355f0..163526efd 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -19,37 +19,6 @@ from gempyor.utils import random_distribution_sampler -class MockData: - simple_timeseries_param_df = pd.DataFrame( - data={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), - "1": [1.2, 2.3, 3.4, 4.5, 5.6], - "2": [2.3, 3.4, 4.5, 5.6, 6.7], - } - ) - - simple_inputs = { - "parameter_config": create_confuse_subview_from_dict( - "parameters", {"sigma": {"value": 0.1}} - ), - "ti": date(2024, 1, 1), - "tf": date(2024, 1, 10), - "subpop_names": ["1", "2", "3"], - } - - small_inputs = { - "parameter_config": create_confuse_subview_from_dict( - "parameters", - {"sigma": {"value": 0.1}, "gamma": {"value": 0.2}, "eta": {"value": 0.3}}, - ), - "ti": date(2024, 1, 1), - "tf": date(2024, 1, 31), - "subpop_names": ["1", "2"], - } - - empty_param_overrides_df = pd.DataFrame(data={"parameter": [], "value": []}) - - class MockParametersInput: def __init__( self, @@ -152,7 +121,13 @@ def distribution_three_valid_parameter_factory( def valid_parameters_factory(tmp_path: pathlib.Path) -> MockParametersInput: tmp_file = tmp_path / f"{uuid4().hex}.csv" - df = MockData.simple_timeseries_param_df.copy() + df = pd.DataFrame( + data={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 5)), + "1": [1.2, 2.3, 3.4, 4.5, 5.6], + "2": [2.3, 3.4, 4.5, 5.6, 6.7], + } + ) df.to_csv(tmp_file, index=False) return MockParametersInput( config={ @@ -554,37 +529,67 @@ def test_parameters_quick_draw( [ ( fixed_three_valid_parameter_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": [], "value": []}), None, 
None, ), ( fixed_three_valid_parameter_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": [], "value": []}), + 4, + 2, + ), + ( + fixed_three_valid_parameter_factory, + pd.DataFrame(data={"parameter": ["sigma"], "value": [-0.123]}), + None, + None, + ), + ( + fixed_three_valid_parameter_factory, + pd.DataFrame(data={"parameter": ["sigma"], "value": [-0.123]}), 4, 2, ), ( distribution_three_valid_parameter_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": [], "value": []}), None, None, ), ( distribution_three_valid_parameter_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": [], "value": []}), 5, 2, ), + ( + distribution_three_valid_parameter_factory, + pd.DataFrame(data={"parameter": ["nu", "alpha"], "value": [-9.9, 0.0]}), + None, + None, + ), + ( + valid_parameters_factory, + pd.DataFrame(data={"parameter": [], "value": []}), + None, + None, + ), + ( + valid_parameters_factory, + pd.DataFrame(data={"parameter": [], "value": []}), + 13, + 2, + ), ( valid_parameters_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": ["Ro", "Ro"], "value": [2.5, 3.6]}), None, None, ), ( valid_parameters_factory, - MockData.empty_param_overrides_df, + pd.DataFrame(data={"parameter": ["Ro", "Ro"], "value": [2.5, 3.6]}), 13, 2, ), @@ -641,7 +646,7 @@ def test_parameters_load( assert np.allclose( p_draw[i, :, :], param_df[param_df["parameter"] == param_name] - .get("value") + .iloc[0]["value"] .item(), ) elif "timeseries" in conf: From 01c53296f44dbb837ae9c72f547eaade08fc8651 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:18:45 -0400 Subject: [PATCH 27/31] Loosen guarantees on `pnames2pindex` attribute Modified the `test_get_pnames2pindex` test fixture to guarantee that the `pnames2pindex` attribute only provides the correct index of a parameter name in the `pnames` attribute rather than the 
stronger constraint of index also matching the order of the parameters provided. --- .../gempyor_pkg/tests/parameters/test_parameters_class.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index 163526efd..c74a19015 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -364,11 +364,11 @@ def test_parameters_instance_attributes( ) # The `pnames` attribute - assert params.pnames == list(mock_inputs.config.keys()) + assert set(params.pnames) == set(mock_inputs.config.keys()) # The `pnames2pindex` attribute assert params.pnames2pindex == { - key: idx for idx, key in enumerate(mock_inputs.config.keys()) + p: params.pnames.index(p) for p in params.pnames2pindex } # # The `stacked_modifier_method` attribute @@ -454,7 +454,7 @@ def test_get_pnames2pindex( # Assertions assert params.get_pnames2pindex() == params.pnames2pindex assert params.pnames2pindex == { - key: idx for idx, key in enumerate(mock_inputs.config.keys()) + p: params.pnames.index(p) for p in params.pnames } @pytest.mark.parametrize( From 8d38d0f772fb70dd3693a9bf84e798426768e852 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:30:08 -0400 Subject: [PATCH 28/31] Types for `Parameters` instance attrs, formatting * Added type hints for the `gempyor.parameters.Parameters` instance attributes. * Applied formatter to `src/gempyor/parameters.py`. 
--- .../gempyor_pkg/src/gempyor/parameters.py | 157 +++++++++++------- 1 file changed, 96 insertions(+), 61 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index 8a304066b..79292fcd9 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -12,6 +12,7 @@ import datetime import logging import os +from typing import Any, Literal import confuse import numpy as np @@ -27,19 +28,19 @@ class Parameters: """ Encapsulates logic for loading, parsing, and summarizing parameter configurations. - + Attributes: npar: The number of parameters contained within the given configuration. pconfig: A view subsetting to the parameters section of a given config file. pdata: A dictionary containing a processed and reformatted view of the `pconfig` attribute. pnames: The names of the parameters given. - pnames2pindex: A mapping parameter names to their location in the `pnames` + pnames2pindex: A mapping parameter names to their location in the `pnames` attribute. stacked_modifier_method: A mapping of modifier method to the parameters to which that modifier method is relevant for. """ - + def __init__( self, parameter_config: confuse.ConfigView, @@ -51,7 +52,7 @@ def __init__( ): """ Initialize a `Parameters` instance from a parameter config view. - + Args: parameter_config: A view subsetting to the parameters section of a given config file. @@ -60,7 +61,7 @@ def __init__( subpop_names: A list of subpopulation names. path_prefix: A file path prefix to use when reading in parameter values from a dataframe like file. - + Raises: ValueError: The parameter names for the SEIR model are not unique. ValueError: The dataframe file found for a given parameter contains an @@ -69,18 +70,22 @@ def __init__( enough date entries to cover the time span being considered by the given `ti` and `tf`. 
""" - self.pconfig = parameter_config - self.pnames = [] - self.npar = len(self.pnames) + self.pconfig: confuse.ConfigView = parameter_config + self.pnames: list[str] = [] + self.npar: int = len(self.pnames) - self.pdata = {} - self.pnames2pindex = {} - self.stacked_modifier_method = {"sum": [], "product": [], "reduction_product": []} + self.pdata: dict[str, dict[str, Any]] = {} + self.pnames2pindex: dict[str, int] = {} + self.stacked_modifier_method: dict[ + Literal["sum", "product", "reduction_product"], list[str] + ] = {"sum": [], "product": [], "reduction_product": []} self.pnames = self.pconfig.keys() self.npar = len(self.pnames) if self.npar != len(set([name.lower() for name in self.pnames])): - raise ValueError("Parameters of the SEIR model have the same name (remember that case is not sufficient!)") + raise ValueError( + "Parameters of the SEIR model have the same name (remember that case is not sufficient!)" + ) # Attributes of dictionary for idx, pn in enumerate(self.pnames): @@ -90,19 +95,29 @@ def __init__( # Parameter characterized by it's distribution if self.pconfig[pn]["value"].exists(): - self.pdata[pn]["dist"] = self.pconfig[pn]["value"].as_random_distribution() + self.pdata[pn]["dist"] = self.pconfig[pn][ + "value" + ].as_random_distribution() # Parameter given as a file elif self.pconfig[pn]["timeseries"].exists(): - fn_name = os.path.join(path_prefix, self.pconfig[pn]["timeseries"].get()) + fn_name = os.path.join( + path_prefix, self.pconfig[pn]["timeseries"].get() + ) df = utils.read_df(fn_name).set_index("date") df.index = pd.to_datetime(df.index) - if len(df.columns) == 1: # if only one ts, assume it applies to all subpops + if ( + len(df.columns) == 1 + ): # if only one ts, assume it applies to all subpops df = pd.DataFrame( - pd.concat([df] * len(subpop_names), axis=1).values, index=df.index, columns=subpop_names + pd.concat([df] * len(subpop_names), axis=1).values, + index=df.index, + columns=subpop_names, ) elif len(df.columns) >= 
len(subpop_names): # one ts per subpop - df = df[subpop_names] # make sure the order of subpops is the same as the reference + df = df[ + subpop_names + ] # make sure the order of subpops is the same as the reference # (subpop_names from spatial setup) and select the columns else: print("loaded col :", sorted(list(df.columns))) @@ -136,15 +151,23 @@ def __init__( self.pdata[pn]["ts"] = df if self.pconfig[pn]["stacked_modifier_method"].exists(): - self.pdata[pn]["stacked_modifier_method"] = self.pconfig[pn]["stacked_modifier_method"].as_str() + self.pdata[pn]["stacked_modifier_method"] = self.pconfig[pn][ + "stacked_modifier_method" + ].as_str() else: self.pdata[pn]["stacked_modifier_method"] = "product" - logging.debug(f"No 'stacked_modifier_method' for parameter {pn}, assuming multiplicative NPIs") + logging.debug( + f"No 'stacked_modifier_method' for parameter {pn}, assuming multiplicative NPIs" + ) if self.pconfig[pn]["rolling_mean_windows"].exists(): - self.pdata[pn]["rolling_mean_windows"] = self.pconfig[pn]["rolling_mean_windows"].get() + self.pdata[pn]["rolling_mean_windows"] = self.pconfig[pn][ + "rolling_mean_windows" + ].get() - self.stacked_modifier_method[self.pdata[pn]["stacked_modifier_method"]].append(pn.lower()) + self.stacked_modifier_method[ + self.pdata[pn]["stacked_modifier_method"] + ].append(pn.lower()) logging.debug(f"We have {self.npar} parameter: {self.pnames}") logging.debug(f"Data to sample is: {self.pdata}") @@ -154,11 +177,11 @@ def __init__( def picklable_lamda_alpha(self): """ Read the `alpha_val` attribute. - + This defunct method returns the `alpha_val` attribute of this class which is never set by this class. If this method is called and the `alpha_val` attribute is not set an AttributeError will be raised. - + Returns: The `alpha_val` attribute. """ @@ -167,11 +190,11 @@ def picklable_lamda_alpha(self): def picklable_lamda_sigma(self): """ Read the `sigma_val` attribute. 
- + This defunct method returns the `sigma_val` attribute of this class which is never set by this class. If this method is called and the `sigma_val` attribute is not set an AttributeError will be raised. - + Returns: The `sigma_val` attribute. """ @@ -180,9 +203,9 @@ def picklable_lamda_sigma(self): def get_pnames2pindex(self) -> dict: """ Read the `pnames2pindex` attribute. - + This redundant method returns the `pnames2pindex` attribute of this class. - + Returns: A mapping parameter names to their location in the `pnames` attribute. """ @@ -191,22 +214,22 @@ def get_pnames2pindex(self) -> dict: def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: """ Format all parameters as a numpy array including sampling. - - The entries in the output array are filled based on the input given in the + + The entries in the output array are filled based on the input given in the parameters section of a yaml config file. If the given parameter is pulled from - a distribution rather than fixed the values will be pulled from that - distribution. If an appropriate value cannot be found for an entry then a + a distribution rather than fixed the values will be pulled from that + distribution. If an appropriate value cannot be found for an entry then a `np.nan` is returned. - + Args: n_days: The number of days to generate an array for. nsubpops: The number of subpopulations to generate an array for. - + Returns: - A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` - corresponds to the `npar` attribute of this class. - - Note: + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + corresponds to the `npar` attribute of this class. + + Notes: If any of the parameters are 'timeseries' type parameters then `n_days` and `nsubpops` must be equal to the number of days between `ti` and `tf` given when initializing this class and the number of subpopulations given to this @@ -223,29 +246,31 @@ class via `subpop_names`. 
return param_arr # we don't store it as a member because this object needs to be small to be pickable - def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> ndarray: + def parameters_load( + self, param_df: pd.DataFrame, n_days: int, nsubpops: int + ) -> ndarray: """ Format all parameters as a numpy array including sampling and overrides. - + This method serves largely the same purpose as the `parameters_quick_draw`, but has the ability to override the parameter specifications contained by this class with a given dataframe. - + Args: - param_df: A DataFrame containing the columns 'parameter' and 'value'. If - more than one entry for a given parameter is given then only the first + param_df: A DataFrame containing the columns 'parameter' and 'value'. If + more than one entry for a given parameter is given then only the first value will be taken. n_days: The number of days to generate an array for. nsubpops: The number of subpopulations to generate an array for. - + Returns: - A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` + A numpy array of size (`npar`, `n_days`, `nsubpops`) where `npar` corresponds to the `npar` attribute of this class. - - Note: - If any of the parameters are 'timeseries' type parameters and are not being - overridden then `n_days` and `nsubpops` must be equal to the number of days - between `ti` and `tf` given when initializing this class and the number of + + Notes: + If any of the parameters are 'timeseries' type parameters and are not being + overridden then `n_days` and `nsubpops` must be equal to the number of days + between `ti` and `tf` given when initializing this class and the number of subpopulations given to this class via `subpop_names`. 
""" param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") @@ -258,7 +283,9 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> elif "ts" in self.pdata[pn]: param_arr[idx] = self.pdata[pn]["ts"].values else: - print(f"PARAM: parameter {pn} NOT found in loadID file. Drawing from config distribution") + print( + f"PARAM: parameter {pn} NOT found in loadID file. Drawing from config distribution" + ) pval = self.pdata[pn]["dist"]() param_arr[idx] = np.full((n_days, nsubpops), pval) @@ -267,14 +294,14 @@ def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: """ Serialize a parameter draw as a pandas `DataFrame`. - + This method only considers distribution parameters, which does include fixed parameters. - + Args: - p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that returned by `parameters_quick_draw`. - + Returns: A pandas `DataFrame` with the columns 'parameter' and 'value' corresponding to the parameter name and value as well as an index containing the parameter @@ -282,9 +309,15 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: """ # we don't write to disk time series parameters. out_df = pd.DataFrame( - [p_draw[idx, 0, 0] for idx, pn in enumerate(self.pnames) if "dist" in self.pdata[pn]], + [ + p_draw[idx, 0, 0] + for idx, pn in enumerate(self.pnames) + if "dist" in self.pdata[pn] + ], columns=["value"], - index=[pn for idx, pn in enumerate(self.pnames) if "dist" in self.pdata[pn]], + index=[ + pn for idx, pn in enumerate(self.pnames) if "dist" in self.pdata[pn] + ], ) out_df["parameter"] = out_df.index return out_df @@ -292,14 +325,14 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: """ Params reduced according to the NPI provided. 
- + Args: - p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that + p_draw: A numpy array of shape (`npar`, `n_days`, `nsubpops`) like that returned by `parameters_quick_draw`. npi: An NPI object describing the parameter reduction to perform. - + Returns: - An array the same shape as `p_draw` with the prescribed reductions + An array the same shape as `p_draw` with the prescribed reductions performed. """ p_reduced = copy.deepcopy(p_draw) @@ -312,6 +345,8 @@ def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: ) p_reduced[idx] = npi_val if "rolling_mean_windows" in self.pdata[pn]: - p_reduced[idx] = utils.rolling_mean_pad(data=npi_val, window=self.pdata[pn]["rolling_mean_windows"]) + p_reduced[idx] = utils.rolling_mean_pad( + data=npi_val, window=self.pdata[pn]["rolling_mean_windows"] + ) return p_reduced From a82596db80fe18ace12ee2d293733cf0d6307ed7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 7 Aug 2024 15:54:28 -0400 Subject: [PATCH 29/31] Consolidated confuse testing helpers Combined `create_confuse_rootview_from_dict` and `create_confuse_subview_from_dict` into one `create_confuse_configview_from_dict` function in `gempyor.testing`. 
--- flepimop/gempyor_pkg/src/gempyor/testing.py | 49 ++++--------- ...est_create_confuse_configview_from_dict.py | 68 +++++++++++++++++++ .../test_create_confuse_rootview_from_dict.py | 32 --------- .../test_create_confuse_subview_from_dict.py | 33 --------- 4 files changed, 83 insertions(+), 99 deletions(-) create mode 100644 flepimop/gempyor_pkg/tests/testing/test_create_confuse_configview_from_dict.py delete mode 100644 flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py delete mode 100644 flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index 56f92c3eb..742f8af85 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -7,8 +7,7 @@ __all__ = [ "change_directory_to_temp_directory", - "create_confuse_rootview_from_dict", - "create_confuse_subview_from_dict", + "create_confuse_configview_from_dict", "partials_are_similar", "sample_fits_distribution", ] @@ -44,15 +43,19 @@ def change_directory_to_temp_directory() -> Generator[None, None, None]: temp_dir.cleanup() -def create_confuse_rootview_from_dict(data: dict[str, Any]) -> confuse.RootView: +def create_confuse_configview_from_dict( + data: dict[str, Any], name: None | str = None +) -> confuse.ConfigView: """ - Create a RootView from a dictionary for unit testing confuse parameters. + Create a ConfigView from a dictionary for unit testing confuse parameters. Args: - data: The data to populate the confuse root view with. + data: The data to populate the confuse ConfigView with. + name: The name of the Subview being created or if is `None` a RootView is + created instead. Returns: - A confuse root view. + Either a confuse Subview or RootView depending on the value of `name`. Examples: >>> data = { @@ -61,7 +64,7 @@ def create_confuse_rootview_from_dict(data: dict[str, Any]) -> confuse.RootView: ... 
"alphabet": ["a", "b", "c"], ... "mapping": {"x": 1, "y": 2}, ... } - >>> rv = create_confuse_rootview_from_dict(data) + >>> rv = create_confuse_configview_from_dict(data) >>> rv >>> rv.keys() @@ -72,31 +75,7 @@ def create_confuse_rootview_from_dict(data: dict[str, Any]) -> confuse.RootView: True >>> rv.name 'root' - """ - return confuse.RootView([confuse.ConfigSource.of(data)]) - - -def create_confuse_subview_from_dict( - name: str, data: dict[str, Any] -) -> confuse.Subview: - """ - Create a Subview from a dictionary for unit testing confuse parameters. - - Args: - name: The name of the subview being created. - data: The data to populate the confuse subview with. - - Returns: - A confuse subview. - - Examples: - >>> data = { - ... "foo": "bar", - ... "fizz": 123, - ... "alphabet": ["a", "b", "c"], - ... "mapping": {"x": 1, "y": 2}, - ... } - >>> sv = create_confuse_subview_from_dict("params", data) + >>> sv = create_confuse_configview_from_dict(data, "params") >>> sv >>> sv.keys() @@ -108,8 +87,10 @@ def create_confuse_subview_from_dict( >>> sv.name 'params' """ - root_view = create_confuse_rootview_from_dict({name: data}) - return root_view[name] + data = {name: data} if name is not None else data + cv = confuse.RootView([confuse.ConfigSource.of(data)]) + cv = cv[name] if name is not None else cv + return cv def partials_are_similar( diff --git a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_configview_from_dict.py b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_configview_from_dict.py new file mode 100644 index 000000000..031741abd --- /dev/null +++ b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_configview_from_dict.py @@ -0,0 +1,68 @@ +from datetime import date +from typing import Any + +import confuse +import pytest + +from gempyor.testing import create_confuse_configview_from_dict + + +class TestCreateConfuseConfigviewFromDict: + @pytest.mark.parametrize( + "name,data", + [ + (None, {}), + ("nil", {}), + (None, {"foo": "bar"}), + 
("basic", {"foo": "bar"}), + (None, {"a": "b", "c": 1}), + ("small", {"a": "b", "c": 1}), + ( + None, + { + "alphabet": ["a", "b", "c", "d", "e"], + "integers": [1, 2, 3, 4, 5], + "floats": [1.2, 2.3, 3.4, 4.5, 5.6], + }, + ), + ( + "big", + { + "alphabet": ["a", "b", "c", "d", "e"], + "integers": [1, 2, 3, 4, 5], + "floats": [1.2, 2.3, 3.4, 4.5, 5.6], + }, + ), + (None, {"as_of_date": date(2024, 1, 1)}), + ("date_data_type", {"as_of_date": date(2024, 1, 1)}), + ( + None, + { + "foo": "bar", + "fizz": 123, + "alphabet": ["a", "b", "c"], + "mapping": {"x": 1, "y": 2}, + }, + ), + ( + "root", + { + "foo": "bar", + "fizz": 123, + "alphabet": ["a", "b", "c"], + "mapping": {"x": 1, "y": 2}, + }, + ), + ], + ) + def test_output_validation(self, name: str, data: dict[str, Any]) -> None: + view = create_confuse_configview_from_dict(data, name=name) + assert isinstance(view, confuse.ConfigView) + assert ( + isinstance(view, confuse.RootView) + if name is None + else isinstance(view, confuse.Subview) + ) + assert view == view.root() if name is None else view != view.root() + assert view.name == "root" if name is None else view.name == name + assert view.get() == data diff --git a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py deleted file mode 100644 index 8a889fa6a..000000000 --- a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_rootview_from_dict.py +++ /dev/null @@ -1,32 +0,0 @@ -from datetime import date -from typing import Any - -import confuse -import pytest - -from gempyor.testing import create_confuse_rootview_from_dict - - -class TestCreateConfuseRootviewFromDict: - @pytest.mark.parametrize( - "data", - [ - ({}), - ({"foo": "bar"}), - ({"a": "b", "c": 1}), - ( - { - "alphabet": ["a", "b", "c", "d", "e"], - "integers": [1, 2, 3, 4, 5], - "floats": [1.2, 2.3, 3.4, 4.5, 5.6], - } - ), - ({"as_of_date": date(2024, 1, 1)}), - ], - ) - def 
test_output_validation(self, data: dict[str, Any]) -> None: - root_view = create_confuse_rootview_from_dict(data) - assert isinstance(root_view, confuse.RootView) - assert root_view == root_view.root() - assert root_view.name == "root" - assert root_view.get() == data diff --git a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py b/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py deleted file mode 100644 index 20de677a3..000000000 --- a/flepimop/gempyor_pkg/tests/testing/test_create_confuse_subview_from_dict.py +++ /dev/null @@ -1,33 +0,0 @@ -from datetime import date -from typing import Any - -import confuse -import pytest - -from gempyor.testing import create_confuse_subview_from_dict - - -class TestCreateConfuseSubviewFromDict: - @pytest.mark.parametrize( - "name,data", - [ - ("nil", {}), - ("basic", {"foo": "bar"}), - ("small", {"a": "b", "c": 1}), - ( - "big", - { - "alphabet": ["a", "b", "c", "d", "e"], - "integers": [1, 2, 3, 4, 5], - "floats": [1.2, 2.3, 3.4, 4.5, 5.6], - }, - ), - ("date_data_type", {"as_of_date": date(2024, 1, 1)}), - ], - ) - def test_output_validation(self, name: str, data: dict[str, Any]) -> None: - root_view = create_confuse_subview_from_dict(name, data) - assert isinstance(root_view, confuse.Subview) - assert root_view != root_view.root() - assert root_view.name == name - assert root_view.get() == data From 2ec96957b72bb272d052e8ab87b3666b11e995f1 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 7 Aug 2024 15:59:25 -0400 Subject: [PATCH 30/31] Update `Parameters` unit tests to use new helper Updated unit tests for `gempyor.parameters.Parameters` to use the new `create_confuse_configview_from_dict` helper instead of the two prior versions of this function. 
--- .../gempyor_pkg/tests/parameters/test_parameters_class.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py index c74a19015..4bfb86fd2 100644 --- a/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py +++ b/flepimop/gempyor_pkg/tests/parameters/test_parameters_class.py @@ -12,7 +12,7 @@ from gempyor.parameters import Parameters from gempyor.testing import ( - create_confuse_subview_from_dict, + create_confuse_configview_from_dict, partials_are_similar, sample_fits_distribution, ) @@ -38,8 +38,8 @@ def __init__( def create_confuse_subview(self) -> confuse.Subview: if self._confuse_subview is None: - self._confuse_subview = create_confuse_subview_from_dict( - "parameters", self.config + self._confuse_subview = create_confuse_configview_from_dict( + self.config, name="parameters" ) return self._confuse_subview From 5b4d062002081ccb817de366cc1e721d2d066cfe Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:44:02 -0400 Subject: [PATCH 31/31] Added brief desc of example for testing util Added a brief description to the example for the `create_confuse_configview_from_dict` function that also shows the corresponding yaml that is represented by the example. --- flepimop/gempyor_pkg/src/gempyor/testing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/testing.py b/flepimop/gempyor_pkg/src/gempyor/testing.py index 742f8af85..8e7e4d3c2 100644 --- a/flepimop/gempyor_pkg/src/gempyor/testing.py +++ b/flepimop/gempyor_pkg/src/gempyor/testing.py @@ -58,6 +58,16 @@ def create_confuse_configview_from_dict( Either a confuse Subview or RootView depending on the value of `name`. 
Examples: + This example gives a brief demonstration of how to represent this yaml: + ```yaml + foo: bar + fizz: 123 + alphabet: [a, b, c] + mapping: + x: 1 + y: 2 + ``` + with this function as a python dict for unit testing purposes. >>> data = { ... "foo": "bar", ... "fizz": 123,