From 6e46c6599c98a00fc64b24f902f692c18248e49a Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sun, 14 Nov 2021 01:34:49 +0100 Subject: [PATCH] Add `DistributionMixin` and some corresponding noise models --- calibr8/__init__.py | 8 +++++ calibr8/contrib/noise.py | 64 +++++++++++++++++++++++++++++++++++ calibr8/core.py | 37 +++++++++++++++++++- calibr8/tests.py | 73 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 176 insertions(+), 6 deletions(-) create mode 100644 calibr8/contrib/noise.py diff --git a/calibr8/__init__.py b/calibr8/__init__.py index af6a14a..298e1df 100644 --- a/calibr8/__init__.py +++ b/calibr8/__init__.py @@ -1,3 +1,10 @@ + +from .contrib.noise import ( + NormalNoise, + LaplaceNoise, + LogNormalNoise, + StudentTNoise, +) from .contrib.base import ( BaseAsymmetricLogisticT, BaseLogIndependentAsymmetricLogisticT, @@ -6,6 +13,7 @@ ) from .core import ( CalibrationModel, + DistributionMixin, InferenceResult, UnivariateInferenceResult, NumericPosterior, diff --git a/calibr8/contrib/noise.py b/calibr8/contrib/noise.py new file mode 100644 index 0000000..9df722a --- /dev/null +++ b/calibr8/contrib/noise.py @@ -0,0 +1,64 @@ +import numpy +import scipy.stats + +from .. core import DistributionMixin +from .. utils import HAS_PYMC, pm + + +class NormalNoise(DistributionMixin): + """Normal noise, predicted in terms of mean and standard deviation.""" + scipy_dist = scipy.stats.norm + pymc_dist = pm.Normal if HAS_PYMC else None + + @staticmethod + def to_scipy(*params): + return dict(loc=params[0], scale=params[1]) + + @staticmethod + def to_pymc(*params): + return dict(mu=params[0], sigma=params[1]) + + +class LaplaceNoise(DistributionMixin): + """Normal noise, predicted in terms of mean and scale.""" + scipy_dist = scipy.stats.laplace + pymc_dist = pm.Laplace if HAS_PYMC else None + + @staticmethod + def to_scipy(*params): + return dict(loc=params[0], scale=params[1]) + + @staticmethod + def to_pymc(*params): + return dict(mu=params[0], b=params[1]) + + +class LogNormalNoise(DistributionMixin): + """Log-Normal noise, predicted in logarithmic mean and standard deviation. + ⚠ This corresponds to the NumPy/Aesara/PyMC parametrization! + """ + scipy_dist = scipy.stats.lognorm + pymc_dist = pm.Lognormal if HAS_PYMC else None + + @staticmethod + def to_scipy(*params): + # SciPy wants linear scale mean and log scale standard deviation! + return dict(scale=numpy.exp(params[0]), s=params[1]) + + @staticmethod + def to_pymc(*params): + return dict(mu=params[0], sigma=params[1]) + + +class StudentTNoise(DistributionMixin): + """Student-t noise, predicted in terms of mean, scale and degree of freedom.""" + scipy_dist = scipy.stats.t + pymc_dist = pm.StudentT if HAS_PYMC else None + + @staticmethod + def to_scipy(*params): + return dict(loc=params[0], scale=params[1], df=params[2]) + + @staticmethod + def to_pymc(*params): + return dict(mu=params[0], sigma=params[1], nu=params[2]) diff --git a/calibr8/core.py b/calibr8/core.py index 0b4d7ad..9c9102d 100644 --- a/calibr8/core.py +++ b/calibr8/core.py @@ -369,7 +369,34 @@ def _infer_univariate_independent( ) -class CalibrationModel: +class DistributionMixin: + """Maps the values returned by `CalibrationModel.predict_dependent` + to a SciPy distribution and its parameters, and optionally also to + a PyMC distribution and its parameters. + """ + scipy_dist = None + pymc_dist = None + + def to_scipy(*args) -> dict: + raise NotImplementedError("This model does not implement a mapping to SciPy distribution parameters.") + + def to_pymc(*args) -> dict: + raise NotImplementedError("This model does not implement a mapping to PyMC distribution parameters.") + + +def _inherits_noisemodel(cls): + """Determines if cls is a sub-type of DistributionMixin that's not DistributionMixin or a calibration model.""" + for m in cls.__mro__: + if ( + issubclass(m, DistributionMixin) + and m is not DistributionMixin + and not issubclass(m, CalibrationModel) + ): + return True + return False + + +class CalibrationModel(DistributionMixin): """A parent class providing the general structure of a calibration model.""" def __init__(self, independent_key:str, dependent_key:str, *, theta_names:typing.Tuple[str], ndim=1): @@ -388,6 +415,14 @@ def __init__(self, independent_key:str, dependent_key:str, *, theta_names:typing Most calibrations are univariate (ndim=1). Multivariate calibrations (ndim > 1) may need to override the `infer_independent` implementation. """ + if not _inherits_noisemodel(type(self)): + warnings.warn( + "This model does not implement a noise model yet." + "\nAdd a noise model mixin to your class definition. For example:" + "\n`class MyModel(CalibrationModel, LaplaceNoise)`", + DeprecationWarning, + stacklevel=2 + ) # make sure that the inheriting type has no required constructor (kw)args args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations = inspect.getfullargspec(type(self).__init__) n_defaults = 0 if not defaults else len(defaults) diff --git a/calibr8/tests.py b/calibr8/tests.py index d27cd69..bffe133 100644 --- a/calibr8/tests.py +++ b/calibr8/tests.py @@ -37,7 +37,7 @@ dir_testfiles = pathlib.Path(pathlib.Path(__file__).absolute().parent, 'testfiles') -class _TestModel(calibr8.CalibrationModel): +class _TestModel(calibr8.CalibrationModel, calibr8.NormalNoise): def __init__(self, independent_key=None, dependent_key=None, theta_names=None): if theta_names is None: theta_names = tuple('a,b,c'.split(',')) @@ -85,12 +85,50 @@ def test_warnings(self): pass -class TestBasicCalibrationModel: +class TestNoiseModels: + def test_base_class(self): + assert calibr8.DistributionMixin.scipy_dist is None + assert calibr8.DistributionMixin.pymc_dist is None + with pytest.raises(NotImplementedError, match="mapping to SciPy"): + assert calibr8.DistributionMixin.to_scipy() + with pytest.raises(NotImplementedError, match="mapping to PyMC"): + assert calibr8.DistributionMixin.to_pymc() + pass + + @pytest.mark.skipif(not HAS_PYMC, reason='requires PyMC') + @pytest.mark.parametrize( + "cls,a,b,params", [ + (calibr8.NormalNoise, -1, 1, (0.2, 1.2)), + (calibr8.LaplaceNoise, -1, 1, (-0.2, 0.8)), + (calibr8.LogNormalNoise, 0.1, 2, (-0.1, 0.2)), + (calibr8.StudentTNoise, -2, 10, (1.5, 0.3, 4)), + ] + ) + def test_parametrization_equivalence(self, cls: calibr8.DistributionMixin, a, b, params): + """Validates that SciPy and PyMC parametrization give the same distribution.""" + x = numpy.linspace(a, b, 7) + kwargs_scipy = cls.to_scipy(*params) + kwargs_pymc = cls.to_pymc(*params) + + result_scipy = cls.scipy_dist.logpdf(x=x, **kwargs_scipy) + + rv = cls.pymc_dist.dist(**kwargs_pymc) + if not hasattr(pm, "logp"): + # PyMC v3 syntax + result_pymc = rv.logp(x).eval() + else: + result_pymc = [pm.logp(rv, xi).eval() for xi in x] + + # The resulting log-PDF evaluations should be really close + numpy.testing.assert_allclose(result_scipy, result_pymc) + pass + + +class TestBaseCalibrationModel: def test_init(self): em = _TestModel('I', 'D', theta_names=tuple('c,d,e'.split(','))) assert em.independent_key == 'I' assert em.dependent_key == 'D' - print(em.theta_names) assert em.theta_names == ('c', 'd', 'e') assert em.theta_bounds is None assert em.theta_guess is None @@ -99,6 +137,31 @@ def test_init(self): assert em.cal_independent is None assert em.cal_dependent is None pass + + def test_init_requires_noisemodel(self): + assert issubclass(calibr8.CalibrationModel, calibr8.DistributionMixin) + + class _InvalidModel(calibr8.CalibrationModel): + def __init__(self): + super().__init__("A", "B", theta_names=tuple('abc'), ndim=1) + + assert not calibr8.core._inherits_noisemodel(calibr8.DistributionMixin) + assert not calibr8.core._inherits_noisemodel(calibr8.CalibrationModel) + assert calibr8.core._inherits_noisemodel(calibr8.LaplaceNoise) + assert not calibr8.core._inherits_noisemodel(_InvalidModel) + assert calibr8.core._inherits_noisemodel(_TestModel) + + # Check that a warning is raised when the model class does not inherit a DistributionMixin _sub_class + with pytest.warns(DeprecationWarning, match="does not implement a noise model"): + _InvalidModel() + + # Check that no warning is raised when the inheritance is correct + with pytest.warns(None) as record: + _TestModel() + assert len(record) == 0 + + assert issubclass(_TestModel, calibr8.DistributionMixin) + pass def test_constructor_signature_check(self): class EM_OK(calibr8.CalibrationModel): @@ -109,13 +172,13 @@ def __init__(self, arg1=1, *, kwonly=2, kwonlydefault=4): class EM_args(calibr8.CalibrationModel): def __init__(self, arg1): super().__init__('I', 'D', theta_names=tuple('abc')) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=r"constructor must not have any required \(kw\)arguments"): EM_args(arg1=3) class EM_kwargs(calibr8.CalibrationModel): def __init__(self, *, kwonly, kwonlydefault=4): super().__init__('I', 'D', theta_names=tuple('abc')) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=r"constructor must not have any required \(kw\)arguments"): EM_kwargs(kwonly=3) pass