From 6e46c6599c98a00fc64b24f902f692c18248e49a Mon Sep 17 00:00:00 2001
From: Michael Osthege <m.osthege@fz-juelich.de>
Date: Sun, 14 Nov 2021 01:34:49 +0100
Subject: [PATCH] Add `DistributionMixin` and some corresponding noise models

---
 calibr8/__init__.py      |  8 +++++
 calibr8/contrib/noise.py | 64 +++++++++++++++++++++++++++++++++++
 calibr8/core.py          | 37 +++++++++++++++++++-
 calibr8/tests.py         | 73 +++++++++++++++++++++++++++++++++++++---
 4 files changed, 176 insertions(+), 6 deletions(-)
 create mode 100644 calibr8/contrib/noise.py

diff --git a/calibr8/__init__.py b/calibr8/__init__.py
index af6a14a..298e1df 100644
--- a/calibr8/__init__.py
+++ b/calibr8/__init__.py
@@ -1,3 +1,10 @@
+
+from .contrib.noise import (
+    NormalNoise,
+    LaplaceNoise,
+    LogNormalNoise,
+    StudentTNoise,
+)
 from .contrib.base import (
     BaseAsymmetricLogisticT,
     BaseLogIndependentAsymmetricLogisticT,
@@ -6,6 +13,7 @@
 )
 from .core import (
     CalibrationModel,
+    DistributionMixin,
     InferenceResult,
     UnivariateInferenceResult,
     NumericPosterior,
diff --git a/calibr8/contrib/noise.py b/calibr8/contrib/noise.py
new file mode 100644
index 0000000..9df722a
--- /dev/null
+++ b/calibr8/contrib/noise.py
@@ -0,0 +1,64 @@
+import numpy
+import scipy.stats
+
+from .. core import DistributionMixin
+from .. utils import HAS_PYMC, pm
+
+
+class NormalNoise(DistributionMixin):
+    """Normal noise, predicted in terms of mean and standard deviation."""
+    scipy_dist = scipy.stats.norm
+    pymc_dist = pm.Normal if HAS_PYMC else None
+
+    @staticmethod
+    def to_scipy(*params):
+        return dict(loc=params[0], scale=params[1])
+
+    @staticmethod
+    def to_pymc(*params):
+        return dict(mu=params[0], sigma=params[1])
+
+
+class LaplaceNoise(DistributionMixin):
+    """Normal noise, predicted in terms of mean and scale."""
+    scipy_dist = scipy.stats.laplace
+    pymc_dist = pm.Laplace if HAS_PYMC else None
+
+    @staticmethod
+    def to_scipy(*params):
+        return dict(loc=params[0], scale=params[1])
+
+    @staticmethod
+    def to_pymc(*params):
+        return dict(mu=params[0], b=params[1])
+
+
+class LogNormalNoise(DistributionMixin):
+    """Log-Normal noise, predicted in logarithmic mean and standard deviation.
+    ⚠ This corresponds to the NumPy/Aesara/PyMC parametrization!
+    """
+    scipy_dist = scipy.stats.lognorm
+    pymc_dist = pm.Lognormal if HAS_PYMC else None
+
+    @staticmethod
+    def to_scipy(*params):
+        # SciPy wants linear scale mean and log scale standard deviation!
+        return dict(scale=numpy.exp(params[0]), s=params[1])
+
+    @staticmethod
+    def to_pymc(*params):
+        return dict(mu=params[0], sigma=params[1])
+
+
+class StudentTNoise(DistributionMixin):
+    """Student-t noise, predicted in terms of mean, scale and degree of freedom."""
+    scipy_dist = scipy.stats.t
+    pymc_dist = pm.StudentT if HAS_PYMC else None
+
+    @staticmethod
+    def to_scipy(*params):
+        return dict(loc=params[0], scale=params[1], df=params[2])
+
+    @staticmethod
+    def to_pymc(*params):
+        return dict(mu=params[0], sigma=params[1], nu=params[2])
diff --git a/calibr8/core.py b/calibr8/core.py
index 0b4d7ad..9c9102d 100644
--- a/calibr8/core.py
+++ b/calibr8/core.py
@@ -369,7 +369,34 @@ def _infer_univariate_independent(
     )
 
 
-class CalibrationModel:
+class DistributionMixin:
+    """Maps the values returned by `CalibrationModel.predict_dependent`
+    to a SciPy distribution and its parameters, and optionally also to
+    a PyMC distribution and its parameters.
+    """
+    scipy_dist = None
+    pymc_dist = None
+
+    def to_scipy(*args) -> dict:
+        raise NotImplementedError("This model does not implement a mapping to SciPy distribution parameters.")
+
+    def to_pymc(*args) -> dict:
+        raise NotImplementedError("This model does not implement a mapping to PyMC distribution parameters.")
+
+
+def _inherits_noisemodel(cls):
+    """Determines if cls is a sub-type of DistributionMixin that's not DistributionMixin or a calibration model."""
+    for m in cls.__mro__:
+        if (
+            issubclass(m, DistributionMixin)
+            and m is not DistributionMixin
+            and not issubclass(m, CalibrationModel)
+        ):
+            return True
+    return False
+
+
+class CalibrationModel(DistributionMixin):
     """A parent class providing the general structure of a calibration model."""
 
     def __init__(self, independent_key:str, dependent_key:str, *, theta_names:typing.Tuple[str], ndim=1):
@@ -388,6 +415,14 @@ def __init__(self, independent_key:str, dependent_key:str, *, theta_names:typing
             Most calibrations are univariate (ndim=1).
             Multivariate calibrations (ndim > 1) may need to override the `infer_independent` implementation.
         """
+        if not _inherits_noisemodel(type(self)):
+            warnings.warn(
+                "This model does not implement a noise model yet."
+                "\nAdd a noise model mixin to your class definition. For example:"
+                "\n`class MyModel(CalibrationModel, LaplaceNoise)`",
+                DeprecationWarning,
+                stacklevel=2
+            )
         # make sure that the inheriting type has no required constructor (kw)args
         args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations = inspect.getfullargspec(type(self).__init__)
         n_defaults = 0 if not defaults else len(defaults)
diff --git a/calibr8/tests.py b/calibr8/tests.py
index d27cd69..bffe133 100644
--- a/calibr8/tests.py
+++ b/calibr8/tests.py
@@ -37,7 +37,7 @@
 dir_testfiles = pathlib.Path(pathlib.Path(__file__).absolute().parent, 'testfiles')
 
 
-class _TestModel(calibr8.CalibrationModel):
+class _TestModel(calibr8.CalibrationModel, calibr8.NormalNoise):
     def __init__(self, independent_key=None, dependent_key=None, theta_names=None):
         if theta_names is None:
             theta_names = tuple('a,b,c'.split(','))
@@ -85,12 +85,50 @@ def test_warnings(self):
         pass
 
 
-class TestBasicCalibrationModel:
+class TestNoiseModels:
+    def test_base_class(self):
+        assert calibr8.DistributionMixin.scipy_dist is None
+        assert calibr8.DistributionMixin.pymc_dist is None
+        with pytest.raises(NotImplementedError, match="mapping to SciPy"):
+            assert calibr8.DistributionMixin.to_scipy()
+        with pytest.raises(NotImplementedError, match="mapping to PyMC"):
+            assert calibr8.DistributionMixin.to_pymc()
+        pass
+
+    @pytest.mark.skipif(not HAS_PYMC, reason='requires PyMC')
+    @pytest.mark.parametrize(
+        "cls,a,b,params", [
+            (calibr8.NormalNoise, -1, 1, (0.2, 1.2)),
+            (calibr8.LaplaceNoise, -1, 1, (-0.2, 0.8)),
+            (calibr8.LogNormalNoise, 0.1, 2, (-0.1, 0.2)),
+            (calibr8.StudentTNoise, -2, 10, (1.5, 0.3, 4)),
+        ]
+    )
+    def test_parametrization_equivalence(self, cls: calibr8.DistributionMixin, a, b, params):
+        """Validates that SciPy and PyMC parametrization give the same distribution."""
+        x = numpy.linspace(a, b, 7)
+        kwargs_scipy = cls.to_scipy(*params)
+        kwargs_pymc = cls.to_pymc(*params)
+
+        result_scipy = cls.scipy_dist.logpdf(x=x, **kwargs_scipy)
+
+        rv = cls.pymc_dist.dist(**kwargs_pymc)
+        if not hasattr(pm, "logp"):
+            # PyMC v3 syntax
+            result_pymc = rv.logp(x).eval()
+        else:
+            result_pymc = [pm.logp(rv, xi).eval() for xi in x]
+
+        # The resulting log-PDF evaluations should be really close
+        numpy.testing.assert_allclose(result_scipy, result_pymc)
+        pass
+
+
+class TestBaseCalibrationModel:
     def test_init(self):
         em = _TestModel('I', 'D', theta_names=tuple('c,d,e'.split(',')))
         assert em.independent_key == 'I'
         assert em.dependent_key == 'D'
-        print(em.theta_names)
         assert em.theta_names == ('c', 'd', 'e')
         assert em.theta_bounds is None
         assert em.theta_guess is None
@@ -99,6 +137,31 @@ def test_init(self):
         assert em.cal_independent is None
         assert em.cal_dependent is None
         pass
+
+    def test_init_requires_noisemodel(self):
+        assert issubclass(calibr8.CalibrationModel, calibr8.DistributionMixin)
+
+        class _InvalidModel(calibr8.CalibrationModel):
+            def __init__(self):
+                super().__init__("A", "B", theta_names=tuple('abc'), ndim=1)
+
+        assert not calibr8.core._inherits_noisemodel(calibr8.DistributionMixin)
+        assert not calibr8.core._inherits_noisemodel(calibr8.CalibrationModel)
+        assert calibr8.core._inherits_noisemodel(calibr8.LaplaceNoise)
+        assert not calibr8.core._inherits_noisemodel(_InvalidModel)
+        assert calibr8.core._inherits_noisemodel(_TestModel)
+
+        # Check that a warning is raised when the model class does not inherit a DistributionMixin _sub_class
+        with pytest.warns(DeprecationWarning, match="does not implement a noise model"):
+            _InvalidModel()
+
+        # Check that no warning is raised when the inheritance is correct
+        with pytest.warns(None) as record:
+            _TestModel()
+        assert len(record) == 0
+
+        assert issubclass(_TestModel, calibr8.DistributionMixin)
+        pass
     
     def test_constructor_signature_check(self):
         class EM_OK(calibr8.CalibrationModel):
@@ -109,13 +172,13 @@ def __init__(self, arg1=1, *, kwonly=2, kwonlydefault=4):
         class EM_args(calibr8.CalibrationModel):
             def __init__(self, arg1):
                 super().__init__('I', 'D', theta_names=tuple('abc'))
-        with pytest.raises(TypeError):
+        with pytest.raises(TypeError, match=r"constructor must not have any required \(kw\)arguments"):
             EM_args(arg1=3)
 
         class EM_kwargs(calibr8.CalibrationModel):
             def __init__(self, *, kwonly, kwonlydefault=4):
                 super().__init__('I', 'D', theta_names=tuple('abc'))
-        with pytest.raises(TypeError):
+        with pytest.raises(TypeError, match=r"constructor must not have any required \(kw\)arguments"):
             EM_kwargs(kwonly=3)
         
         pass