Skip to content

Commit

Permalink
Merge pull request #277 from HopkinsIDD/unit-test-gempyor-parameters
Browse files Browse the repository at this point in the history
Document/Unit Test `gempyor.parameters`
  • Loading branch information
TimothyWillard authored Sep 13, 2024
2 parents 178c730 + 523de80 commit ed29ca8
Show file tree
Hide file tree
Showing 8 changed files with 1,515 additions and 61 deletions.
259 changes: 201 additions & 58 deletions flepimop/gempyor_pkg/src/gempyor/parameters.py

Large diffs are not rendered by default.

191 changes: 191 additions & 0 deletions flepimop/gempyor_pkg/src/gempyor/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,21 @@
the optional test dependencies must be installed.
"""

__all__ = [
"change_directory_to_temp_directory",
"create_confuse_configview_from_dict",
"partials_are_similar",
"sample_fits_distribution",
]

from collections.abc import Generator
import functools
import os
from tempfile import TemporaryDirectory
from typing import Any, Literal

import confuse
import numpy as np
import pytest


Expand All @@ -30,3 +41,183 @@ def change_directory_to_temp_directory() -> Generator[None, None, None]:
yield
os.chdir(current_dir)
temp_dir.cleanup()


def create_confuse_configview_from_dict(
    data: dict[str, Any], name: None | str = None
) -> confuse.ConfigView:
    """
    Build a confuse view backed by an in-memory dictionary.

    This is a convenience for unit tests that need a `confuse.ConfigView` without
    writing a yaml file to disk first.

    Args:
        data: The data to populate the confuse ConfigView with.
        name: The name of the Subview to create. When `None` the data is placed
            directly in a RootView and that RootView is returned.

    Returns:
        A confuse RootView when `name` is `None`, otherwise the Subview called
        `name` whose contents are `data`.

    Examples:
        The yaml document:

        ```yaml
        foo: bar
        fizz: 123
        alphabet: [a, b, c]
        mapping:
            x: 1
            y: 2
        ```

        can be represented as a python dict and turned into a view like so:

        >>> data = {
        ...     "foo": "bar",
        ...     "fizz": 123,
        ...     "alphabet": ["a", "b", "c"],
        ...     "mapping": {"x": 1, "y": 2},
        ... }
        >>> rv = create_confuse_configview_from_dict(data)
        >>> rv
        <RootView: root>
        >>> rv.keys()
        ['foo', 'fizz', 'alphabet', 'mapping']
        >>> rv.get()
        {'foo': 'bar', 'fizz': 123, 'alphabet': ['a', 'b', 'c'], 'mapping': {'x': 1, 'y': 2}}
        >>> rv == rv.root()
        True
        >>> rv.name
        'root'
        >>> sv = create_confuse_configview_from_dict(data, "params")
        >>> sv
        <Subview: params>
        >>> sv.keys()
        ['foo', 'fizz', 'alphabet', 'mapping']
        >>> sv.get()
        {'foo': 'bar', 'fizz': 123, 'alphabet': ['a', 'b', 'c'], 'mapping': {'x': 1, 'y': 2}}
        >>> sv == sv.root()
        False
        >>> sv.name
        'params'
    """
    if name is None:
        # No name requested: the data itself is the root of the configuration.
        return confuse.RootView([confuse.ConfigSource.of(data)])
    # Nest the data one level down so that indexing the root by `name` yields a
    # Subview carrying that name.
    wrapping_root = confuse.RootView([confuse.ConfigSource.of({name: data})])
    return wrapping_root[name]


def partials_are_similar(
    f: functools.partial,
    g: functools.partial,
    check_func: bool = True,
    check_args: bool = True,
    check_keywords: bool = True,
) -> bool:
    """
    Decide whether two partials are 'similar' enough to be treated as equal.

    The default `__eq__` of `functools.partial` does not behave the way most unit
    tests want, so this helper compares the pieces of the two partials instead.
    See python/cpython#47814 for background on why `__eq__` is tricky for
    `functools.partial`.

    Args:
        f: A partial function to test.
        g: A partial function to test.
        check_func: If the `func` attributes of `f` and `g` should be checked for
            equality.
        check_args: If the `args` attributes of `f` and `g` should be checked for
            equality.
        check_keywords: If the `keywords` attributes of `f` and `g` should be
            checked for equality.

    Returns:
        `True` if every enabled comparison finds the attributes equal, `False` as
        soon as one enabled comparison finds a difference.

    Examples:
        >>> from functools import partial
        >>> a = lambda x, y: x + y
        >>> b = partial(a, 1)
        >>> c = partial(a, 1.)
        >>> b == c
        False
        >>> partials_are_similar(b, c)
        True
    """
    # Checked in this order; a disabled check never touches its attribute.
    attribute_checks = (
        (check_func, "func"),
        (check_args, "args"),
        (check_keywords, "keywords"),
    )
    for enabled, attribute in attribute_checks:
        if enabled and getattr(f, attribute) != getattr(g, attribute):
            return False
    return True


def sample_fits_distribution(
sample: float | int,
distribution: Literal[
"fixed", "uniform", "poisson", "binomial", "truncnorm", "lognorm"
],
**kwargs: dict[str, Any],
) -> bool:
"""
Test if a sample fits a distribution with a given set of parameters.
This function tests if the given `sample` could possibly be drawn from the
distribution given with its parameters, but it does not test if it could reasonably
be drawn from that distribution.
Args:
sample: The value to test.
distribution: The name of the distribution to test against.
**kwargs: Further arguments to specify the parameters of a distribution.
Returns:
A boolean indicating if the sample given could be from the distribution.
See Also:
gempyor.utils.random_distribution_sampler
Examples:
>>> sample_fits_distribution(0.0, "fixed", value=0.0)
True
>>> sample_fits_distribution(0.0, "fixed", value=0.5)
False
>>> sample_fits_distribution(0.5, "poisson", lam=3.0)
False
>>> sample_fits_distribution(
... -3.5, "truncnorm", a=-5.5, b=3.4, mean=-1.4, sd=1.1
... )
True
>>> sample_fits_distribution(100000000, "lognorm", meanlog=1.0, sdlog=1.0)
True
"""
# Poisson and binomial only have support on a subset of the integers
if distribution in ["poisson", "binomial"] and not (
isinstance(sample, int) or (isinstance(sample, float) and sample.is_integer())
):
return False
# Now check distribution constraints
if distribution == "fixed":
return bool(np.isclose(sample, kwargs.get("value")))
elif distribution == "uniform":
# Uniform is on [low,high), but want uniform to match fixed when low == high.
return bool(
(
np.isclose(kwargs.get("high"), kwargs.get("low"))
and np.isclose(sample, kwargs.get("low"))
)
or (
np.greater_equal(sample, kwargs.get("low"))
and np.less(sample, kwargs.get("high"))
)
)
elif distribution == "poisson":
return bool(np.greater_equal(sample, 0.0))
elif distribution == "binomial":
return bool(
np.greater_equal(sample, 0.0) and np.less_equal(sample, kwargs.get("n"))
)
elif distribution == "truncnorm":
return bool(
np.greater(sample, kwargs.get("a")) and np.less(sample, kwargs.get("b"))
)
elif distribution == "lognorm":
return bool(np.greater(sample, 0.0))
80 changes: 77 additions & 3 deletions flepimop/gempyor_pkg/src/gempyor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import shutil
import subprocess
import time
import typing
from typing import List, Dict, Literal
from typing import Any, Callable, Literal

import confuse
import numpy as np
Expand Down Expand Up @@ -160,7 +159,7 @@ def wrapper(*args, **kwargs):
return decorator


def search_and_import_plugins_class(plugin_file_path: str, path_prefix: str, class_name: str, **kwargs: dict[str, typing.Any]) -> typing.Any:
def search_and_import_plugins_class(plugin_file_path: str, path_prefix: str, class_name: str, **kwargs: dict[str, Any]) -> Any:
"""
Function serving to create a class that finds and imports the necessary modules.
Expand Down Expand Up @@ -413,6 +412,81 @@ def get_log_normal(
return scipy.stats.lognorm(s=sdlog, scale=np.exp(meanlog), loc=0)


def random_distribution_sampler(
distribution: Literal[
"fixed", "uniform", "poisson", "binomial", "truncnorm", "lognorm"
],
**kwargs: dict[str, Any]
) -> Callable[[], float | int]:
"""
Create function to sample from a random distribution.
Args:
distribution: The type of distribution to generate a sampling function for.
**kwargs: Further parameters that are passed to the underlying function for the
given distribution.
Notes:
The further args expected by each distribution type are:
- fixed: value,
- uniform: low, high,
- poisson: lam,
- binomial: n, p,
- truncnorm: mean, sd, a, b,
- lognorm: meanlog, sdlog.
Returns:
A function that can be called to sample from that distribution.
Raises:
ValueError: If `distribution` is 'binomial' the given `p` must be in (0,1).
NotImplementedError: If `distribution` is not one of the type hinted options.
Examples:
>>> import numpy as np
>>> np.random.seed(123)
>>> uniform_sampler = random_distribution_sampler("uniform", low=0.0, high=3.0)
>>> uniform_sampler()
2.089407556793585
>>> uniform_sampler()
0.8584180048511384
"""
if distribution == "fixed":
# Fixed value is the same as uniform on [a, a)
return functools.partial(
np.random.uniform,
kwargs.get("value"),
kwargs.get("value"),
)
elif distribution == "uniform":
# Uniform on [low, high)
return functools.partial(
np.random.uniform,
kwargs.get("low"),
kwargs.get("high"),
)
elif distribution == "poisson":
# Poisson with mean lambda
return functools.partial(np.random.poisson, kwargs.get("lam"))
elif distribution == "binomial":
p = kwargs.get("p")
if not (0 < p < 1):
raise ValueError(f"p value {p} is out of range [0,1]")
return functools.partial(np.random.binomial, kwargs.get("n"), p)
elif distribution == "truncnorm":
# Truncated normal with mean, sd on interval [a, b]
return get_truncated_normal(
mean=kwargs.get("mean"),
sd=kwargs.get("sd"),
a=kwargs.get("a"),
b=kwargs.get("b"),
).rvs
elif distribution == "lognorm":
# Lognormal distribution with meanlog, sdlog
return get_log_normal(kwargs.get("meanlog"), kwargs.get("sdlog")).rvs
raise NotImplementedError(f"unknown distribution [got: {distribution}]")


@add_method(confuse.ConfigView)
def as_random_distribution(self):
"""
Expand Down
Loading

0 comments on commit ed29ca8

Please sign in to comment.