Skip to content

Commit

Permalink
rename variables in EnsembleDistribution constructor, begin to imple…
Browse files Browse the repository at this point in the history
…ment MSCABeta
  • Loading branch information
mbi6245 committed Oct 17, 2024
1 parent ef6e027 commit f68fd08
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 37 deletions.
87 changes: 80 additions & 7 deletions plots.ipynb

Large diffs are not rendered by default.

101 changes: 100 additions & 1 deletion src/ensemble/distributions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Tuple, Union

Expand All @@ -17,9 +19,17 @@ class Distribution(ABC):
"""

def __init__(self, mean: float = None, variance: float = None):
def __init__(
self,
mean: float = None,
variance: float = None,
lb: float = None,
ub: float = None,
):
self.mean = mean
self.variance = variance
self.lb = lb
self.ub = ub
# # some kind of dictionary with
# # key: the support (full real line, semi infinite, etc...)
# # value: function that gets called when distribution is initialized
Expand Down Expand Up @@ -256,6 +266,13 @@ def _create_scipy_dist(self) -> None:

# analytic sol
class Beta(Distribution):
# TODO: WANT TO BE ABLE TO PASS IN UPPER AND LOWER BOUNDS TO BE REFLECTED IN THE DIST
# EX: MEAN 6, VAR 0.2, LB 5, UB 10
# ADJ_MEAN = (MEAN - LB) / INTERVAL_WIDTH
# ADJ_VAR = VAR / INTERVAL_WIDTH ** 2  (VARIANCE SCALES WITH THE SQUARE OF THE WIDTH)
# INPUT ADJ MEAN & VAR INTO FUNCTION
# JUST GET RVS TO WORK FOR NOW: WHEN YOU TAKE A SAMPLE OF SIZE 100,
# MULTIPLICATIVELY SCALE AND THEN LINEARLY SHIFT THE DATA TO THE ORIGINAL BOUNDS
"""https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.beta.html#scipy.stats.beta"""

def support(self) -> Tuple[float, float]:
Expand All @@ -275,6 +292,87 @@ def _create_scipy_dist(self) -> None:
self._scipy_dist = stats.beta(a=alpha, b=beta)


class MSCABeta(Distribution):
    """Beta distribution rescaled from the unit interval onto ``[lb, ub]``.

    The requested mean and variance are mapped onto ``[0, 1]``, handed to
    ``Beta``, and every public method then converts between the two scales
    with the affine transform ``X = lb + width * Y`` where
    ``width = ub - lb`` and ``Y`` is the unit-interval variable.
    """

    def _create_scipy_dist(self) -> None:
        """Build the unit-interval ``Beta`` backing this distribution.

        Raises
        ------
        ValueError
            if either bound is missing or ``ub`` is not strictly greater
            than ``lb``
        """
        if self.lb is None or self.ub is None:
            raise ValueError("MSCABeta requires both lb and ub")
        if not self.ub > self.lb:
            raise ValueError("ub must be strictly greater than lb")
        self.width = self.ub - self.lb
        adj_mean = (self.mean - self.lb) / self.width
        # Var(lb + width * Y) = width**2 * Var(Y), so the variance is divided
        # by the squared width, not the width.
        adj_var = self.variance / self.width**2
        self._scipy_dist = Beta(adj_mean, adj_var)

    def support(self) -> Tuple[float, float]:
        """create tuple representing endpoints of support

        Returns
        -------
        Tuple[float, float]
            lower and upper bound of the rescaled distribution
        """
        return (self.lb, self.ub)

    def rvs(self, *args, **kwds):
        """draw unit-interval variates and map them onto ``[lb, ub]``

        Returns
        -------
        np.ndarray
            random variates from a given distribution/parameters
        """
        # scale first, then shift: X = width * Y + lb
        return self._scipy_dist.rvs(*args, **kwds) * self.width + self.lb

    def pdf(self, x: npt.ArrayLike) -> np.ndarray:
        """probability density function on the original ``[lb, ub]`` scale

        Parameters
        ----------
        x : npt.ArrayLike
            quantiles

        Returns
        -------
        np.ndarray
            PDF evaluated at quantile x
        """
        unit_x = (np.asarray(x) - self.lb) / self.width
        # change of variables: f_X(x) = f_Y((x - lb) / width) / width
        return self._scipy_dist.pdf(unit_x) / self.width

    def cdf(self, q: npt.ArrayLike) -> np.ndarray:
        """cumulative distribution function on the original scale

        Parameters
        ----------
        q : npt.ArrayLike
            quantiles

        Returns
        -------
        np.ndarray
            CDF evaluated at quantile q
        """
        unit_q = (np.asarray(q) - self.lb) / self.width
        return self._scipy_dist.cdf(unit_q)

    def ppf(self, p: npt.ArrayLike) -> np.ndarray:
        """percent point function on the original scale

        Parameters
        ----------
        p : npt.ArrayLike
            lower tail probability

        Returns
        -------
        np.ndarray
            PPF evaluated at lower tail probability p
        """
        return self._scipy_dist.ppf(p) * self.width + self.lb

    def stats(self, moments: str) -> Union[float, Tuple[float, ...]]:
        """moments of the rescaled distribution

        Parameters
        ----------
        moments : str
            m for mean, v for variance, s for skewness, k for kurtosis

        Returns
        -------
        Union[float, Tuple[float, ...]]
            mean, variance, skewness, and/or kurtosis, reported in
            m, v, s, k order; a bare value when only one is requested
        """
        rescaled = []
        for moment in "mvsk":
            if moment not in moments:
                continue
            value = self._scipy_dist.stats(moments=moment)
            if moment == "m":
                value = value * self.width + self.lb
            elif moment == "v":
                value = value * self.width**2
            # skewness and kurtosis are unchanged by an affine transform
            # with positive scale, so they pass through untouched
            rescaled.append(value)
        return rescaled[0] if len(rescaled) == 1 else tuple(rescaled)


distribution_dict = {
"exponential": Exponential,
"gamma": Gamma,
Expand All @@ -285,6 +383,7 @@ def _create_scipy_dist(self) -> None:
"lognormal": LogNormal,
"normal": Normal,
"beta": Beta,
"MSCAbeta": MSCABeta,
}


Expand Down
86 changes: 59 additions & 27 deletions src/ensemble/model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import json
from typing import List, Tuple, Union

Expand Down Expand Up @@ -33,8 +35,6 @@ class EnsembleDistribution:

def __init__(
self,
# distributions: List[str],
# weights: List[float],
named_weights: dict,
mean: float,
variance: float,
Expand Down Expand Up @@ -119,7 +119,9 @@ def pdf(self, x: npt.ArrayLike) -> np.ndarray:
"""
return sum(
weight * distribution.pdf(x)
for distribution, weight in zip(self.my_objs, self.weights)
for distribution, weight in zip(
self.fitted_distributions, self._weights
)
)

def cdf(self, q: npt.ArrayLike) -> np.ndarray:
Expand All @@ -138,7 +140,9 @@ def cdf(self, q: npt.ArrayLike) -> np.ndarray:
"""
return sum(
weight * distribution.cdf(q)
for distribution, weight in zip(self.my_objs, self.weights)
for distribution, weight in zip(
self.fitted_distributions, self._weights
)
)

def ppf(self, p: npt.ArrayLike) -> np.ndarray:
Expand Down Expand Up @@ -276,33 +280,61 @@ def to_json(self, file_path: str, appending: bool = False) -> None:
with open(file_path, "w") as outfile:
json.dump([distribution_summary], outfile)

@classmethod
def from_json(cls, file_path: str) -> List[EnsembleDistribution]:
"""deserializes JSON object into list of Ensemble Distribution objects
def from_json(file_path: str) -> List[EnsembleDistribution]:
"""deserializes JSON object into list of Ensemble Distribution objects
Parameters
----------
file_path : str
path to file that JSON object is stored in
Parameters
----------
file_path : str
path to file that JSON object is stored in
Returns
-------
List[EnsembleDistribution]
list of EnsembleDistribution objects
"""
with open(file_path, "r") as infile:
distribution_summaries = json.load(infile)

res = [None] * len(distribution_summaries)
for i in range(len(distribution_summaries)):
named_weights, mean, variance = (
distribution_summaries[i]["named_weights"],
distribution_summaries[i]["mean"],
distribution_summaries[i]["variance"],
)
res[i] = cls(named_weights, mean, variance)

return res

Returns
-------
List[EnsembleDistribution]
list of EnsembleDistribution objects
"""
with open(file_path, "r") as infile:
distribution_summaries = json.load(infile)

res = [None] * len(distribution_summaries)
for i in range(len(distribution_summaries)):
named_weights, mean, variance = (
distribution_summaries[i]["named_weights"],
distribution_summaries[i]["mean"],
distribution_summaries[i]["variance"],
)
res[i] = EnsembleDistribution(named_weights, mean, variance)

return res
# def from_json(file_path: str) -> List[EnsembleDistribution]:
# """deserializes JSON object into list of Ensemble Distribution objects

# Parameters
# ----------
# file_path : str
# path to file that JSON object is stored in

# Returns
# -------
# List[EnsembleDistribution]
# list of EnsembleDistribution objects
# """
# with open(file_path, "r") as infile:
# distribution_summaries = json.load(infile)

# res = [None] * len(distribution_summaries)
# for i in range(len(distribution_summaries)):
# named_weights, mean, variance = (
# distribution_summaries[i]["named_weights"],
# distribution_summaries[i]["mean"],
# distribution_summaries[i]["variance"],
# )
# res[i] = EnsembleDistribution(named_weights, mean, variance)

# return res


class EnsembleResult:
Expand Down
1 change: 1 addition & 0 deletions tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_beta():
beta = Beta(BETA_MEAN, BETA_VARIANCE)
res = beta.stats(moments="mv")
print("resulting mean and var: ", res)
assert False
assert np.isclose(res[0], BETA_MEAN)
assert np.isclose(res[1], BETA_VARIANCE)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest
import scipy.stats as stats

from ensemble.model import EnsembleDistribution, EnsembleFitter, from_json
from ensemble.model import EnsembleDistribution, EnsembleFitter

STD_NORMAL_DRAWS = stats.norm(loc=0, scale=1).rvs(100)

Expand Down Expand Up @@ -78,7 +78,7 @@ def test_json():
)
model1.to_json("test_read.json", appending=True)

m1 = from_json("test_read.json")[1]
m1 = EnsembleDistribution.from_json("test_read.json")[1]
assert m1.stats_temp("mv") == DEFAULT_SETTINGS
assert m1._distributions == ["gamma", "invgamma"]
assert m1._weights == [0.2, 0.8]

0 comments on commit f68fd08

Please sign in to comment.