Skip to content

Commit

Permalink
Merge pull request #428 from UCL-CCS/feature_enseble_boot_multiple
Browse files Browse the repository at this point in the history
Added EnsembleBootMultiple class to enable analysis of many statistics…
  • Loading branch information
DavidPCoster authored Dec 4, 2024
2 parents 309d897 + aa44cc5 commit 1159d3b
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 2 deletions.
2 changes: 1 addition & 1 deletion easyvvuq/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .basic_stats import BasicStats
from .ensemble_boot import EnsembleBoot
from .ensemble_boot import EnsembleBoot, EnsembleBootMultiple
from .sc_analysis import SCAnalysis
from .ssc_analysis import SSCAnalysis
from .pce_analysis import PCEAnalysis
Expand Down
93 changes: 93 additions & 0 deletions easyvvuq/analysis/ensemble_boot.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,96 @@ def analyse(self, data_frame=None):
stat_name=self.stat_name)

return results

class EnsembleBootMultiple(BaseAnalysisElement):
    """Analysis element that bootstraps several statistics in one call.

    Each statistical function in `stat_func` is passed, one at a time, to
    `ensemble_bootstrap`; the individual results are then concatenated
    column-wise into a single data frame.
    """

    def __init__(self, groupby=None, qoi_cols=None,
                 stat_func=(np.mean,), alpha=0.05,
                 sample_size=None, n_boot_samples=1000,
                 pivotal=False, stat_name=None):
        """
        Element to perform bootstrapping on collated simulation output.

        Parameters
        ----------
        groupby : list or None
            Columns to use to group the data in `analyse` method before
            calculating stats. `None` is treated as an empty list.
        qoi_cols : list or None
            Columns of quantities of interest (for which stats will be
            calculated). `None` is treated as an empty list.
        stat_func : list[function] or function
            Statistical functions to be applied to data for bootstrapping.
            A single callable is accepted and treated as a one-item list.
        alpha : float, default=0.05
            Produce estimate of 100.0*(1-`alpha`) confidence interval.
        sample_size : int
            Size of the sample to be drawn from the input data.
        n_boot_samples : int, default=1000
            Number of times samples are to be drawn from the input data.
        pivotal : bool, default=False
            Use the pivotal method? Default to percentile method.
        stat_name : list[str] or str, default=None
            Names to use to describe columns containing the output
            statistics (for example ['mean', 'var']), one per entry of
            `stat_func`. A single string is treated as a one-item list.
            If not provided, then attr '__name__' from each func is used.

        Raises
        ------
        ValueError
            If `stat_func` is empty or None, or if the number of names in
            `stat_name` differs from the number of functions in `stat_func`.
        """

        if stat_func is None:
            raise ValueError('stat_func cannot be empty or None')

        # Take a private copy so that later changes to the caller's sequence
        # cannot desynchronise the functions from their names.
        stat_funcs = [stat_func] if callable(stat_func) else list(stat_func)
        if not stat_funcs:
            raise ValueError('stat_func cannot be empty or None')

        if stat_name is None:
            stat_names = [func.__name__ for func in stat_funcs]
        elif isinstance(stat_name, str):
            # A bare string would otherwise be zipped character by character.
            stat_names = [stat_name]
        else:
            stat_names = list(stat_name)

        # zip() in `analyse` would silently drop the surplus entries, and the
        # keys handed to pd.concat would no longer match the frames.
        if len(stat_names) != len(stat_funcs):
            raise ValueError(
                'stat_name must provide exactly one name per function in '
                'stat_func (got {} names for {} functions)'.format(
                    len(stat_names), len(stat_funcs)))

        # None defaults avoid sharing one mutable list between all instances.
        self.groupby = [] if groupby is None else groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols

        self.stat_func = stat_funcs
        self.alpha = alpha
        self.sample_size = sample_size
        self.n_boot_samples = n_boot_samples
        self.pivotal = pivotal
        self.stat_name = stat_names

        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Name for this element for logging purposes"""
        return "ensemble_boot_multiple"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform bootstrapping analysis on the input `data_frame`.

        The data_frame is grouped according to `self.groupby` if specified and
        analysis is performed on the columns selected in `self.qoi_cols` if set.
        `ensemble_bootstrap` is called once per statistical function.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            The per-statistic results concatenated along the columns. The
            statistic name is added as a column level and then swapped with
            the following level, so it ends up as the second column level.

        Raises
        ------
        RuntimeError
            If `data_frame` is None or empty.
        """

        if data_frame is None:
            raise RuntimeError(
                "This VVUQ element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        frames = [
            ensemble_bootstrap(
                data_frame,
                groupby=self.groupby,
                qoi_cols=self.qoi_cols,
                stat_func=stat_func,
                alpha=self.alpha,
                sample_size=self.sample_size,
                n_samples=self.n_boot_samples,
                pivotal=self.pivotal,
                stat_name=stat_name)
            for stat_func, stat_name in zip(self.stat_func, self.stat_name)
        ]

        # keys= puts the statistic name on the outermost column level;
        # swaplevel then moves it one level inwards.
        combined = pd.concat(frames, axis=1, keys=self.stat_name)
        return combined.swaplevel(0, 1, axis=1)
18 changes: 17 additions & 1 deletion tests/test_ensemble_boot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from easyvvuq.analysis.ensemble_boot import confidence_interval, bootstrap
from easyvvuq.analysis.ensemble_boot import ensemble_bootstrap, EnsembleBoot
from easyvvuq.analysis.ensemble_boot import ensemble_bootstrap, EnsembleBoot, EnsembleBootMultiple
import os
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -82,3 +82,19 @@ def test_ensemble_boot():
'b': ['group1'] * VALUES.shape[0] + ['group2'] * VALUES.shape[0]})
results = analysis.analyse(df)
assert (not results.empty)

def test_ensemble_boot_multiple():
    """Check EnsembleBootMultiple metadata, input validation and output shape."""
    default_element = EnsembleBootMultiple()
    assert default_element.element_name() == 'ensemble_boot_multiple'
    assert default_element.element_version() == '0.1'

    # A missing data frame and an empty one must both be rejected.
    with pytest.raises(RuntimeError):
        default_element.analyse()
    with pytest.raises(RuntimeError):
        default_element.analyse(pd.DataFrame({}))

    # Two equally sized groups that share the same values.
    n_values = VALUES.shape[0]
    group_labels = ['group1'] * n_values + ['group2'] * n_values
    df = pd.DataFrame({
        'a': np.concatenate((VALUES, VALUES)),
        'b': group_labels})

    multi_stat = EnsembleBootMultiple(
        groupby=['b'],
        qoi_cols=['a'],
        stat_func=[np.mean, np.var, np.median])
    results = multi_stat.analyse(df)

    assert not results.empty
    # Two groups (rows); nine columns for the three statistics on one QoI.
    assert results.values.shape == (2, 9)

0 comments on commit 1159d3b

Please sign in to comment.