Skip to content

Commit

Permalink
Merge pull request #432 from UCL-CCS/hackathon2024
Browse files Browse the repository at this point in the history
Hackathon2024
  • Loading branch information
DavidPCoster authored Dec 6, 2024
2 parents 71005d5 + 6f5fc21 commit 5624a2e
Show file tree
Hide file tree
Showing 21 changed files with 1,685 additions and 849 deletions.
2 changes: 1 addition & 1 deletion docs/source/concepts.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,4 @@ Detailed information on the Analysis modules is available :doc:`here <_autodoc/e
Execution
---------

Some more information on the use of QCG-Pilothob can be found :doc:`here <QCG-PilotJob-EasyVVUQ>`.
Some more information on the use of QCG-PilotJob can be found :doc:`here <QCG-PilotJob-EasyVVUQ>`.
90 changes: 89 additions & 1 deletion easyvvuq/analysis/qmc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def get_samples(self, data_frame):
samples[k].append(data.values)
return samples

def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
def sobol_bootstrap_(self, samples, alpha=0.05, n_samples=1000):
"""
Computes the first order and total order Sobol indices using Saltelli's
method. To assess the sampling inaccuracy, bootstrap confidence intervals
Expand Down Expand Up @@ -342,6 +342,94 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
conf_total_dict[param_name] = {'low': low_total, 'high': high_total}

return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict

def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
"""
Computes the first order and total order Sobol indices using Saltelli's
method. To assess the sampling inaccuracy, bootstrap confidence intervals
are also computed.
Reference: A. Saltelli, Making best use of model evaluations to compute
sensitivity indices, Computer Physics Communications, 2002.
Parameters
----------
samples : list
The samples for a given QoI.
alpha: float
The (1 - alpha) * 100 confidence interval parameter. The default is 0.05.
n_samples: int
The number of bootstrap samples. The default is 1000.
Returns
-------
sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict:
dictionaries containing the first- and total-order Sobol indices for all
parameters, and (1-alpha)*100 lower and upper confidence bounds.
"""
assert len(samples) > 0
assert alpha > 0.0
assert alpha < 1.0
assert n_bootstrap > 0

# convert to array
samples = np.array(samples)
# the number of parameter and the number of MC samples in n_mc * (n_params + 2)
# and the size of the QoI
n_params = self.sampler.n_params
# n_mc = self.sampler.n_mc_samples
n_mc = int(samples.shape[0]/(n_params + 2))
n_qoi = samples[0].size
sobols_first_dict = {}
conf_first_dict = {}
sobols_total_dict = {}
conf_total_dict = {}

# code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
# see reference above.
f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))

for j, param_name in enumerate(self.sampler.vary.get_keys()):

# our point estimate for the 1st and total order Sobol indices
value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])

# sobols computed from resampled data points
if n_mc * n_bootstrap * n_qoi <= 10**7:
#this is a vectorized computation, Is fast, but f_M2[r] will be of size
#(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash,
#especially when dealing with large QoI (n_qoi >> 1). So this is only done
#when n_mc * n_bootstrap * n_qoi <= 10**7
print("Vectorized bootstrapping")
sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
else:
#array for resampled estimates
sobols_first = np.zeros([n_bootstrap, n_qoi])
sobols_total = np.zeros([n_bootstrap, n_qoi])
print("Sequential bootstrapping")
#non-vectorized implementation
for i in range(n_bootstrap):
#resampled sample matrices of size (n_mc, n_qoi)
sobols_first[i] = self._first_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])
sobols_total[i] = self._total_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])

# compute confidence intervals based on percentiles
_, low_first, high_first = confidence_interval(sobols_first, value_first,
alpha, pivotal=True)
_, low_total, high_total = confidence_interval(sobols_total, value_total,
alpha, pivotal=True)
# store results
sobols_first_dict[param_name] = value_first
conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
sobols_total_dict[param_name] = value_total
conf_total_dict[param_name] = {'low': low_total, 'high': high_total}

return sobols_first_dict, conf_first_dict, sobols_total_dict, conf_total_dict


# Adapted from SALib
@staticmethod
Expand Down
43 changes: 32 additions & 11 deletions easyvvuq/sampling/mc_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,31 @@
__license__ = "LGPL"


class MCSampler(RandomSampler, sampler_name='mc_sampler'):
class MCSampler(RandomSampler, sampler_name='MC_sampler'):
"""
This is a Monte Carlo sampler, used to compute the Sobol indices, mean
and variance of the different QoIs.
"""

def __init__(self, vary, n_mc_samples, **kwargs):
def __init__(self, vary, n_mc_samples, rule='latin_hypercube', **kwargs):
"""
Parameters
----------
+ vary : a dictionary of chaospy input distributions
+ n_mc_samples : the number of MC samples. The total number of MC samples
required to compute the Sobol indices using a Saltelli sampling plan
will be n_mc_samples * (n_params + 1), where n_params is the number of
uncertain parameters in vary.
vary : dict
A dictionary of chaospy input distributions
n_mc_samples : int
The number of MC samples. The total number of MC samples
required to compute the Sobol indices using a Saltelli sampling plan
will be n_mc_samples * (n_params + 2), where n_params is the number of
uncertain parameters in vary.
rule : string
The sampling rule used for generating the (Quasi) random samples.
The default value is 'latin_hypercube', for a space-filling plan.
Other options include 'random', which is a fully random Monte Carlo
sampling plan.
Returns
-------
Expand All @@ -52,8 +61,12 @@ def __init__(self, vary, n_mc_samples, **kwargs):
super().__init__(vary=vary, count=0, max_num=n_mc_samples, **kwargs)
# the number of uncertain inputs
self.n_params = len(vary)
# List of the probability distributions of uncertain parameters
self.params_distribution = list(vary.values())
# the number of MC samples, for each of the n_params + 2 input matrices
self.n_mc_samples = n_mc_samples
# sampling rule
self.rule = rule
# joint distribution
self.joint = cp.J(*list(vary.values()))
# create the Saltelli sampling plan
Expand All @@ -67,7 +80,12 @@ def __next__(self):

run_dict = {}
for idx, param_name in enumerate(self.vary.get_keys()):
run_dict[param_name] = self.xi_mc[self.count][idx]
current_param = self.xi_mc[self.count][idx]
if isinstance(self.params_distribution[idx], cp.DiscreteUniform):
current_param = int(current_param)
run_dict[param_name] = current_param


self.count += 1

return run_dict
Expand Down Expand Up @@ -95,10 +113,13 @@ def saltelli(self, n_mc):
self.max_num = n_mc * (self.n_params + 2)
logging.debug('Generating {} input samples spread over {} sample matrices.'.format(
self.max_num, self.n_params + 2))
input_samples = self.joint.sample(2 * n_mc, rule=self.rule).T
# Matrix M1, the sample matrix
M_1 = self.joint.sample(n_mc).T
# M_1 = self.joint.sample(n_mc, rule=self.rule).T
M_1 = input_samples[0:n_mc]
# Matrix M2, the resample matrix (see reference above)
M_2 = self.joint.sample(n_mc).T
# M_2 = self.joint.sample(n_mc, rule=self.rule).T
M_2 = input_samples[n_mc:]
# array which contains all samples
self.xi_mc = np.zeros([self.max_num, self.n_params])
# The order in which the inputs samples must be stored is
Expand All @@ -115,7 +136,7 @@ def saltelli(self, n_mc):
self.xi_mc[(step - 1):self.max_num:step] = M_1
# store N_i entries between M2 and M1
for i in range(self.n_params):
N_i = np.array(M_2)
N_i = np.copy(M_2)
# N_i = M2 with i-th colum from M1
N_i[:, i] = M_1[:, i]
self.xi_mc[(i + 1):self.max_num:step] = N_i
Expand Down
4 changes: 4 additions & 0 deletions easyvvuq/sampling/qmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def __init__(self, vary, n_mc_samples, count=0):
msg = "'vary' cannot be empty."
raise RuntimeError(msg)

discrete_input = [isinstance(p, cp.DiscreteUniform) for p in vary.values()]
assert (True in discrete_input) == False, \
"QMCSampler cannot handle DiscreteUniform, use MCSampler instead"

self.vary = Vary(vary)
self.n_mc_samples = n_mc_samples

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"h5py",
"tomli",
"fipy",
"setuptools_scm",
]

[project.optional-dependencies]
Expand Down
68 changes: 54 additions & 14 deletions tests/test_mc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,32 +53,64 @@ def results(data):
results = analysis.analyse(df)
return results

@pytest.fixture
def results_vectors(data_vectors):
# Post-processing analysis
mc_sampler, df = data_vectors
analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['g', 'h'])
results = analysis.analyse(df)
return results

@pytest.fixture
def data_vectors():
np.random.seed(10000000)
vary = {
"x1": cp.Uniform(0.0, 1.0),
"x2": cp.Uniform(0.0, 1.0)
}
sampler = uq.sampling.MCSampler(vary, n_mc_samples=100, rule='random')
data = {('run_id', 0): [], ('x1', 0): [], ('x2', 0): [],
('g', 0): [], ('g', 1): [], ('g', 2): [], ('h', 0): [], ('h', 1): []}
for run_id, sample in enumerate(sampler):
data[('run_id', 0)].append(run_id)
data[('x1', 0)].append(sample['x1'])
data[('x2', 0)].append(sample['x2'])
data[('g', 0)].append(sample['x1'])
data[('g', 1)].append(sample['x2'])
data[('g', 2)].append(sample['x1'] + sample['x2'])
data[('h', 0)].append(sample['x1'] * sample['x2'])
data[('h', 1)].append(sample['x1'] ** sample['x2'])
df = pd.DataFrame(data)
return sampler, df


def test_mc_analysis(results):
# analytic Sobol indices
ref_sobols = exact_sobols_g_func()
sobol_x1 = results._get_sobols_first('f', 'x1')
sobol_x2 = results._get_sobols_first('f', 'x2')
assert sobol_x1 == pytest.approx(ref_sobols[0], abs=0.1)
assert sobol_x2 == pytest.approx(ref_sobols[1], abs=0.1)
assert sobol_x1 == pytest.approx(ref_sobols[0], abs=0.15)
assert sobol_x2 == pytest.approx(ref_sobols[1], abs=0.15)


def test_sobol_bootstrap(data):
mc_sampler, df = data
analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['f'])
s1, s1_conf, st, st_conf = analysis.sobol_bootstrap(df['f'])
assert (s1['x1'] == pytest.approx(0.5569058947880715, 0.01))
assert (s1['x2'] == pytest.approx(0.20727553481694053, 0.01))
assert (st['x1'] == pytest.approx(0.8132793654841785, 0.01))
assert (st['x2'] == pytest.approx(0.3804962894947435, 0.01))
assert (s1_conf['x1']['low'][0] == pytest.approx(0.14387035, 0.01))
assert (s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01))
assert (s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01))
assert (s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01))
assert (st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01))
assert (st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01))
assert (st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01))
assert (st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01))
print('================================')
print(s1)
assert (s1['x1'] == pytest.approx(0.52678798, 0.01))
assert (s1['x2'] == pytest.approx(0.21411798, 0.01))
assert (st['x1'] == pytest.approx(0.76100627, 0.01))
assert (st['x2'] == pytest.approx(0.31407034, 0.01))
assert (s1_conf['x1']['low'][0] == pytest.approx(0.09359582, 0.01))
assert (s1_conf['x1']['high'][0] == pytest.approx(0.90002346, 0.01))
# assert (s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01))
# assert (s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01))
# assert (st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01))
# assert (st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01))
# assert (st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01))
# assert (st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01))


def test_separate_output_values(data):
Expand All @@ -92,3 +124,11 @@ def test_separate_output_values(data):

def test_get_samples(data):
pass

def test_describe(results_vectors):

assert (results_vectors.describe(qoi='g', statistic='mean')[0] == pytest.approx(0.44925539, 0.01))
assert (results_vectors.describe(qoi='g', statistic='mean')[1] == pytest.approx(0.48683778, 0.01))
assert (results_vectors.describe(qoi='g', statistic='mean')[2] == pytest.approx(0.93609317, 0.01))

assert (isinstance(results_vectors.describe('h', 'std'), np.ndarray))
Loading

0 comments on commit 5624a2e

Please sign in to comment.