A surrogate fix to be included in the next build (improving calibration speed) #339

Merged: 7 commits, Oct 18, 2024
174 changes: 78 additions & 96 deletions modules/performFEM/surrogateGP/gpPredict.py
@@ -8,9 +8,11 @@

import numpy as np
from scipy.stats import lognorm, norm
from sklearn.linear_model import LinearRegression

errFileName = 'workflow.err' # noqa: N816
sys.stderr = open(errFileName, 'a') # noqa: SIM115, PTH123
# sy - this conflicts with workflow.err in upper level
# errFileName = 'workflow.err' # noqa: N816
# sys.stderr = open(errFileName, 'a') # noqa: SIM115, PTH123

try:
moduleName = 'GPy' # noqa: N816
@@ -22,7 +24,6 @@
)
exit(-1) # noqa: PLR1722


try:
moduleName = 'GPy' # noqa: N816
import GPy as GPy # noqa: PLC0414
@@ -44,6 +45,7 @@
)
exit(-1) # noqa: PLR1722


# from emukit.multi_fidelity.convert_lists_to_array import convert_x_list_to_array, convert_xy_lists_to_arrays


@@ -232,9 +234,10 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
if (np.max(Y_var) / np.var(Y_mean) < 1.0e-10) and len(idx_repl) > 0: # noqa: PLR2004
return np.ones((X.shape[0], 1))

kernel_var = GPy.kern.Matern52(
input_dim=nrv_sur, ARD=True
) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
# kernel_var = GPy.kern.Matern52(
# input_dim=nrv_sur, ARD=True
# ) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)
log_vars = np.log(Y_var[idx_repl])
m_var = GPy.models.GPRegression(
X_unique[idx_repl, :],
@@ -251,12 +254,13 @@
var_pred = np.exp(log_var_pred)

if did_normalization:
Y_normFact = np.var(Y_mean) # noqa: N806
# Y_normFact = np.var(Y_mean) # noqa: N806
Y_normFact = np.mean(var_pred.T[0]) # noqa: N806
else:
Y_normFact = 1 # noqa: N806

norm_var_str = (
(var_pred.T[0]) / Y_normFact
) # if normalization was used..

log_var_pred_x, dum = m_var.predict(x)
@@ -267,12 +271,25 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
Y_mean = Y # noqa: N806
indices = range(Y.shape[0])

kernel_var = GPy.kern.Matern52(
input_dim=nrv_sur, ARD=True
) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
#
# check if we have an old example file - to be deleted in the future
#
old_version = False
for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items(): # noqa: B007, PERF102
if "sum" in key:
old_version = True
break

if old_version:
print("The surrogate model was trained using an older version of the tool. Please retrain the model using this version or use older version.", file=sys.stderr)
exit(-1)

log_vars = np.atleast_2d(
sur['modelInfo'][g_name_sur[ny] + '_Var']['TrainingSamplesY']
).T

kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)

m_var = GPy.models.GPRegression(
X, log_vars, kernel_var, normalizer=True, Y_metadata=None
)
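
For context on the two changes above: when kernels are added in GPy (the previous Matern52 + Linear setup), the model's parameters are typically registered under a summed kernel, which is why the new old_version check scans the saved `_Var` keys for "sum"; the updated code instead fits a single ARD Matern52 to the stored log-variances and exponentiates the prediction. A minimal sketch of both patterns, using toy data and illustrative names (X_toy, y_logvar, k_old, k_new) rather than the actual gpPredict.py variables:

```python
# Toy illustration only; X_toy, y_logvar, k_old, k_new are hypothetical names.
import numpy as np
import GPy

rng = np.random.default_rng(0)
nrv = 3
X_toy = rng.uniform(size=(30, nrv))
# log of replicate sample variances, here faked with a smooth positive function
y_logvar = np.log(0.1 + X_toy[:, :1] ** 2 + 0.01 * rng.standard_normal((30, 1)) ** 2)

# Old-style variance kernel: Matern52 + Linear. Adding kernels in GPy produces a
# summed kernel, so parameter names saved from such a model typically carry a
# 'sum' component, which is what the old_version check above looks for.
k_old = GPy.kern.Matern52(input_dim=nrv, ARD=True) + GPy.kern.Linear(input_dim=nrv, ARD=True)
m_old = GPy.models.GPRegression(X_toy, y_logvar, kernel=k_old, normalizer=True)
print(m_old.parameter_names())  # inspect how the summed kernel labels its parameters

# New-style variance model: a single ARD Matern52 on the log-variances;
# predictions are exponentiated back to the variance scale, as in the code above.
k_new = GPy.kern.Matern52(input_dim=nrv, ARD=True)
m_var = GPy.models.GPRegression(X_toy, y_logvar, kernel=k_new, normalizer=True)
log_var_pred, _ = m_var.predict(X_toy)
var_pred = np.exp(log_var_pred)
```
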
@@ -285,12 +302,14 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
var_pred = np.exp(log_var_pred)

if did_normalization:
Y_normFact = np.var(Y) # noqa: N806
# Y_normFact = np.var(Y) # noqa: N806
Y_normFact = np.mean(var_pred.T[0]) # noqa: N806

else:
Y_normFact = 1 # noqa: N806

norm_var_str = (
(var_pred.T[0]) / Y_normFact
) # if normalization was used..

log_var_pred_x, dum = m_var.predict(x)
@@ -332,12 +351,12 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
continue

if (
not name_values[1]
.replace('.', '', 1)
.replace('e', '', 1)
.replace('-', '', 2)
.replace('+', '', 1)
.isdigit()
):
# surrogate model does not accept discrete
continue
@@ -463,7 +482,7 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
s = [str(rv_name_sur[id]) for id in missing_ids] # noqa: A001

if first_eeuq_found and all(
[missingEDP.endswith('-2') for missingEDP in s] # noqa: C419
):
msg = 'ground motion dimension does not match with that of the training'
# for i in range(len(s)):
@@ -516,11 +535,28 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
kr = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)

if sur['doLinear']:
kr = kr + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
# kr = kr + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
did_linear = True
lin_index = [True] * nrv
lin_list = []
for ny in range(ng_sur):
tmp_lin = LinearRegression()
tmp_lin.coef_ = np.array(sur['modelInfo'][g_name_sur[ny] + '_Lin']['coef'])
tmp_lin.intercept_ = np.array(sur['modelInfo'][g_name_sur[ny] + '_Lin']['intercept'])
lin_list += [tmp_lin]
else:
did_linear = False

# preprocessing..

if did_logtransform:
Y = np.log(Y) # noqa: N806

if did_linear:
for ny in range(ng_sur):
y_lin_pred = lin_list[ny].predict(X[:, lin_index])
Y[:, ny] = Y[:, ny] - y_lin_pred

kg = kr
m_list = list() # noqa: C408
nugget_var_list = [0] * ng_sur
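
The doLinear branch above reflects the new strategy: instead of folding a Linear kernel into the GP, the stored linear trend is rebuilt as a scikit-learn LinearRegression from its saved coefficients and intercept, subtracted from the training outputs before the GP is conditioned, and added back at prediction time (see the later hunk). A minimal sketch of that detrend-then-GP pattern; rebuild_linear, stored, X and Y below are stand-ins, not the exact gpPredict.py objects:

```python
# Sketch under toy data; mirrors the coef_/intercept_ reconstruction in the diff.
import numpy as np
import GPy
from sklearn.linear_model import LinearRegression

def rebuild_linear(model_info):
    """Recreate a fitted LinearRegression from stored coefficients (as the diff does)."""
    lin = LinearRegression()
    lin.coef_ = np.array(model_info['coef'])
    lin.intercept_ = np.array(model_info['intercept'])
    return lin

rng = np.random.default_rng(1)
X = rng.uniform(size=(40, 2))
Y = 3.0 * X[:, :1] - 2.0 * X[:, 1:] + 0.1 * rng.standard_normal((40, 1))

# Pretend these came from sur['modelInfo'][g_name + '_Lin'] in the surrogate JSON.
stored = {'coef': [3.0, -2.0], 'intercept': 0.0}
lin = rebuild_linear(stored)

lin_index = [True, True]                         # which inputs feed the linear trend
resid = Y[:, 0] - lin.predict(X[:, lin_index])   # the GP is trained on the residual
m = GPy.models.GPRegression(
    X, resid[:, None], kernel=GPy.kern.Matern52(input_dim=2, ARD=True)
)
```

Judging from the PR title, the design intent is that conditioning a plain Matern52 on the detrended residual is cheaper than optimizing a combined Matern52 + Linear kernel, which is where the calibration speed-up comes from.
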
Expand Down Expand Up @@ -550,9 +586,9 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
exec('m_list[ny].' + key + '= np.array(val)') # noqa: S102

nugget_var_list[ny] = (
m_list[ny].Gaussian_noise.parameters
* nugget_var_pred
* Y_normFact
)

else:
@@ -580,69 +616,10 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
for ny in range(ng_sur):
Y_normFact = np.var(Y[:, ny]) # noqa: N806
nugget_var_list[ny] = (
m_list[ny].gpy_model['mixed_noise.Gaussian_noise.variance']
* Y_normFact
)

# if did_stochastic:
#
# kg = kr
# m_list = list()
# nugget_var_list = [0]*ng_sur
# for ny in range(ng_sur):
#
# m_list = m_list + [GPy.models.GPRegression(X, Y[:, ny][np.newaxis].transpose(), kernel=kg.copy(),normalizer=did_normalization)]
# X_unique, Y_mean, norm_var_str, counts, nugget_var_pred, Y_normFact = get_stochastic_variance(X, Y[:,ny][np.newaxis].T, rv_val,ny)
# Y_metadata = {'variance_structure': norm_var_str / counts}
# m_list[ny].set_XY2(X_unique, Y_mean, Y_metadata=Y_metadata)
# for key, val in sur["modelInfo"][g_name_sur[ny]].items():
# exec('m_list[ny].' + key + '= np.array(val)')
#
# nugget_var_list[ny] = m_list[ny].Gaussian_noise.parameters * nugget_var_pred * Y_normFact
#
#
# elif not did_mf:
# kg = kr
# m_list = list()
# for ny in range(ng_sur):
# m_list = m_list + [GPy.models.GPRegression(X, Y[:, ny][np.newaxis].transpose(), kernel=kg.copy(),normalizer=True)]
# for key, val in sur["modelInfo"][g_name_sur[ny]].items():
# exec('m_list[ny].' + key + '= np.array(val)')
#
# Y_normFact = np.var(Y[:, ny])
# nugget_var_list[ny] = m_list[ny].Gaussian_noise.parameters * Y_normFact
#
# else:
# with open(surrogate_dir, "rb") as file:
# m_list=pickle.load(file)
#
# for ny in range(ng_sur):
# Y_normFact = np.var(Y[:, ny])
# nugget_var_list[ny] = m_list[ny].gpy_model["mixed_noise.Gaussian_noise.variance"]* Y_normFact

# to read:::
# kern_name='Mat52'
# did_logtransform=True

# at ui

# f = open(work_dir + '/templatedir/dakota.json')
# inp = json.load(f)
# f.close()

# try:
# f = open(surrogate_dir, 'rb')
# except OSError:
# msg = 'Could not open/read surrogate model from: ' + surrogate_dir + '\n'
# print(msg)
# error_file.write(msg)
# error_file.close()
# file_object.write(msg0+msg)
# file_object.close()
# exit(-1)
# with f:
# m_list = pickle.load(f)

# read param in file and sort input
y_dim = len(m_list)

@@ -674,6 +651,11 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
) # noiseless
y_pred_median_tmp = np.squeeze(y_pred_median_tmp)
y_pred_var_tmp_tmp = np.squeeze(y_pred_var_tmp_tmp)

if did_linear:
y_lin_pred = lin_list[ny].predict(rv_val[:, lin_index])
y_pred_median_tmp = y_pred_median_tmp + y_lin_pred

y_pred_var_tmp[:, ny] = y_pred_var_tmp_tmp
y_pred_var_m_tmp[:, ny] = y_pred_var_tmp_tmp + np.squeeze(
nugget_var_list[ny]
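
At prediction time the pieces are recombined as in the hunk above: the noiseless GP prediction gives the mean and variance of the detrended response, the stored linear trend is added back to the mean, and the nugget variance is added to obtain the total predictive variance. A compact sketch of that recombination; predict_with_trend and its arguments are placeholders, and the use of GPy's predict_noiseless matches the '# noiseless' comment above:

```python
# Placeholder sketch; m, lin, lin_index and nugget_var stand in for the
# per-output objects (m_list[ny], lin_list[ny], ...) used in gpPredict.py.
import numpy as np

def predict_with_trend(m, lin, lin_index, x_new, nugget_var=0.0):
    """GP residual prediction plus the stored linear trend; nugget added to the variance."""
    mean, var = m.predict_noiseless(x_new)            # latent (noise-free) GP prediction
    mean = np.squeeze(mean) + lin.predict(x_new[:, lin_index])
    var_total = np.squeeze(var) + nugget_var          # total predictive variance
    return mean, var_total

# e.g.: mean, var = predict_with_trend(m, lin, lin_index, rv_val, nugget_var=0.01)
```
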
@@ -779,7 +761,7 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803
templatedirFolder = os.path.join(os.getcwd(), 'templatedir_SIM') # noqa: PTH109, PTH118, N806

if (
(isEEUQ or isWEUQ) and nsamp == 1
): # because stochastic ground motion generation uses folder number when generating random seed.............
current_dir_i = os.path.join( # noqa: PTH118
os.getcwd(), # noqa: PTH109
@@ -875,15 +857,15 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803

if isEEUQ or isWEUQ:
if (
os_type.lower().startswith('win')
and run_type.lower() == 'runninglocal'
):
workflowDriver = 'sc_driver.bat' # noqa: N806
else:
workflowDriver = 'sc_driver' # noqa: N806
elif (
os_type.lower().startswith('win')
and run_type.lower() == 'runninglocal'
):
workflowDriver = 'driver.bat' # noqa: N806
else:
@@ -911,9 +893,9 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803

elif when_inaccurate == 'continue':
msg2 = (
msg0
+ msg1[ns]
+ '- CONTINUE [Warning: results may not be accurate]\n'
)
error_warning(msg2)

@@ -924,8 +906,8 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: N803

else:
msg3 = (
msg0
+ f'Prediction error level of output {idx[ns]} is {np.max(error_ratio2[ns]) * 100:.2f}%\n'
)
error_warning(msg3)

@@ -1038,7 +1020,7 @@ def predict(m, X, did_mf): # noqa: N803, D103
# TODO change below to noiseless # noqa: TD002, TD004
X_list = convert_x_list_to_array([X, X]) # noqa: N806
X_list_l = X_list[: X.shape[0]] # noqa: N806, F841
X_list_h = X_list[X.shape[0] :] # noqa: N806
X_list_h = X_list[X.shape[0]:] # noqa: N806
return m.predict(X_list_h)


9 changes: 6 additions & 3 deletions modules/performUQ/SimCenterUQ/UQengine.py
@@ -265,13 +265,14 @@ def readJson(self): # noqa: N802, D102
pass

def make_pool( # noqa: D102
self,
self, seed_val=42
):
if self.run_type.lower() == 'runninglocal':
from multiprocessing import Pool

n_processor = os.cpu_count()
pool = Pool(n_processor)
pool = Pool(n_processor, initializer=initfn, initargs=(seed_val,))

else:
from mpi4py import MPI
from mpi4py.futures import MPIPoolExecutor
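
The make_pool change above passes an initializer to the multiprocessing pool so that every worker process seeds NumPy before running any task, making parallel surrogate/FEM evaluations reproducible across runs. A self-contained sketch of that seeded-pool pattern; the square task is a stand-in, not part of UQengine.py:

```python
# Seeded worker pool, mirroring Pool(n, initializer=initfn, initargs=(seed_val,)).
import os
import numpy as np
from multiprocessing import Pool

def initfn(seed_val):
    # Runs once in each worker process before any task, enforcing the seed there.
    np.random.seed(seed_val)

def square(x):
    # Stand-in task that consumes the per-worker seeded RNG.
    return x * x + np.random.rand()

if __name__ == '__main__':
    n_processor = os.cpu_count()
    with Pool(n_processor, initializer=initfn, initargs=(42,)) as pool:
        print(pool.map(square, range(4)))
```
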
Expand Down Expand Up @@ -434,7 +435,9 @@ def run_FEM(X, id_sim, rv_name, work_dir, workflowDriver, runIdx=0): # noqa: C9
# def makePool(self):
# pass


# for creating pool
def initfn(seed_val):
np.random.seed(seed_val) # enforcing seeds
#
# When sampled X is different from surrogate input X. e.g. we sample ground motion parameters or indices, but we use IM as input of GP
#