diff --git a/modules/performFEM/surrogateGP/gpPredict.py b/modules/performFEM/surrogateGP/gpPredict.py
index 522ba6538..fbf4d429d 100644
--- a/modules/performFEM/surrogateGP/gpPredict.py
+++ b/modules/performFEM/surrogateGP/gpPredict.py
@@ -8,9 +8,11 @@
 import numpy as np
 from scipy.stats import lognorm, norm
+from sklearn.linear_model import LinearRegression
 
-errFileName = 'workflow.err'  # noqa: N816
-sys.stderr = open(errFileName, 'a')  # noqa: SIM115, PTH123
+# sy - this conflicts with workflow.err in upper level
+# errFileName = 'workflow.err'  # noqa: N816
+# sys.stderr = open(errFileName, 'a')  # noqa: SIM115, PTH123
 
 try:
     moduleName = 'GPy'  # noqa: N816
@@ -22,7 +24,6 @@
     )
     exit(-1)  # noqa: PLR1722
 
-
 try:
     moduleName = 'GPy'  # noqa: N816
     import GPy as GPy  # noqa: PLC0414
@@ -44,6 +45,7 @@
     )
     exit(-1)  # noqa: PLR1722
 
+
 # from emukit.multi_fidelity.convert_lists_to_array import convert_x_list_to_array, convert_xy_lists_to_arrays
 
 
@@ -232,9 +234,10 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
         if (np.max(Y_var) / np.var(Y_mean) < 1.0e-10) and len(idx_repl) > 0:  # noqa: PLR2004
             return np.ones((X.shape[0], 1))
 
-        kernel_var = GPy.kern.Matern52(
-            input_dim=nrv_sur, ARD=True
-        ) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
+        # kernel_var = GPy.kern.Matern52(
+        #     input_dim=nrv_sur, ARD=True
+        # ) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
+        kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)
         log_vars = np.log(Y_var[idx_repl])
         m_var = GPy.models.GPRegression(
             X_unique[idx_repl, :],
@@ -251,12 +254,13 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
         var_pred = np.exp(log_var_pred)
 
         if did_normalization:
-            Y_normFact = np.var(Y_mean)  # noqa: N806
+            # Y_normFact = np.var(Y_mean)  # noqa: N806
+            Y_normFact = np.mean(var_pred.T[0])  # noqa: N806
         else:
            Y_normFact = 1  # noqa: N806
 
         norm_var_str = (
-            (var_pred.T[0]) / Y_normFact
+            (var_pred.T[0]) / Y_normFact
         )  # if normalization was used..
 
         log_var_pred_x, dum = m_var.predict(x)
@@ -267,12 +271,25 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
         Y_mean = Y  # noqa: N806
         indices = range(Y.shape[0])
 
-        kernel_var = GPy.kern.Matern52(
-            input_dim=nrv_sur, ARD=True
-        ) + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
+        #
+        # check if we have an old example file - to be deleted in the future
+        #
+        old_version = False
+        for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items():  # noqa: B007, PERF102
+            if "sum" in key:
+                old_version = True
+                break
+
+        if old_version:
+            print("The surrogate model was trained using an older version of the tool. Please retrain the model using this version or use an older version.", file=sys.stderr)
+            exit(-1)
+
         log_vars = np.atleast_2d(
             sur['modelInfo'][g_name_sur[ny] + '_Var']['TrainingSamplesY']
         ).T
+
+        kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)
+
         m_var = GPy.models.GPRegression(
             X, log_vars, kernel_var, normalizer=True, Y_metadata=None
         )
@@ -285,12 +302,14 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
         var_pred = np.exp(log_var_pred)
 
         if did_normalization:
-            Y_normFact = np.var(Y)  # noqa: N806
+            # Y_normFact = np.var(Y)  # noqa: N806
+            Y_normFact = np.mean(var_pred.T[0])  # noqa: N806
+
         else:
             Y_normFact = 1  # noqa: N806
 
         norm_var_str = (
-            (var_pred.T[0]) / Y_normFact
+            (var_pred.T[0]) / Y_normFact
         )  # if normalization was used..
 
         log_var_pred_x, dum = m_var.predict(x)
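The two hunks above change the normalization factor of the variance structure from the variance of the mean response to the mean of the predicted variances. A minimal numpy sketch of the effect, with made-up numbers (not code from the module):

```python
import numpy as np

# Hypothetical predicted variances at the training points (stand-ins for
# np.exp(log_var_pred) in the patch above).
var_pred = np.array([[0.5], [2.0], [1.0], [0.5]])

# New factor: mean predicted variance (old factor was np.var(Y_mean)).
Y_normFact = np.mean(var_pred.T[0])
norm_var_str = var_pred.T[0] / Y_normFact

# By construction the normalized variance structure now averages to 1.
assert np.isclose(norm_var_str.mean(), 1.0)
```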
@@ -332,12 +351,12 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
             continue
 
         if (
-            not name_values[1]
-            .replace('.', '', 1)
-            .replace('e', '', 1)
-            .replace('-', '', 2)
-            .replace('+', '', 1)
-            .isdigit()
+            not name_values[1]
+            .replace('.', '', 1)
+            .replace('e', '', 1)
+            .replace('-', '', 2)
+            .replace('+', '', 1)
+            .isdigit()
         ):  # surrogate model does not accept discrete
             continue
 
@@ -463,7 +482,7 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
     s = [str(rv_name_sur[id]) for id in missing_ids]  # noqa: A001
 
     if first_eeuq_found and all(
-        [missingEDP.endswith('-2') for missingEDP in s]  # noqa: C419
+        [missingEDP.endswith('-2') for missingEDP in s]  # noqa: C419
     ):
         msg = 'ground motion dimension does not match with that of the training'
         # for i in range(len(s)):
@@ -516,11 +535,28 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
 kr = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True)
 
 if sur['doLinear']:
-    kr = kr + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
+    # kr = kr + GPy.kern.Linear(input_dim=nrv_sur, ARD=True)
+    did_linear = True
+    lin_index = [True] * nrv
+    lin_list = []
+    for ny in range(ng_sur):
+        tmp_lin = LinearRegression()
+        tmp_lin.coef_ = np.array(sur['modelInfo'][g_name_sur[ny] + '_Lin']['coef'])
+        tmp_lin.intercept_ = np.array(sur['modelInfo'][g_name_sur[ny] + '_Lin']['intercept'])
+        lin_list += [tmp_lin]
+else:
+    did_linear = False
+
+
 # preprocessing..
 if did_logtransform:
     Y = np.log(Y)  # noqa: N806
 
+if did_linear:
+    for ny in range(ng_sur):
+        y_lin_pred = lin_list[ny].predict(X[:, lin_index])
+        Y[:, ny] = Y[:, ny] - y_lin_pred
+
 kg = kr
 m_list = list()  # noqa: C408
 nugget_var_list = [0] * ng_sur
@@ -550,9 +586,9 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
             exec('m_list[ny].' + key + '= np.array(val)')  # noqa: S102
 
         nugget_var_list[ny] = (
-            m_list[ny].Gaussian_noise.parameters
-            * nugget_var_pred
-            * Y_normFact
+            m_list[ny].Gaussian_noise.parameters
+            * nugget_var_pred
+            * Y_normFact
         )
 
 else:
@@ -580,69 +616,10 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
     for ny in range(ng_sur):
         Y_normFact = np.var(Y[:, ny])  # noqa: N806
         nugget_var_list[ny] = (
-            m_list[ny].gpy_model['mixed_noise.Gaussian_noise.variance']
-            * Y_normFact
+            m_list[ny].gpy_model['mixed_noise.Gaussian_noise.variance']
+            * Y_normFact
         )
 
-# if did_stochastic:
-#
-#     kg = kr
-#     m_list = list()
-#     nugget_var_list = [0]*ng_sur
-#     for ny in range(ng_sur):
-#
-#         m_list = m_list + [GPy.models.GPRegression(X, Y[:, ny][np.newaxis].transpose(), kernel=kg.copy(),normalizer=did_normalization)]
-#         X_unique, Y_mean, norm_var_str, counts, nugget_var_pred, Y_normFact = get_stochastic_variance(X, Y[:,ny][np.newaxis].T, rv_val,ny)
-#         Y_metadata = {'variance_structure': norm_var_str / counts}
-#         m_list[ny].set_XY2(X_unique, Y_mean, Y_metadata=Y_metadata)
-#         for key, val in sur["modelInfo"][g_name_sur[ny]].items():
-#             exec('m_list[ny].' + key + '= np.array(val)')
-#
-#         nugget_var_list[ny] = m_list[ny].Gaussian_noise.parameters * nugget_var_pred * Y_normFact
-#
-#
-# elif not did_mf:
-#     kg = kr
-#     m_list = list()
-#     for ny in range(ng_sur):
-#         m_list = m_list + [GPy.models.GPRegression(X, Y[:, ny][np.newaxis].transpose(), kernel=kg.copy(),normalizer=True)]
-#         for key, val in sur["modelInfo"][g_name_sur[ny]].items():
-#             exec('m_list[ny].' + key + '= np.array(val)')
-#
-#         Y_normFact = np.var(Y[:, ny])
-#         nugget_var_list[ny] = m_list[ny].Gaussian_noise.parameters * Y_normFact
-#
-# else:
-#     with open(surrogate_dir, "rb") as file:
-#         m_list=pickle.load(file)
-#
-#     for ny in range(ng_sur):
-#         Y_normFact = np.var(Y[:, ny])
-#         nugget_var_list[ny] = m_list[ny].gpy_model["mixed_noise.Gaussian_noise.variance"]* Y_normFact
-
-# to read:::
-# kern_name='Mat52'
-# did_logtransform=True
-
-# at ui
-
-# f = open(work_dir + '/templatedir/dakota.json')
-# inp = json.load(f)
-# f.close()
-
-# try:
-#     f = open(surrogate_dir, 'rb')
-# except OSError:
-#     msg = 'Could not open/read surrogate model from: ' + surrogate_dir + '\n'
-#     print(msg)
-#     error_file.write(msg)
-#     error_file.close()
-#     file_object.write(msg0+msg)
-#     file_object.close()
-#     exit(-1)
-# with f:
-#     m_list = pickle.load(f)
-
 # read param in file and sort input
 
 y_dim = len(m_list)
@@ -674,6 +651,11 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
         )  # noiseless
         y_pred_median_tmp = np.squeeze(y_pred_median_tmp)
         y_pred_var_tmp_tmp = np.squeeze(y_pred_var_tmp_tmp)
+
+        if did_linear:
+            y_lin_pred = lin_list[ny].predict(rv_val[:, lin_index])
+            y_pred_median_tmp = y_pred_median_tmp + y_lin_pred
+
         y_pred_var_tmp[:, ny] = y_pred_var_tmp_tmp
         y_pred_var_m_tmp[:, ny] = y_pred_var_tmp_tmp + np.squeeze(
             nugget_var_list[ny]
         )
@@ -779,7 +761,7 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
     templatedirFolder = os.path.join(os.getcwd(), 'templatedir_SIM')  # noqa: PTH109, PTH118, N806
 
     if (
-        (isEEUQ or isWEUQ) and nsamp == 1
+        (isEEUQ or isWEUQ) and nsamp == 1
    ):  # because stochastic ground motion generation uses folder number when generating random seed............
         current_dir_i = os.path.join(  # noqa: PTH118
             os.getcwd(),  # noqa: PTH109
@@ -875,15 +857,15 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
 
     if isEEUQ or isWEUQ:
         if (
-            os_type.lower().startswith('win')
-            and run_type.lower() == 'runninglocal'
+            os_type.lower().startswith('win')
+            and run_type.lower() == 'runninglocal'
         ):
             workflowDriver = 'sc_driver.bat'  # noqa: N806
         else:
             workflowDriver = 'sc_driver'  # noqa: N806
     elif (
-        os_type.lower().startswith('win')
-        and run_type.lower() == 'runninglocal'
+        os_type.lower().startswith('win')
+        and run_type.lower() == 'runninglocal'
     ):
         workflowDriver = 'driver.bat'  # noqa: N806
     else:
@@ -911,9 +893,9 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
 
         elif when_inaccurate == 'continue':
             msg2 = (
-                msg0
-                + msg1[ns]
-                + '- CONTINUE [Warning: results may not be accurate]\n'
+                msg0
+                + msg1[ns]
+                + '- CONTINUE [Warning: results may not be accurate]\n'
             )
             error_warning(msg2)
 
@@ -924,8 +906,8 @@ def get_stochastic_variance(X, Y, x, ny):  # noqa: N803
 
         else:
             msg3 = (
-                msg0
-                + f'Prediction error level of output {idx[ns]} is {np.max(error_ratio2[ns]) * 100:.2f}%\n'
+                msg0
+                + f'Prediction error level of output {idx[ns]} is {np.max(error_ratio2[ns]) * 100:.2f}%\n'
             )
             error_warning(msg3)
 
@@ -1038,7 +1020,7 @@ def predict(m, X, did_mf):  # noqa: N803, D103
         # TODO change below to noiseless  # noqa: TD002, TD004
         X_list = convert_x_list_to_array([X, X])  # noqa: N806
         X_list_l = X_list[: X.shape[0]]  # noqa: N806, F841
-        X_list_h = X_list[X.shape[0] :]  # noqa: N806
+        X_list_h = X_list[X.shape[0]:]  # noqa: N806
         return m.predict(X_list_h)
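The gpPredict.py changes above reconstruct a saved linear trend from its coefficients and add it back on top of the GP prediction of the residual. A self-contained sketch of that round trip, using hypothetical data (`X`, `y` are stand-ins, and the GP part is stubbed out by the residual itself):

```python
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(1)
X = rng.uniform(size=(50, 2))
y = 3.0 * X[:, 0] - 2.0 * X[:, 1] + 0.05 * rng.standard_normal(50)

# Training side (surrogateBuild.py): fit the trend, keep coef/intercept,
# and hand the residual to the GP.
lin = LinearRegression().fit(X, y)
residual = y - lin.predict(X)

# Prediction side (gpPredict.py): rebuild the trend from the saved numbers
# and add it back to the GP's prediction of the residual.
lin2 = LinearRegression()
lin2.coef_ = np.array(lin.coef_)
lin2.intercept_ = float(lin.intercept_)
y_hat = residual + lin2.predict(X)
assert np.allclose(y_hat, y)
```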
diff --git a/modules/performUQ/SimCenterUQ/UQengine.py b/modules/performUQ/SimCenterUQ/UQengine.py
index 74ca792e6..21551961d 100644
--- a/modules/performUQ/SimCenterUQ/UQengine.py
+++ b/modules/performUQ/SimCenterUQ/UQengine.py
@@ -265,13 +265,14 @@ def readJson(self):  # noqa: N802, D102
         pass
 
     def make_pool(  # noqa: D102
-        self,
+        self, seed_val=42
     ):
         if self.run_type.lower() == 'runninglocal':
             from multiprocessing import Pool
 
             n_processor = os.cpu_count()
-            pool = Pool(n_processor)
+            pool = Pool(n_processor, initializer=initfn, initargs=(seed_val,))
+
         else:
             from mpi4py import MPI
             from mpi4py.futures import MPIPoolExecutor
@@ -434,7 +435,9 @@ def run_FEM(X, id_sim, rv_name, work_dir, workflowDriver, runIdx=0):  # noqa: C9
 
 # def makePool(self):
 #     pass
-
+# for creating pool
+def initfn(seed_val):
+    np.random.seed(seed_val)  # enforcing seeds
 #
 # When sampled X is different from surrogate input X. e.g. we sample ground motion parameters or indices, but we use IM as input of GP
 #
diff --git a/modules/performUQ/SimCenterUQ/surrogateBuild.py b/modules/performUQ/SimCenterUQ/surrogateBuild.py
index f76045118..cf4adc096 100644
--- a/modules/performUQ/SimCenterUQ/surrogateBuild.py
+++ b/modules/performUQ/SimCenterUQ/surrogateBuild.py
@@ -49,6 +49,8 @@
 import sys
 import time
 import warnings
+import numpy as np
+import random
 
 warnings.filterwarnings('ignore')
 
@@ -68,19 +70,32 @@
     moduleName = 'GPy'  # noqa: N816
     import GPy as GPy  # noqa: PLC0414
 
-    moduleName = 'scipy'  # noqa: N816
+    moduleName = 'scipy.stats'  # noqa: N816
     from scipy.stats import cramervonmises, lognorm, norm, qmc
+    import scipy
+
+    moduleName = 'sklearn.linear_model'  # noqa: N816
+    from sklearn.linear_model import LinearRegression
 
     moduleName = 'UQengine'  # noqa: N816
+
     # from utilities import run_FEM_batch, errorLog
 
     error_tag = False  # global variable
 except:  # noqa: E722
     error_tag = True
     print('Failed to import module:' + moduleName)  # noqa: T201
 
-errFileName = 'dakota.err'  # noqa: N816
-sys.stderr = open(errFileName, 'w')  # noqa: SIM115, PTH123
-
+errFileName = os.path.join(os.getcwd(), 'dakota.err')  # noqa: N816
+develop_mode = (len(sys.argv) == 7)  # a flag for developer mode
+if develop_mode:
+    # import matplotlib
+    # matplotlib.use('TkAgg')
+    import matplotlib.pyplot as plt
+    print("developer mode")
+else:
+    with open(errFileName, 'w') as f:
+        f.write("")
+    sys.stderr = open(errFileName, 'w')  # noqa: SIM115, PTH123
 
 #
 # Modify GPy package
@@ -183,7 +198,8 @@ def check_packages(self, error_tag, moduleName):  # noqa: N803, D102
 
     def readJson(self):  # noqa: C901, N802, D102, PLR0912, PLR0915
         # self.nopt = max([20, self.n_processor])
-        self.nopt = 1
+        self.nopt = 3
+        self.is_paralle_opt_safe = False
 
         try:
             jsonPath = self.inputFile  # for EEUQ  # noqa: N806
@@ -209,12 +225,12 @@ def readJson(self):  # noqa: C901, N802, D102, PLR0912, PLR0915
             surrogateJson = dakotaJson['UQ']['surrogateMethodInfo']  # noqa: N806
 
             if surrogateJson['method'] == 'Sampling and Simulation':
-                random.seed(surrogateJson['seed'])
-                np.random.seed(surrogateJson['seed'])
+                self.global_seed = surrogateJson['seed']
             else:
-                random.seed(1)
-                np.random.seed(1)
+                self.global_seed = 42
 
+            random.seed(self.global_seed)
+            np.random.seed(self.global_seed)
             #
             # EE-UQ
             #
@@ -297,7 +313,7 @@ def readJson(self):  # noqa: C901, N802, D102, PLR0912, PLR0915
                 self.do_parallel = True
 
             if self.do_parallel:
-                self.n_processor, self.pool = self.make_pool()
+                self.n_processor, self.pool = self.make_pool(self.global_seed)
                 self.cal_interval = self.n_processor
             else:
                 self.n_processor = 1
@@ -315,6 +331,7 @@ def readJson(self):  # noqa: C901, N802, D102, PLR0912, PLR0915
             self.do_logtransform = surrogateJson['logTransform']
             self.kernel = surrogateJson['kernel']
             self.do_linear = surrogateJson['linear']
+            self.nugget_opt = surrogateJson['nuggetOpt']
             # self.heteroscedastic = surrogateJson["Heteroscedastic"]
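make_pool() now seeds every worker through a pool initializer, so repeated surrogate runs produce reproducible draws. A minimal, runnable sketch of the same pattern (the `draw` helper is hypothetical, added only to show the effect):

```python
import numpy as np
from multiprocessing import Pool

def initfn(seed_val):
    # Runs once in every worker; pins each worker's NumPy state so that
    # repeated runs of the pool are reproducible.
    np.random.seed(seed_val)

def draw(_):
    return np.random.rand()

if __name__ == '__main__':
    with Pool(2, initializer=initfn, initargs=(42,)) as pool:
        print(pool.map(draw, range(4)))
```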
@@ -391,6 +408,15 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
                 print('metadata_updated')  # noqa: T201
             self.set_XY(X, Y)
 
+        @monkeypatch_method(GPy.core.GP)
+        def subsample_XY(self, idx):  # noqa: N802, N803
+            if self.Y_metadata is not None:
+                new_meta = self.Y_metadata
+                new_meta['variance_structure'] = self.Y_metadata['variance_structure'][idx]
+                self.Y_metadata.update(new_meta)
+                print('metadata_updated')  # noqa: T201
+            self.set_XY(self.X[idx, :], self.Y[idx, :])
+
         # Save model information
         if (surrogateJson['method'] == 'Sampling and Simulation') or (
             surrogateJson['method'] == 'Import Data File'
@@ -403,6 +429,7 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
                 x_dim,
                 y_dim,
                 self.n_processor,
+                self.global_seed,
                 idx=0,
             )
             self.modelInfoLF = model_info(
@@ -412,6 +439,7 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
                 x_dim,
                 y_dim,
                 self.n_processor,
+                self.global_seed,
                 idx=-1,
             )  # NONE model
         elif surrogateJson['method'] == 'Import Multi-fidelity Data File':
@@ -423,6 +451,7 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
                 x_dim,
                 y_dim,
                 self.n_processor,
+                self.global_seed,
                 idx=1,
             )
             self.modelInfoLF = model_info(
@@ -432,6 +461,7 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
                 x_dim,
                 y_dim,
                 self.n_processor,
+                self.global_seed,
                 idx=2,
             )
         else:
@@ -553,8 +583,8 @@ def create_kernel(self, x_dim):  # noqa: D102
             msg = f'Error running SimCenterUQ - Kernel name <{kernel}> not supported'
             self.exit(msg)
 
-        if self.do_linear:
-            kr = kr + GPy.kern.Linear(input_dim=x_dim, ARD=True)
+        # if self.do_linear:
+        #     kr = kr + GPy.kern.Linear(input_dim=x_dim, ARD=True)
 
         if self.do_mf:
             kr = emf.kernels.LinearMultiFidelityKernel([kr.copy(), kr.copy()])  # noqa: F821
@@ -613,11 +643,18 @@ def create_gp_model(self):  # noqa: D102
         self.normMeans = [0] * y_dim
         self.normVars = [1] * y_dim
         self.m_list = [0] * self.y_dim
+        if self.do_linear:
+            self.lin_list = [0] * self.y_dim
+
         self.m_var_list, self.var_str, self.Y_mean = (
             [0] * self.y_dim,
             [1] * self.y_dim,
             [0] * self.y_dim,
         )
+
+        # below to facilitate better optimization bounds
+        self.init_noise_var = [None] * y_dim
+        self.init_process_var = [None] * y_dim
 
         for i in range(y_dim):
             self.m_list[i] = self.create_gpy_model(X_dummy, Y_dummy, kr)
@@ -693,6 +730,17 @@ def set_XY(  # noqa: C901, N802, D102
         else:
             Y_lfs = Y_lf  # noqa: N806
 
+
+        if self.do_linear:
+            self.linear_list = [True] * X_hf.shape[1]  # SY - TEMPORARY
+        else:
+            self.linear_list = [False] * X_hf.shape[1]  # SY - TEMPORARY
+
+        if sum(self.linear_list) > 0:
+            self.lin_list[ny] = LinearRegression().fit(X_hf[:, self.linear_list], Y_hfs)
+            y_pred = self.lin_list[ny].predict(X_hf[:, self.linear_list])
+            Y_hfs = Y_hfs - y_pred
+
         # # below is dummy
         # if np.all(np.isnan(X_lf)) and np.all(np.isnan(Y_lf)):
         #     X_lf = self.X_lf
@@ -729,7 +777,7 @@ def set_XY(  # noqa: C901, N802, D102
             elif n_unique == X_hf.shape[0]:  # no repl
                 # Y_mean=Y_hfs[X_idx]
                 # Y_mean1, nugget_mean1 = self.predictStoMeans(X_new, Y_mean)
-                Y_mean1, nugget_mean1 = self.predictStoMeans(X_hf, Y_hfs)  # noqa: N806
+                Y_mean1, nugget_mean1, initial_noise_variance, initial_process_variance = self.predictStoMeans(X_hf, Y_hfs)  # noqa: N806
 
                 if np.max(nugget_mean1) < 1.0e-10:  # noqa: PLR2004
                     self.set_XY(m_tmp, ny, X_hf, Y_hfs, enforce_hom=True)
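`subsample_XY` above is attached to `GPy.core.GP` via the file's `monkeypatch_method` decorator, which is defined elsewhere in surrogateBuild.py. A minimal stand-in that behaves the way the patch assumes (the `GP` class here is a dummy, not GPy's):

```python
import numpy as np

def monkeypatch_method(cls):
    # Attach the decorated function to `cls` under the function's own name.
    def decorator(func):
        setattr(cls, func.__name__, func)
        return func
    return decorator

class GP:  # stand-in for GPy.core.GP
    pass

@monkeypatch_method(GP)
def subsample_XY(self, idx):
    print('subsampling to', len(idx), 'rows')

GP().subsample_XY(np.arange(3))  # the method now exists on every GP instance
```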
@@ -745,7 +793,10 @@ def set_XY(  # noqa: C901, N802, D102
                     self.indices_unique = range(Y_hfs.shape[0])
                     self.n_unique_hf = X_new.shape[0]
                     self.Y_mean[ny] = Y_hfs
+                    self.init_noise_var[ny] = initial_noise_variance
+                    self.init_process_var[ny] = initial_process_variance
                 else:
+                    # nonunique set - check if nugget is zero
                     Y_mean, Y_var = np.zeros((n_unique, 1)), np.zeros((n_unique, 1))  # noqa: N806
@@ -770,6 +821,9 @@ def set_XY(  # noqa: C901, N802, D102
                         return m_tmp
 
                     elif self.nugget_opt == 'Heteroscedastic':  # noqa: RET505
+
+                        dummy, dummy, initial_noise_variance, initial_process_variance = self.predictStoMeans(X_hf,
+                                                                                                              Y_hfs)  # noqa: N806
                         #
                         # Constructing secondary GP model - can we make use of the "variance of sample variance"
                         #
@@ -811,6 +865,8 @@ def set_XY(  # noqa: C901, N802, D102
                     self.indices_unique = indices
                     self.n_unique_hf = X_new.shape[0]
                     self.Y_mean[ny] = Y_mean
+                    self.init_noise_var[ny] = initial_noise_variance
+                    self.init_process_var[ny] = initial_process_variance
 
             else:
                 # still nonstochastic gp
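The variance-model hunk that follows replaces hard-coded lengthscale initial values with bounded constraints on the hyperparameters. A sketch of the GPy calls involved, on a toy model (assumes GPy is installed; data and bound values are illustrative only):

```python
import numpy as np
import GPy

X = np.random.rand(30, 2)
y = np.sin(X[:, :1] * 6) + 0.1 * np.random.randn(30, 1)

k = GPy.kern.Matern52(input_dim=2, ARD=True)
m = GPy.models.GPRegression(X, y, k, normalizer=True)

# Keep the noise in a plausible band and bound each ARD lengthscale by the
# per-dimension data range, as the patch does for the variance model.
m.Gaussian_noise.constrain_bounded(0.5, 2.0, warning=False)
myrange = X.max(axis=0) - X.min(axis=0)
for nx in range(2):
    m.Mat52.lengthscale[[nx]].constrain_bounded(
        myrange[nx] / X.shape[0], myrange[nx] * 5, warning=False
    )
    m.Mat52.lengthscale[[nx]] = myrange[nx]  # initial point inside the bounds

m.optimize_restarts(num_restarts=3, verbose=False)
```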
@@ -843,116 +899,192 @@ def set_XY(  # noqa: C901, N802, D102
 
     def predictStoVars(self, X_repl, Y_var_repl, X_new, Y_mean, counts):  # noqa: N802, N803, D102
         my_x_dim = X_repl.shape[1]
-        kernel_var = GPy.kern.Matern52(
-            input_dim=my_x_dim, ARD=True
-        ) + GPy.kern.Linear(input_dim=my_x_dim, ARD=True)
+        # kernel_var = GPy.kern.Matern52(
+        #     input_dim=my_x_dim, ARD=True
+        # ) + GPy.kern.Linear(input_dim=my_x_dim, ARD=True)
+
+        kernel_var = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True)
         log_vars = np.log(Y_var_repl)
         m_var = GPy.models.GPRegression(
             X_repl, log_vars, kernel_var, normalizer=True, Y_metadata=None
         )
+        # m_var.Gaussian_noise.constrain_bounded(0.2, 2.0, warning=False)
+        m_var.Gaussian_noise.constrain_bounded(0.5, 2.0, warning=False)
+        m_var.Gaussian_noise = 1  # initial points
+
         for parname in m_var.parameter_names():
             if parname.endswith('lengthscale'):
                 for nx in range(X_repl.shape[1]):
                     myrange = np.max(X_repl, axis=0) - np.min(X_repl, axis=0)
-                    # m_mean.Mat52.lengthscale[[nx]].constrain_bounded( myrange[nx]/X.shape[0], float("Inf"))
-                    m_var.sum.Mat52.lengthscale[[nx]] = myrange[nx] * 100
-                    m_var.sum.Mat52.lengthscale[[nx]].constrain_bounded(
-                        myrange[nx] / X_repl.shape[0] * 10,
-                        myrange[nx] * 100,
-                        warning=False,
-                    )
-                    # TODO change the kernel  # noqa: TD002, TD004
+
+                    m_var.Mat52.lengthscale[[nx]].constrain_bounded(myrange[nx] / X_repl.shape[0], myrange[nx] * 5, warning=False)
+                    m_var.Mat52.lengthscale[[nx]] = myrange[nx]  # initial points
+                    # m_var.Gaussian_noise.value = 0.05
+                    # m_var.Gaussian_noise.constrain_bounded(0.1/np.var(log_vars), 0.8/np.var(log_vars), warning=False)
+                    # m_var.Mat52.lengthscale[[nx]].constrain_bounded(
+                    #     myrange[nx] / X_repl.shape[0],
+                    #     myrange[nx] * 10,
+                    #     warning=False,
+                    # )
+                    # m_var.sum.Mat52.lengthscale[[nx]].constrain_bounded(
+                    #     myrange[nx] / X_repl.shape[0] * 10,
+                    #     myrange[nx] * 100,
+                    #     warning=False,
+                    # )
+                    # TODO change the kernel  # noqa: TD002, TD004
+
+        # m_var.optimize(max_f_eval=1000)
+        # m_var.optimize_restarts(
+        #     self.nopt, parallel=self.is_paralle_opt_safe, num_processes=self.n_processor, verbose=False
+        # )
+        print("Calibrating Secondary surrogate")
+        m_var = my_optimize_restart(m_var, self.nopt)
 
-        m_var.optimize(max_f_eval=1000)
-        m_var.optimize_restarts(
-            self.nopt, parallel=True, num_processes=self.n_processor, verbose=False
-        )
-        print(m_var)  # noqa: T201
+        # print(m_var)  # noqa: T201
 
         log_var_pred, dum = m_var.predict(X_new)
         var_pred = np.exp(log_var_pred)
 
-        # norm_var_str = (var_pred.T[0]/counts) / max(var_pred.T[0]/counts)
+        norm_var_str = (var_pred.T[0] / counts) / max(var_pred.T[0] / counts)
 
+        self.var_norm = 1
         if self.set_normalizer:
-            norm_var_str = (var_pred.T[0]) / np.var(
-                Y_mean
-            )  # if normalization was used..
+            self.var_norm = np.mean(var_pred.T[0])
+            # norm_var_str = (var_pred.T[0]) / np.var(Y_mean)
+            norm_var_str = var_pred.T[0] / self.var_norm
+            # if normalization was used..
         else:
-            norm_var_str = var_pred.T[0]  # if normalization was used..
+            norm_var_str = var_pred.T[0]  # if normalization was not used..
+
+        # norm_var_str = (X_new+2)**2/max((X_new+2)**2)
 
         Y_metadata = {'variance_structure': norm_var_str / counts}  # noqa: N806
 
+        if develop_mode:
+            plt.figure(3)
+            nx = 0
+            plt.scatter(X_repl[:, nx], log_vars, alpha=0.1);
+            plt.scatter(X_new[:, nx], log_var_pred, alpha=0.1);
+            plt.show()
+
+            plt.figure(1)
+            dum1, dum2 = m_var.predict(X_repl)
+
+            plt.title("Sto Log-var QoI")
+            plt.scatter(log_vars, dum1, alpha=0.5);
+            plt.scatter(log_vars, log_vars, alpha=0.5);
+            plt.xlabel("exact"); plt.ylabel("pred"); plt.grid()
+            print(m_var)
+            print(m_var.Mat52.lengthscale)
+            plt.show();
+
         return Y_metadata, m_var, norm_var_str
 
     def predictStoMeans(self, X, Y):  # noqa: N802, N803, D102
         # under homoscedasticity
         my_x_dim = X.shape[1]
-        kernel_mean = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True)
+        myrange = np.max(X, axis=0) - np.min(X, axis=0)
+        kernel_mean = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True, lengthscale=myrange)
         # kernel_mean = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True) + GPy.kern.Linear(input_dim=my_x_dim, ARD=True)
-        if self.do_linear and not (self.isEEUQ or self.isWEUQ):
-            kernel_mean = kernel_mean + GPy.kern.Linear(input_dim=my_x_dim, ARD=True)
-
+        # if self.do_linear and not (self.isEEUQ or self.isWEUQ):
+        #     kernel_mean = kernel_mean + GPy.kern.Linear(input_dim=my_x_dim, ARD=True)
+        #
+        # if sum(self.linear_list)>0:
+        #
+        #     lin_tmp = LinearRegression().fit(X[:, self.linear_list], Y)
+        #     y_pred = lin_tmp.predict(X[:, self.linear_list])
+        #
+        #     # Set GP
+        #
+        #     m_mean = GPy.models.GPRegression(
+        #         X, Y - y_pred, kernel_mean, normalizer=True, Y_metadata=None
+        #     )
+        #
+        # else:
+        #     m_mean = GPy.models.GPRegression(
+        #         X, Y, kernel_mean, normalizer=True, Y_metadata=None
+        #     )
         m_mean = GPy.models.GPRegression(
             X, Y, kernel_mean, normalizer=True, Y_metadata=None
         )
+        '''
         for parname in m_mean.parameter_names():
             if parname.endswith('lengthscale'):
                 for nx in range(X.shape[1]):
-                    myrange = np.max(X, axis=0) - np.min(X, axis=0)
                     # m_mean.kern.Mat52.lengthscale[[nx]]= myrange[nx]*100
                     # m_mean.kern.Mat52.lengthscale[[nx]].constrain_bounded(myrange[nx]/X.shape[0]*50, myrange[nx]*100)
-                    if self.isEEUQ:
-                        m_mean.kern.lengthscale[[nx]] = myrange[nx] * 100
-                        m_mean.kern.lengthscale[[nx]].constrain_bounded(
-                            myrange[nx] / X.shape[0] * 50,
-                            myrange[nx] * 100,
-                            warning=False,
-                        )
-                    elif self.do_linear:
-                        m_mean.kern.Mat52.lengthscale[[nx]] = myrange[nx] * 5000
+                    # if self.isEEUQ:
+                    #     # m_mean.kern.lengthscale[[nx]] = myrange[nx] * 100
+                    #     # m_mean.kern.lengthscale[[nx]].constrain_bounded(
+                    #     #     myrange[nx] / X.shape[0] * 50,
+                    #     #     myrange[nx] * 100,
+                    #     #     warning=False,
+                    #     # )
+                    #     # m_mean.kern.lengthscale[[nx]] = myrange[nx]
+                    #     m_mean.kern.lengthscale[[nx]].constrain_bounded(
+                    #         myrange[nx] / X.shape[0],
+                    #         myrange[nx] * 10000,
+                    #         warning=False,
+                    #     )
+                    if self.do_linear:
+                        # m_mean.kern.Mat52.lengthscale[[nx]] = myrange[nx] * 5000
                         m_mean.kern.Mat52.lengthscale[[nx]].constrain_bounded(
                             myrange[nx] / X.shape[0] * 50,
                             myrange[nx] * 10000,
                             warning=False,
                         )
                     else:
-                        m_mean.kern.lengthscale[[nx]] = myrange[nx] * 5000
+
+                        m_mean.Gaussian_noise.constrain_bounded(0.1, 0.5, warning=False)
+                        # m_mean.kern.lengthscale[[nx]] = myrange[nx]
                         m_mean.kern.lengthscale[[nx]].constrain_bounded(
-                            myrange[nx] / X.shape[0] * 50,
-                            myrange[nx] * 10000,
-                            warning=False,
+                            myrange[nx] / X.shape[0] * 10,
+                            myrange[nx] * 1,
+                            warning=False
                         )
+        '''
 
         # m_mean.optimize(messages=True, max_f_eval=1000)
-        # m_mean.Gaussian_noise.variance = np.var(Y)  # First calibrate parameters
-        m_mean.optimize_restarts(
-            self.nopt, parallel=True, num_processes=self.n_processor, verbose=True
-        )  # First calibrate parameters
+        # # m_mean.Gaussian_noise.variance = np.var(Y)  # First calibrate parameters
+        # m_mean.optimize_restarts(
+        #     self.nopt, parallel=self.is_paralle_opt_safe, num_processes=self.n_processor, verbose=True
+        # )  # First calibrate parameters
+        print("calibrating tertiary surrogate")
+        m_mean = my_optimize_restart(m_mean, self.nopt)
 
-        # m_mean.optimize(messages=True, max_f_eval=1000)
+        # m_mean.optimize(messages=True, max_f_eval=1000)
         # if self.do_linear:
         #     m_mean.Gaussian_noise.variance=m_mean.Mat52.variance+m_mean.Gaussian_noise.variance
         # else:
-        #     m_mean.Gaussian_noise.variance=m_mean.RBF.variance+m_mean.Gaussian_noise.variance.variance
+        #     m_mean.Gaussian_noise.variance=m_mean.RBF.variance+m_mean.Gaussian_noise.variance
 
         # m_mean.optimize_restarts(10,parallel=True)
 
         mean_pred, mean_var = m_mean.predict(X)
 
-        """
-        import matplotlib.pyplot as plt
-        print(m_mean)
-        #print(m_mean.Mat52.lengthscale)
-        plt.scatter(X[:, 4], Y);
-        plt.plot(X[:, 4], mean_pred, 'rx');
-        plt.errorbar(X[:, 4],mean_pred.T[0],yerr=np.sqrt(mean_var.T)[0],fmt='x');
-        plt.show()
-        """
-        return mean_pred, mean_var
+        initial_noise_variance = float(m_mean.Gaussian_noise.variance)
+        initial_process_variance = float(m_mean.Mat52.variance)
+
+        # if np.sum(self.linear_list) > 0:
+        #     mean_pred = mean_pred + lin_tmp.predict(X[:, self.linear_list])
+
+        if develop_mode:
+            print(m_mean)
+            plt.scatter(X[:, 0], Y, alpha=0.2)
+            plt.plot(X[:, 0], mean_pred, 'rx')
+            plt.errorbar(X[:, 0], mean_pred.T[0], yerr=np.sqrt(mean_var.T)[0], fmt='rx', alpha=0.1);
+            plt.title("Sto Log-mean QoI (initial)")
+            plt.show()
+
+            plt.plot(m_mean.Y, mean_pred, 'rx')
+            plt.scatter(m_mean.Y, m_mean.Y, alpha=0.2)
+            plt.title("Sto Log-mean QoI (initial)")
+            plt.show()
+
+
+        return mean_pred, mean_var, initial_noise_variance, initial_process_variance
 
     def calibrate(self):  # noqa: C901, D102
         print('Calibrating in parallel', flush=True)  # noqa: T201
@@ -961,7 +1093,7 @@ def calibrate(self):  # noqa: C901, D102
         nugget_opt_tmp = self.nugget_opt
         nopt = self.nopt
 
-        parallel_calib = False
+        parallel_calib = True
         # parallel_calib = self.do_parallel
 
         if parallel_calib:
@@ -976,6 +1108,9 @@ def calibrate(self):  # noqa: C901, D102
                     nopt,
                     ny,
                     self.n_processor,
+                    self.is_paralle_opt_safe,
+                    self.init_noise_var[ny],
+                    self.init_process_var[ny]
                 )
                 for ny in range(self.y_dim)
             )
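With `parallel_calib` now True, each QoI is calibrated through `pool.starmap` over a generator of argument tuples. A minimal, self-contained starmap sketch (the `calibrating` stub here is hypothetical, not the module's function, which takes many more arguments):

```python
from multiprocessing import Pool

def calibrating(m, nugget_opt, nopt, ny, seed):
    # Stand-in for the per-QoI calibration routine; it must be a top-level
    # function so multiprocessing can pickle it.
    return f'QoI {ny} calibrated with {nopt} restarts (seed {seed})'

if __name__ == '__main__':
    y_dim, nopt = 3, 3
    iterables = (
        (None, 'Heteroscedastic', nopt, ny, 42) for ny in range(y_dim)
    )
    with Pool(2) as pool:
        for result in pool.starmap(calibrating, iterables):
            print(result)
```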
@@ -987,7 +1122,27 @@ def calibrate(self):  # noqa: C901, D102
 
             # TODO: terminate it gracefully....  # noqa: TD002
             # see https://stackoverflow.com/questions/21104997/keyboard-interrupt-with-pythons-multiprocessing
-
+
+            if develop_mode:
+                for ny in range(self.y_dim):
+                    print(self.m_list[ny])
+                    # print(m_tmp.rbf.lengthscale)
+                    tmp = self.m_list[ny].predict(self.m_list[ny].X)[0]
+
+                    # this one has a noise
+                    plt.title("Original Mean QoI")
+                    plt.scatter(self.m_list[ny].Y, tmp, alpha=0.1)
+                    plt.scatter(self.m_list[ny].Y, self.m_list[ny].Y, alpha=0.1)
+                    plt.xlabel("exact")
+                    plt.ylabel("pred")
+                    plt.show()
+
+                    plt.title("Original Mean QoI")
+                    plt.scatter(self.m_list[ny].X[:, 0], tmp, alpha=0.1)
+                    plt.scatter(self.m_list[ny].X[:, 0], self.m_list[ny].Y, alpha=0.1)
+                    plt.xlabel("exact")
+                    plt.ylabel("pred")
+                    plt.show()
         else:
             for ny in range(self.y_dim):
                 self.m_list[ny], msg, ny = calibrating(  # noqa: PLW2901
@@ -997,35 +1152,49 @@ def calibrate(self):  # noqa: C901, D102
                     self.normVars[ny],
                     self.do_mf,
                     self.heteroscedastic,
-                    nopt,
+                    self.nopt,
                     ny,
                     self.n_processor,
+                    self.is_paralle_opt_safe,
+                    self.init_noise_var[ny],
+                    self.init_process_var[ny]
                 )
                 if msg != '':
                     self.exit(msg)
 
                 ####
-
+                if develop_mode:
+                    print(self.m_list[ny])
+                    # print(m_tmp.rbf.lengthscale)
+                    tmp = self.m_list[ny].predict(self.m_list[ny].X)
+                    plt.title("Final Mean QoI")
+                    plt.scatter(self.m_list[ny].Y, tmp[0], alpha=0.1)
+                    plt.scatter(self.m_list[ny].Y, self.m_list[ny].Y, alpha=0.1)
+                    plt.xlabel("exact")
+                    plt.ylabel("pred")
+                    plt.show()
+
 
         # because EE-UQ results are more likely to have huge nugget.
         # if False:
-        if self.isEEUQ:
-            if self.heteroscedastic:
-                variance_keyword = 'het_Gauss.variance'
-            else:
-                variance_keyword = 'Gaussian_noise.variance'
-
-            for ny in range(self.y_dim):
-                for parname in self.m_list[ny].parameter_names():
-                    if parname.endswith('variance') and ('Gauss' not in parname):
-                        exec(  # noqa: S102
-                            'my_new_var = max(self.m_list[ny].'
-                            + variance_keyword
-                            + ', 10*self.m_list[ny].'
-                            + parname
-                            + ')'
-                        )
-                        exec('self.m_list[ny].' + variance_keyword + '= my_new_var')  # noqa: S102
-
-                self.m_list[ny].optimize()
+        # if self.isEEUQ:
+        #     if self.heteroscedastic:
+        #         variance_keyword = 'het_Gauss.variance'
+        #     else:
+        #         variance_keyword = 'Gaussian_noise.variance'
+        #
+        #     for ny in range(self.y_dim):
+        #         for parname in self.m_list[ny].parameter_names():
+        #             if parname.endswith('variance') and ('Gauss' not in parname):
+        #                 exec(  # noqa: S102
+        #                     'my_new_var = max(self.m_list[ny].'
+        #                     + variance_keyword
+        #                     + ', 10*self.m_list[ny].'
+        #                     + parname
+        #                     + ')'
+        #                 )
+        #                 exec('self.m_list[ny].' + variance_keyword + '= my_new_var')  # noqa: S102
+        #
+        #         self.m_list[ny].optimize()
 
         self.calib_time = time.time() - t_opt
         print(f'     Calibration time: {self.calib_time:.2f} s', flush=True)  # noqa: T201
@@ -1036,6 +1205,12 @@ def calibrate(self):  # noqa: C901, D102
         return Y_preds, Y_pred_vars, Y_pred_vars_w_measures, e2
 
     def train_surrogate(self, t_init):  # noqa: C901, D102, PLR0915
+
+
+
+        self.seed = 43
+        np.random.seed(43)
+
         self.nc1 = min(200 * self.x_dim, 2000)  # candidate points
         self.nq = min(200 * self.x_dim, 2000)  # integration points
         # FEM index
@@ -1116,6 +1291,7 @@ def FEM_batch_lf(X, id_sim):  # noqa: N802, N803
 
         X_hf_tmp = model_hf.sampling(max([model_hf.n_init - model_hf.n_existing, 0]))  # noqa: N806
 
+
         #
         # if X is from a data file & Y is from simulation
         #
@@ -1170,7 +1346,9 @@ def FEM_batch_lf(X, id_sim):  # noqa: N802, N803
                 msg = f'Error importing input data: dimension inconsistent: high fidelity model have {self.Y_hf.shape[1]} QoI(s) but low fidelity model have {self.Y_lf.shape[1]}.'
                self.exit(msg)
 
+        stoch_idx = []
         for i in range(y_dim):
+            print("Setting up QoI {} among {}".format(i + 1, y_dim))
             self.m_list[i] = self.set_XY(
                 self.m_list[i],
                 i,
@@ -1180,6 +1358,64 @@ def FEM_batch_lf(X, id_sim):  # noqa: N802, N803
                 self.Y_lf[:, i][np.newaxis].transpose(),
             )  # log-transform is inside set_XY
 
+
+            # check stochastic ?
+            # if self.stochastic[i] and not self.do_mf:
+            #     # see if we can run it parallel
+            #     X_new, X_idx, indices, counts = np.unique(  # noqa: N806
+            #         self.X_hf,
+            #         axis=0,
+            #         return_index=True,
+            #         return_counts=True,
+            #         return_inverse=True,
+            #     )
+            #     n_unique = X_new.shape[0]
+            #     if n_unique == self.X_hf.shape[0]:  # no repl
+            #         stoch_idx += [i]
+            #
+            # else:
+            #     #
+            #     # original calibration
+            #     #
+            #     print("Setting up QoI {} among {}".format(i+1,y_dim))
+            #     self.m_list[i] = self.set_XY(
+            #         self.m_list[i],
+            #         i,
+            #         self.X_hf,
+            #         self.Y_hf[:, i][np.newaxis].transpose(),
+            #         self.X_lf,
+            #         self.Y_lf[:, i][np.newaxis].transpose(),
+            #     )  # log-transform is inside set_XY
+
+        # # parallel run
+        # if len(stoch_idx)>0:
+        #     iterables = (
+        #         (
+        #             copy.deepcopy(self.m_list[i]),
+        #             i,
+        #             self.x_dim,
+        #             self.X_hf,
+        #             self.Y_hf[:, i][np.newaxis].transpose(),
+        #             self.create_kernel,
+        #             self.create_gpy_model,
+        #             self.do_logtransform,
+        #             self.predictStoMeans,
+        #             self.set_normalizer
+        #         )
+        #         for i in stoch_idx
+        #     )
+        #     result_objs = list(self.pool.starmap(set_XY_indi, iterables))
+        #     for ny, m_tmp_, Y_mean_, normMeans_, normVars_, m_var_list_, var_str_, indices_unique_, n_unique_hf_ in result_objs:  # noqa: N806
+        #         self.m_tmp[ny] = m_tmp_
+        #         self.Y_mean[ny] = Y_mean_
+        #         self.normMeans[ny] = normMeans_
+        #         self.normVars[ny] = normVars_
+        #         self.m_var_list[ny] = m_var_list_
+        #         self.var_str[ny] = var_str_
+        #         self.indices_unique = indices_unique_
+        #         self.n_unique_hf[ny] = n_unique_hf_
+
+
         #
         # Verification measures
         #
@@ -1201,9 +1437,8 @@ def FEM_batch_lf(X, id_sim):  # noqa: N802, N803
         # Initial calibration
 
         # Calibrate self.m_list
-        self.Y_cvs, self.Y_cv_vars, self.Y_cv_var_w_measures, e2 = (
-            self.calibrate()
-        )
+        self.Y_cvs, self.Y_cv_vars, self.Y_cv_var_w_measures, e2 = self.calibrate()
+
         if self.do_logtransform:
             # self.Y_cv = np.exp(2*self.Y_cvs+self.Y_cv_vars)*(np.exp(self.Y_cv_vars)-1) # in linear space
             # TODO: Let us use median instead of mean?  # noqa: TD002
@@ -1341,197 +1576,128 @@ def FEM_batch_lf(X, id_sim):  # noqa: N802, N803
         print(f'2. max(NRMSE) = {np.max(self.NRMSE_val)}', flush=True)  # noqa: T201
         print(f'3. time = {self.sim_time:.2f} s', flush=True)  # noqa: T201
 
-        r"""
-
-        self.inbound50
-        self.Gausspvalue
-        ## The plot in quoFEM
-        import matplotlib.pyplot as plt
-        ny = 0 ;
-        nx = 4;
-        sorted_y_std = np.sqrt(self.Y_cv_var_w_measure[:,ny])
-        sorted_y_std0 = np.sqrt(self.Y_cv_var[:,ny])
-
-        sorted_y_stds = np.sqrt(self.Y_cv_var_w_measures[:,ny])
-        sorted_y_std0s = np.sqrt(self.Y_cv_vars[:,ny])
-
-        plt.errorbar(self.X_hf[:, nx],(self.Y_cv[:, ny]),yerr=sorted_y_std,fmt='x');
-        plt.errorbar(self.X_hf[:, nx],(self.Y_cv[:, ny]),yerr=sorted_y_std0,fmt='x');
-        plt.scatter(self.X_hf[:, nx],(self.Y_hf[:, ny]),c='r'); plt.show()
-
-        # plt.errorbar(self.X_hf[:, nx],(self.Y_cvs[:, ny]),yerr=sorted_y_stds,fmt='x');
-        # plt.errorbar(self.X_hf[:, nx],(self.Y_cvs[:, ny]),yerr=sorted_y_std0s,fmt='x');
-        # plt.scatter(self.X_hf[:, nx],np.log(self.Y_hf[:, ny]),c='r'); plt.show()
-        #
-        #
-        # plt.scatter(self.X_hf[:, nx],np.log(self.Y_hf[:, ny]),color='r');
-        # plt.scatter(self.X_hf[:, nx],np.log(self.Y_cv[:, ny])); plt.show()
-
-
-        plt.errorbar(self.X_hf[:, nx],self.Y_cv[:, ny],yerr = sorted_y_std,fmt='x');plt.ylim([-200,1500]);plt.show()
-
-
-        ##
-
-        x1 = np.log(self.X_hf[:, 1])
-        x2 = np.log(self.X_hf[:, 2])
-        y = np.log(self.Y_hf[:, 0])
-
-
-
-        my_test_X = self.X_hf*1.05
-        x1t = np.log(my_test_X[:, 1])
-        x2t = np.log(my_test_X[:, 2])
-        #yt = (self.m_list[0].predict(my_test_X)[0])
-        yt = e22[:, 0]
-
-        fig = plt.figure()
-        ax = fig.add_subplot(projection='3d')
-        ax.scatter(x1, x2, y)
-        ax.scatter(x1t, x2t, yt)
-        ax.view_init(15, 60)
-
-        plt.show()
-
-
-        # visualize
-        dum, dum, indices, counts = np.unique(self.X_hf, axis=0, return_index=True, return_counts=True,
-                                              return_inverse=True)
-
-        import matplotlib.pyplot as plt
-        xs1 = np.linspace(-2,2,100)
-        xs2 = np.linspace(1.4,1.6,100)
-        xv1, xv2 = np.meshgrid(xs1, xs2)
-        xa1 = np.array(xv1).flatten()
-        xa2 = np.array(xv2).flatten()
-        xs = np.vstack((xa1,xa2)).T
-
-        ys_pred, ys_pred_var = self.m_list[0].predict_noiseless(xs)
-        log_var_pred, dumm = self.m_var_list[0].predict_noiseless(xs)
-        ys_pred_sig = np.sqrt(ys_pred_var)
-        std_pred = np.sqrt(np.exp(log_var_pred))
-
-
-        from mpl_toolkits import mplot3d
-        import numpy as np
-        import matplotlib.pyplot as plt
-        fig = plt.figure()
-        ax = plt.axes(projection='3d')
-
-        ax.scatter3D(xa1, xa2, ys_pred);
-        ax.scatter3D(xa1, xa2, ys_pred+np.sqrt(ys_pred_var));
-        plt.show()
-
-
+        if develop_mode:
+            print("CV: inbound50")
+            print(self.inbound50)
+            print("CV: max coverage")
+            print(self.max_coverage)
+            print("CV: quantile values")
+            print(self.quantile_reconst_list)
+
+
+            ny = 0;
+            nx = 0;
+            sorted_y_std = np.sqrt(self.Y_cv_var_w_measure[:, ny])
+            sorted_y_std0 = np.sqrt(self.Y_cv_var[:, ny])
+
+            plt.errorbar(self.X_hf[:, nx], (self.Y_cv[:, ny]), yerr=sorted_y_std, fmt='x', alpha=50/self.X_hf.shape[0]);
+            plt.errorbar(self.X_hf[:, nx], (self.Y_cv[:, ny]), yerr=sorted_y_std0, fmt='x', alpha=50/self.X_hf.shape[0]);
+            plt.scatter(self.X_hf[:, nx], (self.Y_hf[:, ny]), c='r', alpha=0.1);
+            plt.title("RV={}, QoI={}".format(nx + 1, ny + 1))
+            plt.show()
+
+            log_Y_cv_sample = np.random.normal(self.Y_cvs[:, ny], np.sqrt(self.Y_cv_var_w_measures[:, ny]))
+
+            ny = 0
+            plt.scatter(self.Y_hf[:, ny], self.Y_cv[:, ny], alpha=0.1);
+            plt.errorbar(self.Y_hf[:, ny], self.Y_cv[:, ny], yerr=sorted_y_std, fmt='x', alpha=0.1);
+            plt.scatter(self.Y_hf[:, ny], self.Y_hf[:, ny], alpha=0.1);
+            plt.title("QoI = {}".format(ny + 1))
+            plt.show()
+
+            plt.scatter(np.log10(self.Y_hf[:, ny]), np.log10((self.Y_cv[:, ny])), alpha=1, marker='x');
+            plt.plot(np.log10(self.Y_hf[:, ny]), np.log10(self.Y_hf[:, ny]), alpha=1, color='r');
+            mycor_CV = np.corrcoef(self.Y_hf[:, nx], self.Y_cv[:, ny])[1, 0]
+            mycor_log_CV = np.corrcoef(np.log(self.Y_hf[:, nx]), np.log(self.Y_cv[:, ny]))[1, 0]
+            plt.title(f"train CV rho={round(mycor_CV*100)/100} rho_log={round(mycor_log_CV*100)/100}")
+            plt.xlabel("QoI exact");
+            plt.ylabel("QoI pred median");
+            plt.grid()
+            plt.show()
+
+            [a, b] = self.m_list[0].predict(self.m_list[0].X)
+
+            if sum(self.linear_list) > 0:
+                a = a + self.lin_list[ny].predict(self.X_hf[:, self.linear_list])[:, 0:1]
+                # Don't use b
+
+            plt.scatter(np.log10(self.Y_hf[:, ny]), np.log10(np.exp(a[:, ny])), alpha=1, marker='x');
+            plt.plot(np.log10(self.Y_hf[:, ny]), np.log10(self.Y_hf[:, ny]), alpha=1, color='r');
+            mycor = np.corrcoef(self.Y_hf[:, ny], np.exp(a[:, ny]))[1, 0]
+            mycor_log = np.corrcoef(np.log(self.Y_hf[:, ny]), a[:, ny])[1, 0]
+            plt.title(f"train rho={round(mycor*100)/100} rho_log={round(mycor_log*100)/100}")
+            plt.xlabel("QoI exact");
+            plt.ylabel("QoI pred median");
+            plt.grid()
+            plt.show()
+
+            plt.scatter((self.Y_hf[:, ny]), (np.exp(a[:, ny])), alpha=1, marker='x');
+            plt.plot((self.Y_hf[:, ny]), (self.Y_hf[:, ny]), alpha=1, color='r');
+            mycor = np.corrcoef(self.Y_hf[:, ny], np.exp(a[:, ny]))[1, 0]
+            mycor_log = np.corrcoef(np.log(self.Y_hf[:, ny]), a[:, ny])[1, 0]
+            plt.title(f"train rho={round(mycor*100)/100} rho_log={round(mycor_log*100)/100}")
+            plt.xlabel("QoI exact");
+            plt.ylabel("QoI pred median");
+            plt.grid()
+            plt.show()
+
+
+            plt.scatter((self.X_hf[:, nx]), (self.Y_hf[:, ny]), alpha=1, color='r');
+            plt.scatter((self.X_hf[:, nx]), (np.exp(a[:, ny])), alpha=1, marker='x');
+            plt.xlabel("X");
+            plt.ylabel("QoI pred median");
+            plt.grid()
+            plt.show()
+
+
+            # plt.scatter(np.log10(self.Y_hf[:, ny]),np.log10(np.exp(log_Y_cv_sample)), alpha=1,marker='x');
+            # plt.plot(np.log10(self.Y_hf[:, ny]),np.log10(self.Y_hf[:, ny]),alpha=1,color='r');
+            # mycor = np.corrcoef(self.Y_hf[:, ny], np.exp(log_Y_cv_sample))[1,0]
+            # plt.title(f"train CV samples rho={round(mycor*100)/100}")
+            # plt.xlabel("QoI exact");
+            # plt.ylabel("QoI pred sample");
+            # plt.grid()
+            # plt.show()
+
+            X_test = np.genfromtxt(r'C:\Users\SimCenter\Dropbox\SimCenterPC\Stochastic_GP_validation\input_test.csv', delimiter=',')
+            Y_test = np.genfromtxt(r'C:\Users\SimCenter\Dropbox\SimCenterPC\Stochastic_GP_validation\output_test.csv', delimiter=',')
+            [a, b] = self.m_list[0].predict(X_test)
+
+            if sum(self.linear_list) > 0:
+                a = a + self.lin_list[ny].predict(X_test[:, self.linear_list])[:, 0:1]
+                # Don't use b
+
+            plt.scatter(np.log10(Y_test), np.log10(np.exp(a[:, ny])), alpha=1, marker='x');
+            plt.plot(np.log10(Y_test), np.log10(Y_test), alpha=1, color='r');
+            mycor_Test = np.corrcoef(Y_test, np.exp(a[:, ny]))[1, 0]
+            mycor_log_Test = np.corrcoef(np.log(Y_test), a[:, ny])[1, 0]
+            plt.title(f"test rho={round(mycor_Test*100)/100} rho_log={round(mycor_log_Test*100)/100}")
+            plt.xlabel("QoI exact");
+            plt.ylabel("QoI pred median");
+            plt.grid()
+            plt.show()
+
+            #
+            # Predict variance
+            #
 
-        import matplotlib.pyplot as plt
-        dum, dum, indices, counts = np.unique(self.X_hf, axis=0, return_index=True, return_counts=True,
-                                              return_inverse=True)
+            log_var_pred, dum = self.m_var_list[0].predict(X_test)
+            log_Y_var_pred_w_measure = b + np.exp(log_var_pred) * self.m_list[ny].Gaussian_noise.parameters
 
-
-        xs1 = np.linspace(-2,2,100)
-        xs = np.hstack((xs1[:,None],np.ones(xs1[:,None].shape)*1.5))
-        ys_pred, ys_pred_var = self.m_list[0].predict_noiseless(xs)
-        log_var_pred, dumm = self.m_var_list[0].predict_noiseless(xs)
-        var_pred = np.exp(log_var_pred)
+            qualtile_vals = np.arange(0.1, 1, 0.1)
+            qualtile_reconst = np.zeros([len(qualtile_vals)])
+            for nqu in range(len(qualtile_vals)):
+                Q_b = norm.ppf(qualtile_vals[nqu], loc=a, scale=np.sqrt(log_Y_var_pred_w_measure))
+                qualtile_reconst[nqu] = np.sum((np.log(Y_test) < Q_b[:, 0])) / Y_test.shape[0]
 
-        norm_var_str = (var_pred.T[0]) # if normalization was used..
-
-        ys_pred_sig = np.sqrt(ys_pred_var)
-        ys_pred_sig_w_measured = np.sqrt(norm_var_str[:,None]*self.m_list[0].Gaussian_noise.variance+ys_pred_var)
-
-
-
-        plt.figure()
-        #plt.plot(xa1, ys_pred, ':', c=(155/255, 34/255, 38/255), label="f(x)") #RED
-        #plt.scatter(X, Y, c=(155/255, 34/255, 38/255), label='Observations')
-
-
-        idx = np.argsort(self.X_hf[:,0])
-        sorted_x =self.X_hf[idx,0]
-        sorted_y =self.Y_cv[idx,0]
-        sorted_y_real =self.Y_hf[idx,0]
-        sorted_y_std = np.sqrt(self.Y_cv_var_w_measure[idx,0])
-        sorted_y_std0 = np.sqrt(self.Y_cv_var[idx,0])
-
-        ### cross validation predictions
-
-        # plt.fill(np.concatenate([sorted_x, sorted_x[::-1]]),
-        #          np.concatenate([sorted_y - 1.9600 * sorted_y_std0,
-        #                          (sorted_y + 1.9600 * sorted_y_std0) [::-1]]),
-        #          alpha=.5, fc=(200/255, 200/255, 33/255), ec='None', label='95% cross validation')
-        #
-        #
-
-        plt.fill(np.concatenate([xs1, xs1[::-1]]),
-                 np.concatenate([ys_pred - 1.9600 * ys_pred_sig_w_measured,
-                                 (ys_pred + 1.9600 * ys_pred_sig_w_measured)[::-1]]),
-                 alpha=.5, fc=(42/255, 111/255, 151/255), ec='None', label='95% PI')
-
-        plt.fill(np.concatenate([xs1, xs1[::-1]]),
-                 np.concatenate([ys_pred - 1.9600 * ys_pred_sig,
-                                 (ys_pred + 1.9600 * ys_pred_sig)[::-1]]),
-                 alpha=.5, fc=(200/255, 33/255, 33/255), ec='None', label='95% CI')
-
-        #plt.scatter(self.m_list[0].X,self.m_list[0].Y, label='Mean of repls')
-
-        #plt.scatter(sorted_x,sorted_y, label='All observation')
-        plt.plot(xs1, ys_pred, '-', c=(0/255, 18/255, 25/255), label='Prediction mean')
-        plt.scatter(self.X_hf[:,0],self.Y_hf[:,0], label='All observations')
-        #plt.scatter(self.m_list[0].X[counts>1,0],self.m_list[0].Y[counts>1,0], label='Mean of repls')
-
-        plt.xlabel('$x$')
-        plt.ylabel('$f(x)$')
-        plt.legend(loc='upper left', frameon=False)
-        plt.ylim([-300,1800])
-        plt.show()
-
-
-
-
-        ny=1
-        idx = np.argsort(self.X_hf[:,ny])
-        sorted_x =self.X_hf[idx,ny]
-        sorted_y =self.Y_cv[idx,ny]
-        sorted_y_real =self.Y_hf[idx,ny]
-        sorted_y_std = np.sqrt(self.Y_cv_var_w_measure[idx,ny])
-        sorted_y_std0 = np.sqrt(self.Y_cv_var[idx,ny])
-
-        from scipy.stats import norm, probplot
-        import matplotlib.pyplot as plt
-
-        plt.hist((sorted_y-sorted_y_real)/sorted_y_std, bins=20,density=True)
-        x = np.linspace(-4,4, 100)
-        plt.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
-        plt.xlabel(r"$(Y_{CV}-Y_{obs})/\sigma_{CV}$")
-        plt.show()
-
-        probplot((sorted_y-sorted_y_real)/sorted_y_std, dist="norm", fit=True, rvalue=True, plot=plt)
-        plt.show()
-
-        np.std((sorted_y-sorted_y_real)/sorted_y_std)
-
-        import matplotlib.pyplot as plt
-        dof=0
-        plt.plot(self.Y_cv[:,dof],self.Y_hf[:,dof],'x');
-        plt.plot(self.Y_hf[:,dof],self.Y_hf[:,dof],'-');
-        plt.xlabel("CV")
-        plt.ylabel("Exact")
-        plt.show()
-        """  # noqa: W291, W293
+            quant_err = abs(qualtile_reconst - qualtile_vals)
+            print(f"Test: max coverage err: {np.max(quant_err)}")
+            print(f"Test: mean coverage err: {np.mean(quant_err)}")
+            print("Test: quantile range")
+            print(qualtile_reconst)
+            print(f"Corr(log) for CV: {round(mycor_log_CV*100)/100}")
+            print(f"Corr(log) for Test: {round(mycor_log_Test*100)/100}")
+            print("")
 
-        # plt.show()
-        # plt.plot(self.Y_cv[:, 1],Y_exact[:,1],'x')
-        # plt.plot(Y_exact[:, 1],Y_exact[:, 1],'x')
-        # plt.xlabel("CV")
-        # plt.ylabel("Exact")
-        # plt.show()
-        #
-        # self.m_list = list()
-        # for i in range(y_dim):
-        #     self.m_list = self.m_list + [GPy.models.GPRegression(self.X_hf, self.Y_hf, kernel=GPy.kern.RBF(input_dim=x_dim, ARD=True), normalizer=True)]
-        #     self.m_list[i].optimize()
-        #
-        #     self.m_list[i].predict()
 
     def verify(self):  # noqa: D102
         Y_cv = self.Y_cv  # noqa: N806
@@ -1577,7 +1743,7 @@ def verify(self):  # noqa: D102
                 # y_pred_var[ns, ny] = y_pred_vars
 
         error_ratio2_Pr = y_pred_var / y_data_var  # noqa: N806
-        print(np.max(error_ratio2_Pr, axis=0), flush=True)  # noqa: T201
+        # print(np.max(error_ratio2_Pr, axis=0), flush=True)  # noqa: T201
 
         perc_thr_tmp = np.hstack(
             [np.array([1]), np.arange(10, 1000, 50), np.array([999])]
         )
@@ -1613,12 +1779,21 @@ def verify_nugget(self):  # noqa: D102
         Y = self.Y_hf  # noqa: N806
         model_hf = self.modelInfoHF  # noqa: F841
 
+        self.quantile_reconst_list = np.zeros((self.y_dim, 9))
+        self.max_coverage = np.zeros((self.y_dim,))
+        self.mean_coverage = np.zeros((self.y_dim,))
         self.inbound50 = np.zeros((self.y_dim,))
         self.Gausspvalue = np.zeros((self.y_dim,))
 
         if not self.do_mf:
             for ny in range(self.y_dim):
                 if not self.do_logtransform:
+
+
+                    #
+                    # Interquartile range
+                    #
+
                     PI_lb = norm.ppf(  # noqa: N806
                         0.25,
                         loc=Y_cv[:, ny],
@@ -1631,6 +1806,24 @@ def verify_nugget(self):  # noqa: D102
                     )
                     num_in_bound = np.sum((Y[:, ny] > PI_lb) * (Y[:, ny] < PI_ub))
 
+                    #
+                    # coverage range
+                    #
+
+                    qualtile_vals = np.arange(0.1, 1, 0.1)
+                    qualtile_reconst = np.zeros([len(qualtile_vals)])
+                    for nqu in range(len(qualtile_vals)):
+                        Q_b = np.squeeze(norm.ppf(qualtile_vals[nqu], loc=Y_cv[:, ny], scale=np.sqrt(Y_cv_var_w_measure[:, ny])))
+
+                        # print(Y[:, ny])
+                        # print(Q_b)
+                        qualtile_reconst[nqu] = np.sum(Y[:, ny] < Q_b) / Y.shape[0]
+
+                    quant_err = abs(qualtile_reconst - qualtile_vals)
+
+
                     norm_residual = (Y[:, ny] - Y_cv[:, ny]) / np.sqrt(
                         Y_cv_var_w_measure[:, ny]
                     )
@@ -1644,8 +1837,10 @@ def verify_nugget(self):  # noqa: D102
                     log_Y_cv = self.Y_cvs[:, ny]  # noqa: N806
                     log_Y_cv_var_w_measure = self.Y_cv_var_w_measures[:, ny]  # noqa: N806
 
-                    # PI_lb = lognorm.ppf(0.25, s=sigm, scale=np.exp(mu)).tolist()
-                    # PI_ub = lognorm.ppf(0.75, s=sigm, scale=np.exp(mu)).tolist()
+                    #
+                    # Interquartile range
+                    #
+
                     PI_lb = norm.ppf(  # noqa: N806
                         0.25, loc=log_Y_cv, scale=np.sqrt(log_Y_cv_var_w_measure)
                     ).tolist()
@@ -1656,11 +1851,31 @@ def verify_nugget(self):  # noqa: D102
                         (np.log(Y[:, ny]) > PI_lb) * (np.log(Y[:, ny]) < PI_ub)
                     )
 
+                    #
+                    # coverage range
+                    #
+
+                    qualtile_vals = np.arange(0.1, 1, 0.1)
+                    qualtile_reconst = np.zeros([len(qualtile_vals)])
+                    for nqu in range(len(qualtile_vals)):
+                        Q_b = norm.ppf(qualtile_vals[nqu], loc=log_Y_cv, scale=np.sqrt(log_Y_cv_var_w_measure)).tolist()
+                        qualtile_reconst[nqu] = np.sum((np.log(Y[:, ny]) < Q_b)) / Y.shape[0]
+
+                    quant_err = abs(qualtile_reconst - qualtile_vals)
+
+                    #
+                    # cramervonmises
+                    #
+
                     norm_residual = (np.log(Y[:, ny]) - log_Y_cv) / np.sqrt(
                         log_Y_cv_var_w_measure
                     )
                     stats = cramervonmises(norm_residual, 'norm')
+
+                self.quantile_reconst_list[ny, :] = qualtile_reconst
+                self.max_coverage[ny] = np.max(quant_err)
+                self.mean_coverage[ny] = np.mean(quant_err)
                 self.inbound50[ny] = num_in_bound / Y.shape[0]
                 self.Gausspvalue[ny] = stats.pvalue
@@ -1802,6 +2017,10 @@ def save_model(self, filename):  # noqa: C901, D102, PLR0915
                 comments='%',
             )
 
+        #
+        # Save surrogateinfo
+        #
+
         results = {}
 
         hfJson = {}  # noqa: N806
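verify_nugget() now reports coverage error: for each quantile level it counts how often the observed response falls below the predicted quantile and compares that empirical rate with the nominal level. A standalone sketch with synthetic numbers (not the module's data):

```python
import numpy as np
from scipy.stats import norm

# Made-up CV predictions: means, variances with measurement noise, and data.
rng = np.random.default_rng(0)
y_cv = rng.normal(size=200)
y_cv_var = np.full(200, 0.25)
y_obs = y_cv + rng.normal(scale=0.5, size=200)

quantile_vals = np.arange(0.1, 1, 0.1)
coverage = np.zeros(len(quantile_vals))
for i, q in enumerate(quantile_vals):
    # q-quantile of each predictive normal; count how often the data fall below.
    Q_b = norm.ppf(q, loc=y_cv, scale=np.sqrt(y_cv_var))
    coverage[i] = np.mean(y_obs < Q_b)

print(np.max(np.abs(coverage - quantile_vals)))  # max coverage error
```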
@@ -1876,6 +2095,7 @@ def save_model(self, filename):  # noqa: C901, D102, PLR0915
         results['valCorrCoeff'] = {}
         results['valIQratio'] = {}
         results['valPval'] = {}
+        results['valCoverageVals'] = {}
         results['yPredict_PI_lb'] = {}
         results['yPredict_PI_ub'] = {}
         results['xExact'] = {}
@@ -1941,6 +2161,7 @@ def save_model(self, filename):  # noqa: C901, D102, PLR0915
             results['valCorrCoeff'][self.g_name[ny]] = self.corr_val[ny]
             results['valIQratio'][self.g_name[ny]] = self.inbound50[ny]
             results['valPval'][self.g_name[ny]] = self.Gausspvalue[ny]
+            results['valCoverageVals'][self.g_name[ny]] = self.quantile_reconst_list[ny].tolist()
 
             if np.isnan(self.NRMSE_val[ny]) or np.isinf(self.NRMSE_val[ny]):
                 results['valNRMSE'][self.g_name[ny]] = 'null'
@@ -1967,6 +2188,9 @@ def save_model(self, filename):  # noqa: C901, D102, PLR0915
 
         results['modelInfo'] = {}
         for ny in range(self.y_dim):
+            #
+            # Save the variance model
+            #
             if self.stochastic[ny]:
                 results['modelInfo'][self.g_name[ny] + '_Var'] = {}
                 for parname in self.m_var_list[ny].parameter_names():
@@ -1978,15 +2202,27 @@ def save_model(self, filename):  # noqa: C901, D102, PLR0915
                 ] = self.m_var_list[ny].Y.flatten().tolist()
             else:
                 results['modelInfo'][self.g_name[ny] + '_Var'] = 0
-
-        if not self.do_mf:
-            for ny in range(self.y_dim):
+
+            #
+            # Save the main model
+            #
+            if not self.do_mf:
                 results['modelInfo'][self.g_name[ny]] = {}
                 for parname in self.m_list[ny].parameter_names():
                     results['modelInfo'][self.g_name[ny]][parname] = list(
                         eval('self.m_list[ny].' + parname)  # noqa: S307
                     )
 
+            #
+            # Save the linear model
+            #
+            if self.do_linear > 0:
+                results['modelInfo'][self.g_name[ny] + '_Lin'] = {}
+                results['modelInfo'][self.g_name[ny] + '_Lin']['predictorList'] = []  # TBA
+                results['modelInfo'][self.g_name[ny] + '_Lin']['coef'] = np.squeeze(self.lin_list[ny].coef_).tolist()
+                results['modelInfo'][self.g_name[ny] + '_Lin']['intercept'] = float(self.lin_list[ny].intercept_)
+
+
         if self.isEEUQ or self.isWEUQ:
             # read SAM.json
             SAMpath = self.work_dir + '/templatedir/SAM.json'  # noqa: N806
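The cross-validation routine below computes leave-one-out errors without refitting the GP once per point. It relies on the standard LOO identity for GP regression (Rasmussen & Williams, Eqs. 5.10-5.12); the sketch below is the textbook form under that assumption, not the module's exact implementation:

```python
import numpy as np

def loo_gp(K, y):
    # Leave-one-out posterior mean/variance for GP regression, where K is
    # the kernel matrix with the noise already added to its diagonal.
    Kinv = np.linalg.inv(K)
    alpha = Kinv @ y
    d = np.diag(Kinv)
    mu = y - alpha / d   # LOO predictive means
    var = 1.0 / d        # LOO predictive variances
    return mu, var

# toy check on a 3-point problem
X = np.array([0.0, 0.5, 1.0])
K = np.exp(-0.5 * (X[:, None] - X[None, :]) ** 2) + 1e-2 * np.eye(3)
print(loo_gp(K, np.sin(X)))
```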
@@ -2578,13 +2814,14 @@ def normalized_mean_sq_error(self, yp, ye):  # noqa: D102
     def get_cross_validation_err(self):  # noqa: D102
         print('Calculating cross validation errors', flush=True)  # noqa: T201
         time_tmp = time.time()
-        X_hf = self.X_hf  # contains separate samples  # noqa: N806
-        Y_hf = self.Y_hf  # noqa: N806
-
-        e2 = np.zeros(Y_hf.shape)  # only for unique...
-        Y_pred = np.zeros(Y_hf.shape)  # noqa: N806
-        Y_pred_var = np.zeros(Y_hf.shape)  # noqa: N806
-        Y_pred_var_w_measure = np.zeros(Y_hf.shape)  # noqa: N806
+        X_hf = self.X_hf  # contains separate samples  # noqa: N806
+        nsamp = self.X_hf.shape[0]
+        ydim = self.Y_hf.shape[1]
+
+        e2 = np.zeros((nsamp, ydim))  # only for unique...
+        Y_pred = np.zeros((nsamp, ydim))  # noqa: N806
+        Y_pred_var = np.zeros((nsamp, ydim))  # noqa: N806
+        Y_pred_var_w_measure = np.zeros((nsamp, ydim))  # noqa: N806
 
         #
         # Efficient cross validation TODO: check if it works for heteroskedacstic
         #
@@ -2598,14 +2835,14 @@ def get_cross_validation_err(self):  # noqa: D102
 
             indices = self.indices_unique
 
-            for ny in range(Y_hf.shape[1]):
+            for ny in range(ydim):
                 Xm = self.m_list[ny].X  # contains unique samples  # noqa: N806
                 Ym = self.m_list[ny].Y  # noqa: N806
 
                 # works both for stochastic/stochastic
                 nugget_mat = (
                     np.diag(np.squeeze(self.var_str[ny]))
-                    * self.m_list[ny].Gaussian_noise.parameters
+                    * self.m_list[ny].Gaussian_noise.parameters  # TODO
                 )
 
                 Rmat = self.m_list[ny].kern.K(Xm)  # noqa: N806
@@ -2632,7 +2869,12 @@ def get_cross_validation_err(self):  # noqa: D102
                     * self.normVars[ny],
                 )
 
+                if sum(self.linear_list) > 0:
+                    Y_pred[:, ny] = Y_pred[:, ny] + self.lin_list[ny].predict(X_hf[:, self.linear_list])[:, 0]
+
+
         else:
+            Y_hf = self.Y_hf  # noqa: N806
             Y_pred2 = np.zeros(Y_hf.shape)  # noqa: N806
             Y_pred_var2 = np.zeros(Y_hf.shape)  # noqa: N806
             e22 = np.zeros(Y_hf.shape)
@@ -2782,6 +3024,7 @@ def __init__(  # noqa: C901
         x_dim,
         y_dim,
         n_processor,
+        global_seed,
         idx=0,
     ):
         def exit_tmp(msg):
@@ -2796,6 +3039,7 @@ def __init__(  # noqa: C901
         self.idx = idx
         self.x_dim = x_dim
         self.y_dim = y_dim
+        self.global_seed = global_seed
         #
         # Get [X_existing, Y_existing, n_existing, n_total]
         #
@@ -2961,7 +3205,7 @@ def sampling(self, n):  # noqa: D102
         if n > 0:
             X_samples = np.zeros((n, self.x_dim))  # noqa: N806
             # LHS
-            sampler = qmc.LatinHypercube(d=self.x_dim)
+            sampler = qmc.LatinHypercube(d=self.x_dim, seed=self.global_seed)
             U = sampler.random(n=n)  # noqa: N806
             for nx in range(self.x_dim):
                 if self.xDistTypeArr[nx] == 'U':
@@ -3056,7 +3300,14 @@ def calibrating(  # noqa: C901, D103
     nopt,
     ny,
     n_processor,
+    is_paralle_opt_safe,
+    init_noise_var,
+    init_process_var
 ):
     # nuggetVal = self.nuggetVal[ny]
+
+    np.random.seed(int(ny))
+    random.seed(int(ny))
+
     msg = ''
 
     if do_heteroscedastic:
@@ -3068,12 +3319,23 @@ def calibrating(  # noqa: C901, D103
     if nugget_opt_tmp == 'Optimize':
         # m_tmp[variance_keyword].unfix()
         X = m_tmp.X  # noqa: N806
+        # for parname in m_tmp.parameter_names():
+        #     if parname.endswith('lengthscale'):
+        #         for nx in range(X.shape[1]):  # noqa: B007
+        #             myrange = np.max(X, axis=0) - np.min(X, axis=0)
+        #             exec('m_tmp.' + parname + '[[nx]] = myrange[nx]')  # noqa: S102
+
+        m_tmp[variance_keyword].constrain_bounded(0.05, 2, warning=False)
         for parname in m_tmp.parameter_names():
             if parname.endswith('lengthscale'):
                 for nx in range(X.shape[1]):  # noqa: B007
-                    myrange = np.max(X, axis=0) - np.min(X, axis=0)
-                    exec('m_tmp.' + parname + '[[nx]] = myrange[nx]')  # noqa: S102
-
+                    myrange = np.max(X, axis=0) - np.min(X, axis=0)  # noqa: F841
+                    exec(  # noqa: S102
+                        'm_tmp.'
+                        + parname
+                        + '[[nx]].constrain_bounded(myrange[nx] / X.shape[0]*10, myrange[nx],warning=False)'
+                    )
+                    # m_tmp[parname][nx].constrain_bounded(myrange[nx] / X.shape[0], myrange[nx]*100)
     elif nugget_opt_tmp == 'Fixed Values':
         m_tmp[variance_keyword].constrain_fixed(
             nuggetVal[ny] / normVar, warning=False
@@ -3091,20 +3353,37 @@ def calibrating(  # noqa: C901, D103
                     myrange = np.max(X, axis=0) - np.min(X, axis=0)
                    exec('m_tmp.' + parname + '[[nx]] = myrange[nx]')  # noqa: S102
     elif nugget_opt_tmp == 'Heteroscedastic':
+
+        if init_noise_var is None:
+            init_noise_var = 1
+
+        if init_process_var is None:
+            init_process_var = 1
+
         X = m_tmp.X  # noqa: N806
+
         for parname in m_tmp.parameter_names():
             if parname.endswith('lengthscale'):
                 for nx in range(X.shape[1]):  # noqa: B007
                     myrange = np.max(X, axis=0) - np.min(X, axis=0)  # noqa: F841
-                    exec('m_tmp.' + parname + '[[nx]] = myrange[nx]*100')  # noqa: S102
                     exec(  # noqa: S102
                         'm_tmp.'
                         + parname
-                        + '[[nx]].constrain_bounded(myrange[nx] / X.shape[0], myrange[nx]*100,warning=False)'
+                        + '[[nx]].constrain_bounded(myrange[nx] / X.shape[0]*10, myrange[nx],warning=False)'
+                    )
+                    exec(  # noqa: S102
+                        'm_tmp.'
+                        + parname
+                        + '[[nx]] = myrange[nx]*1'
                     )
-                    # m_tmp[parname][nx] = myrange[nx]*100
+
+                    # m_tmp[parname][nx] = myrange[nx]*0.1
                     # m_tmp[parname][nx].constrain_bounded(myrange[nx] / X.shape[0], myrange[nx]*100)
                     # TODO change the kernel  # noqa: TD002, TD004
+
+        # m_tmp[variance_keyword].constrain_bounded(0.05/np.mean(m_tmp.Y_metadata['variance_structure']),2/np.mean(m_tmp.Y_metadata['variance_structure']),warning=False)
+        m_tmp.Gaussian_noise.constrain_bounded(0.5 * init_noise_var, 2 * init_noise_var, warning=False)
+
     else:
         msg = 'Nugget keyword not identified: ' + nugget_opt_tmp
@@ -3137,23 +3416,40 @@ def calibrating(  # noqa: C901, D103
     )
 
     if msg == '':
-        m_tmp.optimize()
+        # m_tmp.optimize()
         # n=0;
         if not do_mf:
-            m_tmp.optimize_restarts(
-                num_restarts=nopt,
-                parallel=True,
-                num_processes=n_processor,
-                verbose=True,
-            )
+
+            # Here
+
+            print("Calibrating final surrogate")
+            m_tmp = my_optimize_restart(m_tmp, nopt)
+
+            # if develop_mode:
+            #     print(m_tmp)
+            #     #print(m_tmp.rbf.lengthscale)
+            #     tmp = m_tmp.predict(m_tmp.X)
+            #     plt.title("Original Mean QoI")
+            #     plt.scatter(m_tmp.Y, tmp[0],alpha=0.1)
+            #     plt.scatter(m_tmp.Y, m_tmp.Y,alpha=0.1)
+            #     plt.xlabel("exact")
+            #     plt.ylabel("pred")
+            #     plt.show()
+
+            # m_tmp.optimize_restarts(
+            #     num_restarts=nopt,
+            #     parallel=is_paralle_opt_safe,
+            #     num_processes=n_processor,
+            #     verbose=True,
+            # )
         else:
             m_tmp.gpy_model.optimize_restarts(
                 num_restarts=nopt,
-                parallel=True,
+                parallel=is_paralle_opt_safe,
                 num_processes=n_processor,
                 verbose=False,
             )
-        print(m_tmp)  # noqa: T201
+        # print(m_tmp)  # noqa: T201
         # while n+20 <= nopt:
         #     m_tmp.optimize_restarts(num_restarts=20)
         #     n = n+20
@@ -3164,6 +3460,136 @@ def calibrating(  # noqa: C901, D103
 
     return m_tmp, msg, ny
 
+
+def my_optimize_restart(m, n_opt):
+    init = time.time()
+    n_sample = len(m.Y)
+    idx = list(range(n_sample))
+    n_batch = 700
+    n_cluster = int(np.ceil(len(m.Y) / n_batch))
+    n_batch = np.ceil(n_sample / n_cluster)
+    random.shuffle(idx)
+    X_full = m.X
+    Y_full = m.Y
+
+    log_likelihoods = np.zeros((n_cluster,))
+    errors = np.zeros((n_cluster,))
+    m_list = []
+    for nc in range(n_cluster):
+        inside_cluster = idx[int(n_batch * nc):int(np.min([n_batch * (nc + 1), n_sample]))]
+
+        #
+        # Testing if this works better for parallel run
+        #
+        @monkeypatch_method(GPy.core.GP)
+        def subsample_XY(self, idx):  # noqa: N802, N803
+            if self.Y_metadata is not None:
+                new_meta = self.Y_metadata
+                new_meta['variance_structure'] = self.Y_metadata['variance_structure'][idx]
+                self.Y_metadata.update(new_meta)
+                print('metadata_updated')  # noqa: T201
+            self.set_XY(self.X[idx, :], self.Y[idx, :])
+
+        @monkeypatch_method(GPy.core.GP)
+        def set_XY2(self, X=None, Y=None, Y_metadata=None):  # noqa: N802, N803
+            if Y_metadata is not None:
+                if self.Y_metadata is None:
+                    self.Y_metadata = Y_metadata
+                else:
+                    self.Y_metadata.update(Y_metadata)
+                print('metadata_updated')  # noqa: T201
+            self.set_XY(X, Y)
+
+        m_subset = m.copy()
+        # m_subset.set_XY2(m_subset.X[inside_cluster,:],m_subset.Y[inside_cluster,:],{""})
+        m_subset.subsample_XY(inside_cluster)
+        # m_subset.optimize(max_f_eval=1000)
+        m_subset.optimize_restarts(n_opt)
+        variance1 = m_subset.normalizer.std**2
+
+        # Option 1
+        tmp_all = m_subset.predict(X_full)
+        errors[nc] = np.linalg.norm(tmp_all[0][:, 0] - Y_full[:, 0])
+
+        # Option 2
+        m_subset.set_XY2(X_full, Y_full, m.Y_metadata)
+        variance2 = m_subset.normalizer.std**2
+
+        m_subset.Gaussian_noise.variance = m_subset.Gaussian_noise.variance * variance1 / variance2
+        log_likelihoods[nc] = m_subset.log_likelihood()
+
+        m_list += [copy.deepcopy(m_subset)]
+        print(" cluster {} among {} : logL {}".format(nc + 1, n_cluster, log_likelihoods[nc]))
+
+        # import matplotlib.pyplot as plt
+        # tmp_all = m_subset.predict(X_full); plt.scatter(tmp_all[0][:, 0], Y_full[:, 0],alpha=0.1); plt.scatter(Y_full[:, 0], Y_full[:, 0],alpha=0.1); plt.show()
+        # tmp_subset = m_subset.predict(X_full[inside_cluster]); plt.scatter(tmp_subset[0][:, 0], Y_full[inside_cluster, 0],alpha=0.1); plt.scatter(Y_full[inside_cluster, 0], Y_full[inside_cluster, 0],alpha=0.1); plt.show()
+
+    # best_cluster = np.argmin(errors)  # not capturing heteroscedasticity
+    best_cluster = np.argmax(log_likelihoods)
+
+    m = m_list[best_cluster]
+    # m.kern.parameters = kernels[best_cluster][0]
+    # m.Gaussian_noise.parameters = kernels[best_cluster][1]
+    # m.parameters_changed()
+    # tmp = m.predict(X_full[inside_cluster]); plt.scatter(tmp[0][:, 0], Y_full[inside_cluster, 0]); plt.scatter(Y_full[inside_cluster, 0], Y_full[inside_cluster, 0]); plt.show()
+    print("Elapsed time: {:.2f} s".format(time.time() - init))
+
+    return m
+
+
+def set_XY_indi(
+    m_tmp,
+    ny,
+    x_dim,
+    X_hf,  # noqa: N803
+    Y_hf,  # noqa: N803
+    create_kernel,
+    create_gpy_model,
+    do_logtransform,
+    predictStoMeans,
+    set_normalizer
+):
+    #
+    # check if X dimension has changed...
+    #
+    x_current_dim = x_dim
+    for parname in m_tmp.parameter_names():
+        if parname.endswith('lengthscale'):
+            exec('x_current_dim = len(m_tmp.' + parname + ')')  # noqa: S102
+
+    if x_current_dim != X_hf.shape[1]:
+        kr = create_kernel(X_hf.shape[1])
+        X_dummy = np.zeros((1, X_hf.shape[1]))  # noqa: N806
+        Y_dummy = np.zeros((1, 1))  # noqa: N806
+        m_new = create_gpy_model(X_dummy, Y_dummy, kr)
+        m_tmp = m_new.copy()
+        # m_tmp.optimize()
+
+    if do_logtransform:
+        if np.min(Y_hf) < 0:
+            raise ValueError('Error running SimCenterUQ - Response contains negative values. Please uncheck the log-transform option in the UQ tab')
+        Y_hfs = np.log(Y_hf)  # noqa: N806
+    else:
+        Y_hfs = Y_hf  # noqa: N806
+
+    # Y_mean=Y_hfs[X_idx]
+    # Y_mean1, nugget_mean1 = self.predictStoMeans(X_new, Y_mean)
+    Y_mean1, nugget_mean1, initial_noise_variance, initial_process_variance = predictStoMeans(X_hf, Y_hfs)  # noqa: N806
+
+    Y_metadata, m_var, norm_var_str = self.predictStoVars(  # noqa: N806
+        X_hf, (Y_hfs - Y_mean1) ** 2, X_hf, Y_hfs, X_hf.shape[0]
+    )
+    m_tmp.set_XY2(X_hf, Y_hfs, Y_metadata=Y_metadata)
+
+    m_var_list = m_var
+    var_str = norm_var_str
+    indices_unique = range(Y_hfs.shape[0])
+    n_unique_hf = X_hf.shape[0]
+    Y_mean = Y_hfs
+
+    normMeans = 0
+    normVars = 1
+
+    return ny, m_tmp, Y_mean, normMeans, normVars, m_var_list, var_str, indices_unique, n_unique_hf
 
 def closest_node(x, X, ll):  # noqa: N803, D103
     X = np.asarray(X)  # noqa: N806
@@ -3218,6 +3644,8 @@ def read_txt(text_dir, exit_fun):  # noqa: D103
 
     return X
 
+
+
 if __name__ == '__main__':
     main(sys.argv)
diff --git a/modules/performUQ/common/createStandardUQ_Input.cpp b/modules/performUQ/common/createStandardUQ_Input.cpp
index ad4205193..1f72d13ef 100755
--- a/modules/performUQ/common/createStandardUQ_Input.cpp
+++ b/modules/performUQ/common/createStandardUQ_Input.cpp
@@ -328,7 +328,7 @@ int main(int argc, char **argv) {
   json_error_t error;
   json_t *rootINPUT = json_load_file(inputFile, 0, &error);
   if (rootINPUT == NULL) {
-    std::cerr << "createStandardUQ_INput could not open file" << inputFile << "\n";
+    std::cerr << "createStandardUQ_Input could not open file: " << inputFile << "\n";
     exit(801); // no random variables is allowed
   }
diff --git a/modules/performUQ/dakota/DakotaUQ.py b/modules/performUQ/dakota/DakotaUQ.py
index b72f3e5ec..b6eaa0df0 100644
--- a/modules/performUQ/dakota/DakotaUQ.py
+++ b/modules/performUQ/dakota/DakotaUQ.py
@@ -145,11 +145,17 @@ def main(args):  # noqa: C901, D103
     dakotaErrFile = os.path.join(os.getcwd(), 'dakota.err')  # noqa: PTH109, PTH118, N806
     dakotaOutFile = os.path.join(os.getcwd(), 'dakota.out')  # noqa: PTH109, PTH118, N806
     dakotaTabFile = os.path.join(os.getcwd(), 'dakotaTab.out')  # noqa: PTH109, PTH118, N806
-    checkErrFile = os.path.getsize(dakotaErrFile)  # noqa: PTH202, N806
+    checkErrFile = os.path.exists(dakotaErrFile)  # noqa: PTH110, N806
     checkOutFile = os.path.exists(dakotaOutFile)  # noqa: PTH110, N806
     checkTabFile = os.path.exists(dakotaTabFile)  # noqa: F841, N806, PTH110
+
+    checkErrSize = -1
+    if checkErrFile > 0:
+        checkErrSize = os.path.getsize(dakotaErrFile)  # noqa: PTH202, N806
+
     if checkOutFile == False and checkErrFile == 0:  # noqa: E712
+
         with open(dakotaErrFile, 'a') as file:  # noqa: PTH123
             file.write(result.decode('utf-8'))
     else:
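The DakotaUQ.py change replaces a bare getsize() call, which raises if dakota.err was never created, with an existence check followed by a size check. The pattern in isolation, under the same file-name assumption:

```python
import os

# dakota.err may legitimately be absent; check existence before size so a
# missing file is not treated the same way as an empty one.
dakotaErrFile = os.path.join(os.getcwd(), 'dakota.err')
checkErrFile = os.path.exists(dakotaErrFile)
checkErrSize = os.path.getsize(dakotaErrFile) if checkErrFile else -1
print(checkErrFile, checkErrSize)
```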