diff --git a/modules/createEVENT/shakeMapEvent/shakeMapEvent.py b/modules/createEVENT/shakeMapEvent/shakeMapEvent.py index 7956743a4..d6f21e1b5 100644 --- a/modules/createEVENT/shakeMapEvent/shakeMapEvent.py +++ b/modules/createEVENT/shakeMapEvent/shakeMapEvent.py @@ -45,10 +45,9 @@ def create_shakemap_event(eventDirectory, eventPath, IMTypes): # noqa: D103 + IMTypesList = eval(IMTypes) - IMTypesList = eval(IMTypes) - - print("Creating shakemap event") + print('Creating shakemap event') xml_file_path = Path(eventDirectory) / eventPath / 'grid.xml' @@ -69,7 +68,7 @@ def create_shakemap_event(eventDirectory, eventPath, IMTypes): # noqa: D103 lon, lat = float(values[0]), float(values[1]) point = Point(lon, lat) points.append(point) - + # Store only the specified attributes attr = {} attribute_mapping = { @@ -78,37 +77,35 @@ def create_shakemap_event(eventDirectory, eventPath, IMTypes): # noqa: D103 'MMI': 4, 'PSA03': 5, 'PSA10': 6, - 'PSA30': 7 + 'PSA30': 7, } - + for im_type in IMTypesList: if im_type in attribute_mapping: attr[im_type] = float(values[attribute_mapping[im_type]]) - + attributes.append(attr) # Create GeoDataFrame - gdf = gpd.GeoDataFrame(attributes, geometry=points, crs="EPSG:4326") + gdf = gpd.GeoDataFrame(attributes, geometry=points, crs='EPSG:4326') # Display the first few rows - print("Saving shakemap to gpkg") + print('Saving shakemap to gpkg') # Save as a GeoPackage file gdf_path = Path(eventDirectory) / 'EventGrid.gpkg' - gdf.to_file(gdf_path, driver="GPKG") + gdf.to_file(gdf_path, driver='GPKG') return if __name__ == '__main__': - parser = argparse.ArgumentParser() parser.add_argument('--input', help='Input file') parser.add_argument('--Directory', help='Directory path') parser.add_argument('--EventPath', help='Event path') parser.add_argument('--IntensityMeasureType', help='types of intensity measures') - args = parser.parse_args() create_shakemap_event(args.Directory, args.EventPath, args.IntensityMeasureType) diff --git a/modules/performRegionalMapping/GISSpecifiedEvents/GISSpecifiedEvent.py b/modules/performRegionalMapping/GISSpecifiedEvents/GISSpecifiedEvent.py index 67aaef869..c23ad992d 100644 --- a/modules/performRegionalMapping/GISSpecifiedEvents/GISSpecifiedEvent.py +++ b/modules/performRegionalMapping/GISSpecifiedEvents/GISSpecifiedEvent.py @@ -43,21 +43,23 @@ from RasterEvent import create_event as create_raster_event + def is_raster_file(filename): # Define a set of common raster file extensions raster_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.tif'} - + # Create a Path object from the filename file_path = Path(filename) - + # Extract the file extension and check if it is in the set of raster extensions return file_path.suffix.lower() in raster_extensions + def is_xml_file(filename): # Check if the file has an .xml extension if not filename.lower().endswith('.xml'): return False - + # Try to parse the file as XML try: ET.parse(filename) @@ -65,17 +67,18 @@ def is_xml_file(filename): except ET.ParseError: return False -def create_event(asset_file : str, event_grid_file: str): # noqa: C901, N803, D103 +def create_event(asset_file: str, event_grid_file: str): # noqa: C901, N803, D103 if is_raster_file(event_grid_file): return create_raster_event(asset_file, event_grid_file) elif is_xml_file(event_grid_file): # Here you would call a function to handle XML files # For now, we'll just raise a NotImplementedError - raise NotImplementedError("XML file handling is not yet implemented.") + raise NotImplementedError('XML file handling is not 
yet implemented.') else: - raise ValueError(f"{event_grid_file} is not a raster. Only rasters are currently supported.") - + raise ValueError( + f'{event_grid_file} is not a raster. Only rasters are currently supported.' + ) if __name__ == '__main__': @@ -84,6 +87,4 @@ def create_event(asset_file : str, event_grid_file: str): # noqa: C901, N803, D parser.add_argument('--filenameEVENTgrid') args = parser.parse_args() - create_event( - args.assetFile, args.filenameEVENTgrid - ) + create_event(args.assetFile, args.filenameEVENTgrid) diff --git a/modules/performRegionalMapping/GISSpecifiedEvents/RasterEvent.py b/modules/performRegionalMapping/GISSpecifiedEvents/RasterEvent.py index a7dcddbc0..eb2ecad25 100644 --- a/modules/performRegionalMapping/GISSpecifiedEvents/RasterEvent.py +++ b/modules/performRegionalMapping/GISSpecifiedEvents/RasterEvent.py @@ -51,16 +51,15 @@ def sample_raster_at_latlon(src, lat, lon): # Ensure the indices are within the bounds of the raster if row < 0 or row >= src.height or col < 0 or col >= src.width: - raise IndexError("Transformed coordinates are out of raster bounds") + raise IndexError('Transformed coordinates are out of raster bounds') # Read the raster value at the given row and column raster_value = src.read(1)[row, col] return raster_value -def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 - +def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 # read the event grid data file event_grid_path = Path(event_grid_file).resolve() event_dir = event_grid_path.parent @@ -82,22 +81,21 @@ def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 asset_dict = json.load(f) data_final = [ - ['GP_file','Latitude','Longitude'], - ] + ['GP_file', 'Latitude', 'Longitude'], + ] # Iterate through each asset for asset in asset_dict: asset_id = asset['id'] asset_file_path = asset['file'] - - # Load the corresponding file for each asset - with open(asset_file_path, encoding='utf-8') as asset_file : + # Load the corresponding file for each asset + with open(asset_file_path, encoding='utf-8') as asset_file: # Load the asset data asset_data = json.load(asset_file) im_tag = asset_data['RegionalEvent']['intensityMeasures'][0] - + # Extract the latitude and longitude lat = float(asset_data['GeneralInformation']['location']['latitude']) lon = float(asset_data['GeneralInformation']['location']['longitude']) @@ -107,34 +105,33 @@ def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 # Check if the transformed coordinates are within the raster bounds bounds = src.bounds - if (bounds.left <= lon_transformed <= bounds.right and - bounds.bottom <= lat_transformed <= bounds.top): + if ( + bounds.left <= lon_transformed <= bounds.right + and bounds.bottom <= lat_transformed <= bounds.top + ): try: - val = sample_raster_at_latlon(src=src, - lat=lat_transformed, - lon=lon_transformed) - - data = [ - [im_tag], - [val] - ] - + val = sample_raster_at_latlon( + src=src, lat=lat_transformed, lon=lon_transformed + ) + + data = [[im_tag], [val]] + # Save the simcenter file name file_name = f'Site_{asset_id}.csvx{0}x{int(asset_id):05d}' - data_final.append([file_name,lat,lon]) + data_final.append([file_name, lat, lon]) csv_save_path = event_dir / f'Site_{asset_id}.csv' with open(csv_save_path, 'w', newline='') as file: # Create a CSV writer object writer = csv.writer(file) - + # Write the data to the CSV file writer.writerows(data) # prepare a dictionary of events event_list_json = [[file_name, 1.0]] - + asset_data['Events'] = [{}] 
asset_data['Events'][0] = { 'EventFolderPath': str(event_dir), @@ -145,12 +142,10 @@ def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 with open(asset_file_path, 'w', encoding='utf-8') as f: # noqa: PTH123 json.dump(asset_data, f, indent=2) - except IndexError as e: - print(f"Error for asset ID {asset_id}: {e}") + print(f'Error for asset ID {asset_id}: {e}') else: - print(f"Asset ID: {asset_id} is outside the raster bounds") - + print(f'Asset ID: {asset_id} is outside the raster bounds') # # save the event dictionary to the BIM # asset_data['Events'] = [{}] @@ -165,13 +160,12 @@ def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 # with open(asset_file, 'w', encoding='utf-8') as f: # noqa: PTH123 # json.dump(asset_data, f, indent=2) - # Save the final event grid csv_save_path = event_dir / 'EventGrid.csv' with open(csv_save_path, 'w', newline='') as file: # Create a CSV writer object writer = csv.writer(file) - + # Write the data to the CSV file writer.writerows(data_final) @@ -185,6 +179,4 @@ def create_event(asset_file, event_grid_file): # noqa: C901, N803, D103 parser.add_argument('--filenameEVENTgrid') args = parser.parse_args() - create_event( - args.assetFile, args.filenameEVENTgrid - ) + create_event(args.assetFile, args.filenameEVENTgrid) diff --git a/modules/performRegionalMapping/NearestNeighborEvents/NNE.py b/modules/performRegionalMapping/NearestNeighborEvents/NNE.py index bd4e953e5..2d2a0cb26 100644 --- a/modules/performRegionalMapping/NearestNeighborEvents/NNE.py +++ b/modules/performRegionalMapping/NearestNeighborEvents/NNE.py @@ -48,6 +48,7 @@ from sklearn.neighbors import NearestNeighbors import geopandas as gpd + def find_neighbors( # noqa: C901, D103 asset_file, event_grid_file, @@ -89,7 +90,7 @@ def find_neighbors( # noqa: C901, D103 if file_extension == '.csv': # Existing code for CSV files grid_df = pd.read_csv(event_dir / event_grid_file, header=0) - + # store the locations of the grid points in X lat_E = grid_df['Latitude'] # noqa: N806 lon_E = grid_df['Longitude'] # noqa: N806 @@ -103,15 +104,15 @@ def find_neighbors( # noqa: C901, D103 else: # Else assume GIS files - works will all gis files that geopandas supports gdf = gpd.read_file(event_dir / event_grid_file) - + # Ensure the GIS file is in a geographic coordinate system if not gdf.crs.is_geographic: gdf = gdf.to_crs(epsg=4326) # Convert to WGS84 - + # Extract coordinates from the geometry gdf['Longitude'] = gdf.geometry.x gdf['Latitude'] = gdf.geometry.y - + # store the locations of the grid points in X lat_E = gdf['Latitude'] # noqa: N806 lon_E = gdf['Longitude'] # noqa: N806 @@ -121,7 +122,7 @@ def find_neighbors( # noqa: C901, D103 grid_extra_keys = list( gdf.drop(['geometry', 'Longitude', 'Latitude'], axis=1).columns ) - + # Convert GeoDataFrame to regular DataFrame for consistency with the rest of the code grid_df = pd.DataFrame(gdf.drop(columns='geometry')) @@ -131,7 +132,9 @@ def find_neighbors( # noqa: C901, D103 else: neighbors_to_get = neighbors - nbrs = NearestNeighbors(n_neighbors=neighbors_to_get, algorithm='ball_tree').fit(X) + nbrs = NearestNeighbors(n_neighbors=neighbors_to_get, algorithm='ball_tree').fit( + X + ) # load the building data file with open(asset_file, encoding='utf-8') as f: # noqa: PTH123 @@ -307,34 +310,46 @@ def find_neighbors( # noqa: C901, D103 ] * e scale_list = np.ones(len(event_list)) - else : + else: event_list = [] scale_list = [] event_type = 'intensityMeasure' # Determine event_count (number of IMs per grid point) - im_columns 
= [col for col in grid_df.columns if col not in ['geometry', 'Longitude', 'Latitude']] + im_columns = [ + col + for col in grid_df.columns + if col not in ['geometry', 'Longitude', 'Latitude'] + ] event_count = len(im_columns) # for each neighbor for sample_j, nbr in enumerate(nbr_samples): - # make sure we resample events if samples > event_count event_j = sample_j % event_count # get the index of the nth neighbor nbr_index = ind_list[nbr] - + # For GIS files, create a new CSV file csv_filename = f'Site_{sample_j}.csv' csv_path = event_dir / csv_filename - + # Create a CSV file with data from the GIS file # Use actual data from the GIS file if available, otherwise use dummy data - im_columns = [col for col in grid_df.columns if col not in ['geometry', 'Longitude', 'Latitude']] - - im_data = pd.DataFrame({col: [grid_df.iloc[nbr_index][col]] * event_count for col in im_columns}) + im_columns = [ + col + for col in grid_df.columns + if col not in ['geometry', 'Longitude', 'Latitude'] + ] + + im_data = pd.DataFrame( + { + col: [grid_df.iloc[nbr_index][col]] * event_count + for col in im_columns + } + ) im_data.to_csv(csv_path, index=False) # save the collection file name and the IM row id diff --git a/modules/performUQ/common/__init__.py b/modules/performUQ/common/__init__.py new file mode 100644 index 000000000..6e031999e --- /dev/null +++ b/modules/performUQ/common/__init__.py @@ -0,0 +1 @@ +# noqa: D104 diff --git a/modules/performUQ/common/adaptive_doe.py b/modules/performUQ/common/adaptive_doe.py new file mode 100644 index 000000000..3a636386d --- /dev/null +++ b/modules/performUQ/common/adaptive_doe.py @@ -0,0 +1,158 @@ +"""Implements an adaptive design of experiments strategy using Gaussian Process (GP) and Principal Component Analysis (PCA).""" + +import numpy as np +from space_filling_doe import LatinHypercubeSampling + + +class AdaptiveDesignOfExperiments: + """ + Adaptive Design of Experiments (DoE) using Gaussian Process (GP) and Principal Component Analysis (PCA). + + This class implements an adaptive design of experiments strategy to select new training points + based on the Integrated Mean Squared Error (IMSE) criterion. + + Attributes + ---------- + gp_model (list): List of Gaussian Process models. + pca (PCA): Principal Component Analysis object. + domain (array-like): The domain of the input space. + + Methods + ------- + _lengthscale_for_doe(): + Compute the lengthscale for the design of experiments. + + _kernel_for_doe(): + Create a kernel for the design of experiments. + + _gp_for_doe(): + Create a Gaussian Process model for the design of experiments. + + _imse_w_approximation(X_train, mci_samples, candidate_training_points): + Compute the IMSE approximation for candidate training points. + + select_training_points(X_train, n_points, mci_samples, n_candidates): + Select new training points based on the IMSE criterion. + """ + + def __init__(self, gp_model, pca, domain): + """ + Initialize the AdaptiveDesignOfExperiments class. + + Parameters + ---------- + gp_model (list): List of Gaussian Process models. + pca (PCA): Principal Component Analysis object. + domain (array-like): The domain of the input space. + """ + self.gp_model = gp_model + self.pca = pca + self.domain = domain + + self._lengthscale_for_doe() + self._kernel_for_doe() + self._gp_for_doe() + + def _lengthscale_for_doe(self): + """ + Compute the lengthscale for the design of experiments. 
+ + The lengthscale is computed as a weighted sum of the lengthscales of the individual GP models, + where the weights are the explained variances of the PCA components. + + Returns + ------- + float: The computed lengthscale. + """ + eigenvalues = self.pca.explained_variance_ + w = eigenvalues / np.sum(eigenvalues) + lengthscales = np.atleast_2d([m.kernel.lengthscale for m in self.gp_model]) + self.lengthscale_star = np.sum(w * lengthscales) + return self.lengthscale_star + + def _kernel_for_doe(self): + """ + Create a kernel for the design of experiments. + + The kernel is a copy of the kernel of the first GP model, with the lengthscale set to the computed lengthscale. + + Returns + ------- + Kernel: The created kernel. + """ + self.kernel = self.gp_model.kernel.copy() + self.kernel.lengthscale = self.lengthscale_star + return self.kernel + + def _gp_for_doe(self): + """ + Create a Gaussian Process model for the design of experiments. + + The GP model is a copy of the first GP model, with the kernel set to the created kernel. + + Returns + ------- + GaussianProcessRegressor: The created GP model. + """ + self.gp_model_for_doe = self.gp_model[0].copy() + self.gp_model_for_doe.kernel = self.kernel + return self.gp_model_for_doe + + def _imse_w_approximation(self, x_train, mci_samples, candidate_training_points): + """ + Compute the IMSE approximation for candidate training points. + + Parameters + ---------- + X_train (array-like): The current training data. + mci_samples (array-like): Monte Carlo integration samples. + candidate_training_points (array-like): Candidate training points. + + Returns + ------- + array: The IMSE values for the candidate training points. + """ + self.gp_model_for_doe.set_XY( + x_train, + np.zeros((x_train.shape[0], 1)), + ) + _, pred_var = self.gp_model_for_doe.predict(mci_samples) + n_theta = x_train.shape[1] + beta = 2.0 * n_theta + imse = np.zeros((candidate_training_points.shape[0], 1)) + for i, candidate in enumerate(candidate_training_points): + correlation_vector = self.gp_model_for_doe.kern.K(mci_samples, candidate) + imse[i] = ( + 1 + / (mci_samples.shape[0]) + * np.dot(correlation_vector**beta, pred_var) + ) + return imse + + def select_training_points(self, x_train, n_points, mci_samples, n_candidates): + """ + Select new training points based on the IMSE criterion. + + Parameters + ---------- + X_train (array-like): The current training data. + n_points (int): The number of new training points to select. + mci_samples (array-like): Monte Carlo integration samples. + n_candidates (int): The number of candidate training points to generate. + + Returns + ------- + array: The selected new training points. + """ + dimension = x_train.shape[1] + for _ in range(n_points): + lhs = LatinHypercubeSampling( + n_samples=n_candidates, n_dimensions=dimension + ) + candidate_training_points = lhs.generate(self.domain) + imse = self._imse_w_approximation( + x_train, mci_samples, candidate_training_points + ) + next_training_point = candidate_training_points[np.argmax(imse)] + x_train = np.vstack((x_train, next_training_point)) + return x_train[-n_points:, :] diff --git a/modules/performUQ/common/common_datamodels.py b/modules/performUQ/common/common_datamodels.py new file mode 100644 index 000000000..f0afa9ad5 --- /dev/null +++ b/modules/performUQ/common/common_datamodels.py @@ -0,0 +1,194 @@ +""" +Defines data models for the performUQ application using Pydantic. + +Classes: + EDPItem: Represents an Engineering Demand Parameter item. 
+ CorrelationValue: Represents a correlation value. + ApplicationData: Represents application data. + FEM_App: Represents a FEM application. + UQ_App: Represents a UQ application. + Applications: Represents a collection of applications. + GP_AB_UQData: Represents GP_AB_UQ data. + Model: Represents the main model. +""" + +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING, Any, Literal, Optional + +if TYPE_CHECKING: + from pathlib import Path + +from pydantic import BaseModel, Field, validator + +sys.path.append( + '/Users/aakash/SimCenter/SimCenterBackendApplications/modules/performUQ/common' +) + + +class EDPItem(BaseModel): + """ + Represents an EDP (Engineering Demand Parameter) item. + + Attributes + ---------- + name (str): The name of the EDP item. + type (Literal["scalar", "field"]): The type of the EDP item, either "scalar" or "field". + length (int): The length of the EDP item, must be greater than 0. + """ + + name: str + type: Literal['scalar', 'field'] + length: int = Field(gt=0) + + +class CorrelationValue(BaseModel): + """ + Represents a correlation value. + + Attributes + ---------- + values (float): The correlation value, must be between -1 and 1. + """ + + values: float = Field(..., ge=-1, le=1) + + +class ApplicationData(BaseModel): + """ + Represents application data. + + Attributes + ---------- + MS_Path (Path): The path to the MS. + mainScript (str): The main script name. + postprocessScript (str): The post-process script name. + """ + + MS_Path: Path + mainScript: str # noqa: N815 + postprocessScript: str # noqa: N815 + + +class FEM_App(BaseModel): + """ + Represents a FEM (Finite Element Method) application. + + Attributes + ---------- + Application (str): The name of the application. + ApplicationData (ApplicationData): The application data. + """ + + Application: str + ApplicationData: ApplicationData + + +class UQ_App(BaseModel): + """ + Represents a UQ (Uncertainty Quantification) application. + + Attributes + ---------- + Application (str): The name of the application. + ApplicationData (dict[str, Any]): The application data. + """ + + Application: str + ApplicationData: dict[str, Any] + + +class Applications(BaseModel): + """ + Represents a collection of applications. + + Attributes + ---------- + FEM (FEM_App): The FEM application. + UQ (UQ_App): The UQ application. + """ + + FEM: FEM_App + UQ: UQ_App + + +class GP_AB_UQData(BaseModel): + """ + Represents GP_AB_UQ data. + + Attributes + ---------- + calibration_data_file_name (str): The name of the calibration data file. + calibration_data_path (Path): The path to the calibration data. + log_likelihood_file_name (Optional[str]): The name of the log likelihood file. + log_likelihood_path (Optional[Path]): The path to the log likelihood file. + sample_size (int): The sample size. + seed (int): The seed value. + """ + + calibration_data_file_name: str + calibration_data_path: Path + log_likelihood_file_name: str | None + log_likelihood_path: Path | None + sample_size: int + seed: int + + +class Model(BaseModel): + """ + Represents the main model. + + Attributes + ---------- + Applications (Applications): The applications. + EDP (List[EDPItem]): The list of EDP items. + FEM (Dict[str, str]): The FEM data. + UQ (GP_AB_UQData): The UQ data. + WorkflowType (str): The workflow type. + correlationMatrix (List[float]): The correlation matrix. + localAppDir (str): The local application directory. + randomVariables (list[dict[str, Any]]): The list of random variables. 
+ remoteAppDir (str): The remote application directory. + resultType (str): The result type. + runDir (str): The run directory. + runType (str): The run type. + summary (List[str]): The summary. + workingDir (str): The working directory. + """ + + Applications: Applications + EDP: list[EDPItem] + FEM: dict[str, str] # Empty dict + UQ: GP_AB_UQData + WorkflowType: str + correlationMatrix: list[float] # noqa: N815 + localAppDir: str # noqa: N815 + randomVariables: list[dict[str, Any]] # noqa: N815 + remoteAppDir: str # noqa: N815 + resultType: str # noqa: N815 + runDir: str # noqa: N815 + runType: str # noqa: N815 + summary: list[str] + workingDir: str # noqa: N815 + + @validator('correlationMatrix', each_item=True) + def check_correlation_matrix(cls, v): # noqa: N805 + """ + Validate each item in the correlation matrix. + + Args: + v (float): The correlation value. + + Raises + ------ + ValueError: If the correlation value is not between -1 and 1. + + Returns + ------- + float: The validated correlation value. + """ + if not (-1 <= v <= 1): + error_message = 'Each correlation value must be between -1 and 1.' + raise ValueError(error_message) + return v diff --git a/modules/performUQ/common/convergence_metrics.py b/modules/performUQ/common/convergence_metrics.py new file mode 100644 index 000000000..ad3f06bb3 --- /dev/null +++ b/modules/performUQ/common/convergence_metrics.py @@ -0,0 +1,197 @@ +""" +Functions to calculate various convergence metrics. + +These include generalized KL divergence (GKL), generalized MAP (GMAP), and +Leave-One-Out Normalized Root Mean Square Error (LOO NRMSE). +""" + +import numpy as np +from scipy.optimize import minimize +from scipy.special import logsumexp + + +def _calculate_normalization_constants( + current_function_values, previous_function_values +): + """ + Calculate normalization constants for the current and previous function values. + + Args: + current_function_values (np.ndarray): The current function values. + previous_function_values (np.ndarray): The previous function values. + + Returns + ------- + tuple: A tuple containing the optimized alpha_1 and alpha_2 values. + """ + + def objective_function(log_alphas): + """ + Objective function to minimize the difference from 1 for c1 and c2. + + Args: + log_alphas (list): List containing log(alpha_1) and log(alpha_2). + + Returns + ------- + float: The value of the objective function. 
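+
+        Notes
+        -----
+        The value minimized is (c1 - 1)**2 + (c2 - 1)**2, where c1 and c2 are
+        the sample averages of each alpha-scaled density divided by the
+        equally weighted mixture of the two alpha-scaled densities, with the
+        ratios evaluated in log space for numerical stability.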
+ """ + log_alpha_1, log_alpha_2 = log_alphas + + numerator_current = current_function_values - log_alpha_1 + numerator_previous = previous_function_values - log_alpha_2 + + log_normalized_proposal = np.log(0.5) + np.logaddexp( + numerator_current, numerator_previous + ) + + c1 = ( + 1 + / len(numerator_current) + * np.exp(logsumexp(numerator_current - log_normalized_proposal)) + ) + c2 = ( + 1 + / len(numerator_previous) + * np.exp(logsumexp(numerator_previous - log_normalized_proposal)) + ) + + return (c1 - 1) ** 2 + (c2 - 1) ** 2 + + # Initial guesses for log(alpha_1) and log(alpha_2) + initial_log_alpha_1 = logsumexp(current_function_values) + initial_log_alpha_2 = logsumexp(previous_function_values) + initial_guess = [initial_log_alpha_1, initial_log_alpha_2] + + # Perform the optimization + result = minimize(objective_function, initial_guess, method='BFGS') + + # Extract optimized alpha_1 and alpha_2 + optimized_log_alpha_1, optimized_log_alpha_2 = result.x + alpha_1_optimized = np.exp(optimized_log_alpha_1) + alpha_2_optimized = np.exp(optimized_log_alpha_2) + + return (alpha_1_optimized, alpha_2_optimized) + + +def _calculate_kl_divergence( + current_log_target_function, previous_log_target_function, samples +): + """ + Calculate the KL divergence between the current and previous log target functions. + + Args: + current_log_target_function (callable): The current log target function. + previous_log_target_function (callable): The previous log target function. + samples (np.ndarray): The samples to evaluate the functions. + + Returns + ------- + float: The KL divergence estimate. + """ + current_function_values = current_log_target_function(samples) + previous_function_values = previous_log_target_function(samples) + alpha_1, alpha_2 = _calculate_normalization_constants( + current_function_values, previous_function_values + ) + kl_divergence_estimate = ( + 1 + / len(samples) + * np.sum( + np.log( + current_function_values + / previous_function_values + * alpha_2 + / alpha_1 + ) + * current_function_values + / ( + 1 + / 2 + * ( + current_function_values + + alpha_1 / alpha_2 * previous_function_values + ) + ) + ) + ) + + return kl_divergence_estimate # noqa: RET504 + + +def calculate_gkl( + current_log_target_function, previous_log_target_function, samples +): + """ + Calculate the generalized KL divergence (GKL). + + Args: + current_log_target_function (callable): The current log target function. + previous_log_target_function (callable): The previous log target function. + samples (np.ndarray): The samples to evaluate the functions. + + Returns + ------- + float: The GKL value. + """ + kl_divergence_estimate = _calculate_kl_divergence( + current_log_target_function, previous_log_target_function, samples + ) + n_theta = np.shape(samples)[1] + gkl = 1 / n_theta * kl_divergence_estimate + return gkl # noqa: RET504 + + +def calculate_gmap( + current_log_target_function, + previous_log_target_function, + samples, + prior_variances, +): + """ + Calculate the generalized MAP (GMAP). + + Args: + current_log_target_function (callable): The current log target function. + previous_log_target_function (callable): The previous log target function. + samples (np.ndarray): The samples to evaluate the functions. + prior_variances (np.ndarray): The prior variances. + + Returns + ------- + float: The GMAP value. 
+ """ + current_function_values = current_log_target_function(samples) + previous_function_values = previous_log_target_function(samples) + current_map = np.argmax(current_function_values) + previous_map = np.argmax(previous_function_values) + + gmap = np.sqrt(np.sum((current_map - previous_map) ** 2 / prior_variances)) + return gmap # noqa: RET504 + + +def calculate_loo_nrmse_w( + loo_predictions, + gp_surrogate_model_prediction, + weights=None, +): + """ + Calculate the Leave-One-Out Normalized Root Mean Square Error (LOO NRMSE) with weights. + + Args: + loo_predictions (np.ndarray): The LOO predictions. + gp_surrogate_model_prediction (np.ndarray): The GP surrogate model predictions. + weights (np.ndarray, optional): The weights for the predictions. Defaults to None. + + Returns + ------- + np.ndarray: The LOO NRMSE values. + """ + if weights is None: + weights = np.ones_like(loo_predictions) + normalized_weights = weights / np.sum(weights) + g_i = np.linalg.norm( + loo_predictions - gp_surrogate_model_prediction, axis=1, keepdims=True + ) / np.linalg.norm(gp_surrogate_model_prediction, axis=1, keepdims=True) + g_cv = normalized_weights * g_i / len(loo_predictions) + return g_cv # noqa: RET504 diff --git a/modules/performUQ/common/gp_ab_algorithm.py b/modules/performUQ/common/gp_ab_algorithm.py new file mode 100644 index 000000000..801a7e721 --- /dev/null +++ b/modules/performUQ/common/gp_ab_algorithm.py @@ -0,0 +1,766 @@ +""" +Module implementing the GP-AB Algorithm for Bayesian calibration. + +It includes classes and functions for performing Gaussian Process modeling, +Principal Component Analysis, and various convergence metrics. +""" + +import importlib +import json +import os +import sys +import time +from functools import partial +from pathlib import Path +from typing import Literal + +import common_datamodels +import convergence_metrics +import numpy as np +import pydantic + +# sys.path.append( +# "/Users/aakash/SimCenter/SimCenterBackendApplications/modules/performUQ/common" +# ) +import uq_utilities +from adaptive_doe import AdaptiveDesignOfExperiments +from gp_model import GaussianProcessModel +from principal_component_analysis import PrincipalComponentAnalysis +from scipy.stats import invgamma, norm +from space_filling_doe import LatinHypercubeSampling +from tmcmc import TMCMC, calculate_warm_start_stage + + +def log_likelihood(prediction_error_vector, prediction_error_variance): + """ + Calculate the log-likelihood of the prediction errors given the variance. + + Args: + prediction_error_vector (np.ndarray): The vector of prediction errors. + prediction_error_variance (float): The variance of the prediction errors. + + Returns + ------- + float: The log-likelihood value. + """ + return np.sum( + norm.logpdf(prediction_error_vector, 0, np.sqrt(prediction_error_variance)) + ) + + +def _response_approximation(current_gp, current_pca, model_parameters): + """ + Approximate the response using the current GP model and PCA. + + Args: + current_gp (GaussianProcessModel): The current Gaussian Process model. + current_pca (PrincipalComponentAnalysis): The current PCA model. + model_parameters (np.ndarray): The model parameters. + + Returns + ------- + np.ndarray: The approximated response. + """ + latent_predictions, _ = current_gp.predict(model_parameters) + gp_prediction = current_pca.project_back_to_original_space(latent_predictions) + return gp_prediction # noqa: RET504 + + +class GP_AB_Algorithm: + """ + A class to represent the GP-AB Algorithm for Bayesian calibration. 
+ + Attributes + ---------- + data (np.ndarray): The observed data. + input_dimension (int): The input dimension of the model. + output_dimension (int): The output dimension of the model. + output_length_list (list[int]): The list of output lengths. + domain (list[tuple[float, float]]): The domain for each dimension. + prior_variances (np.ndarray): The prior variances. + prior_pdf_function (callable): The prior PDF function. + log_likelihood_function (callable): The log-likelihood function. + pca_threshold (float): The threshold for PCA. + gkl_threshold (float): The threshold for GKL. + gmap_threshold (float): The threshold for GMAP. + max_simulations (int): The maximum number of simulations. + max_computational_time (float): The maximum computational time. + start_time (float): The start time of the algorithm. + converged (bool): Whether the algorithm has converged. + budget_exceeded (bool): Whether the budget has been exceeded. + terminate (bool): Whether to terminate the algorithm. + num_experiments (list[int]): The number of experiments. + num_recalibration_experiments (int): The number of recalibration experiments. + recalibration_ratio (float): The recalibration ratio. + sample_transformation_function (callable): The sample transformation function. + model_evaluation_function (callable): The model evaluation function. + run_type (str): The run type (e.g., "runningLocal"). + parallel_pool (uq_utilities.ParallelPool): The parallel pool instance. + parallel_evaluation_function (callable): The parallel evaluation function. + loocv_threshold (float): The threshold for LOOCV. + results (dict): The results dictionary. + current_gp_model (GaussianProcessModel): The current GP model. + current_pca (PrincipalComponentAnalysis): The current PCA model. + samples_dict (dict): The dictionary of samples. + betas_dict (dict): The dictionary of betas. + log_likelihoods_dict (dict): The dictionary of log-likelihoods. + log_target_density_values_dict (dict): The dictionary of log-target density values. + log_evidence_dict (dict): The dictionary of log evidence. + """ + + def __init__( # noqa: PLR0913 + self, + data, + output_length_list, + output_names_list, # noqa: ARG002 + input_dimension, + output_dimension, + domain, + model_evaluation_function, + sample_transformation_function, + prior_pdf_function, + log_likelihood_function, + prior_variances, + max_simulations=np.inf, + max_computational_time=np.inf, + pca_threshold=0.999, + run_type='runningLocal', + loocv_threshold=0.2, + recalibration_ratio=0.1, + gkl_threshold=0.01, + gmap_threshold=0.01, + ): + """ + Initialize the GP_AB_Algorithm class. + + Args: + data (np.ndarray): The observed data. + output_length_list (list[int]): The list of output lengths. + output_names_list (list[str]): The list of output names. + input_dimension (int): The input dimension of the model. + output_dimension (int): The output dimension of the model. + domain (list[tuple[float, float]]): The domain for each dimension. + model_evaluation_function (callable): The model evaluation function. + sample_transformation_function (callable): The sample transformation function. + prior_pdf_function (callable): The prior PDF function. + log_likelihood_function (callable): The log-likelihood function. + prior_variances (np.ndarray): The prior variances. + max_simulations (int, optional): The maximum number of simulations. Defaults to np.inf. + max_computational_time (float, optional): The maximum computational time. Defaults to np.inf. 
+ pca_threshold (float, optional): The threshold for PCA. Defaults to 0.999. + run_type (str, optional): The run type (e.g., "runningLocal"). Defaults to "runningLocal". + loocv_threshold (float, optional): The threshold for LOOCV. Defaults to 0.2. + recalibration_ratio (float, optional): The recalibration ratio. Defaults to 0.1. + gkl_threshold (float, optional): The threshold for GKL. Defaults to 0.01. + gmap_threshold (float, optional): The threshold for GMAP. Defaults to 0.01. + """ + self.data = data + self.input_dimension = input_dimension + self.output_dimension = output_dimension + self.output_length_list = output_length_list + self.domain = domain + self.prior_variances = prior_variances + + self.inputs = None + self.outputs = None + self.latent_outputs = None + + self.prior_pdf_function = prior_pdf_function + self.log_likelihood_function = log_likelihood_function + + self.pca_threshold = pca_threshold + self.num_pca_components_list = [] + + self.gkl_threshold = gkl_threshold + self.gmap_threshold = gmap_threshold + + self.max_simulations = max_simulations + self.max_computational_time = max_computational_time + self.start_time = time.time() + + self.converged = False + self.budget_exceeded = False + self.terminate = False + + self.num_experiments = [0] + self.num_recalibration_experiments = 0 + self.recalibration_ratio = recalibration_ratio + + self.sample_transformation_function = sample_transformation_function + self.model_evaluation_function = model_evaluation_function + self.run_type = run_type + self.parallel_pool = uq_utilities.get_parallel_pool_instance(run_type) + self.parallel_evaluation_function = self.parallel_pool.pool.starmap + + self.loocv_threshold = loocv_threshold + + self.results = {} + self.current_gp_model = GaussianProcessModel( + self.input_dimension, 1, ARD=True + ) + self.current_pca = PrincipalComponentAnalysis(self.pca_threshold) + + self.samples_dict = {} + self.betas_dict = {} + self.log_likelihoods_dict = {} + self.log_target_density_values_dict = {} + self.log_evidence_dict = {} + + def _evaluate_in_parallel(self, func, samples): + """ + Evaluate the model in parallel using the provided function and samples. + + Args: + func (callable): The function to evaluate. + samples (np.ndarray): The samples to evaluate. + + Returns + ------- + np.ndarray: The evaluated outputs. + """ + transformed_samples = self.sample_transformation_function(samples) + simulation_numbers = np.arange(len(samples)) + iterable = zip(simulation_numbers, transformed_samples) + outputs = np.atleast_2d( + list(self.parallel_evaluation_function(func, iterable)) + ) + return outputs # noqa: RET504 + + def _perform_initial_doe(self, n_samples): + """ + Perform the initial Design of Experiments (DoE) using Latin Hypercube Sampling. + + Args: + n_samples (int): The number of samples to generate. + + Returns + ------- + np.ndarray: The generated samples. + """ + self.initial_doe = LatinHypercubeSampling( + n_samples=n_samples, n_dimensions=self.input_dimension + ) + samples = self.initial_doe.generate(self.domain) + return samples # noqa: RET504 + + def _get_initial_training_set(self, n_samples): + """ + Get the initial training set by performing DoE and evaluating the model. + + Args: + n_samples (int): The number of samples to generate. + + Returns + ------- + tuple: A tuple containing the inputs and outputs of the initial training set. 
+ """ + inputs = self._perform_initial_doe(n_samples) + outputs = self._evaluate_in_parallel(self.model_evaluation_function, inputs) + return inputs, outputs + + def _log_likelihood_approximation(self, response_approximation, samples): + """ + Approximate the log-likelihood for the given samples. + + Args: + response_approximation (callable): The response approximation function. + samples (np.ndarray): The samples to evaluate. + + Returns + ------- + np.ndarray: The approximated log-likelihood values. + """ + u_values = samples[:, : self.input_dimension] + model_parameters = self.sample_transformation_function(u_values).reshape( + 1, -1 + ) + predictions = response_approximation(model_parameters) + prediction_errors = self.data - predictions + + q = samples[:, self.input_dimension :] + + log_likes = [] + num_samples = samples.shape[0] + for i in range(num_samples): + ll = [] + start = 0 + for j in range(q.shape[1]): + ll.append( + self.log_likelihood_function( + prediction_errors[ + i, start : start + self.output_length_list[j] + ], + q[i, j], + ) + ) + start += self.output_length_list[j] + log_likes.append(np.sum(ll)) + return np.array(log_likes).reshape((num_samples, 1)) + + def _log_prior_pdf(self, samples): + """ + Calculate the log-prior PDF for the given samples. + + Args: + samples (np.ndarray): The samples to evaluate. + + Returns + ------- + np.ndarray: The log-prior PDF values. + """ + u_values = samples[:, : self.input_dimension] + model_parameters = self.sample_transformation_function(u_values) + log_prior_model_parameters = np.log( + self.prior_pdf_function(model_parameters) + ).reshape((-1, 1)) + q = samples[:, self.input_dimension :] + log_prior_q = np.sum(invgamma.logpdf(q, 1, scale=0.5), axis=1) + prior_log_pdf = log_prior_model_parameters + log_prior_q + return prior_log_pdf # noqa: RET504 + + def _log_posterior_approximation(self, samples, log_likelihoods): + """ + Approximate the log-posterior for the given samples and log-likelihoods. + + Args: + samples (np.ndarray): The samples to evaluate. + log_likelihoods (np.ndarray): The log-likelihood values. + + Returns + ------- + np.ndarray: The approximated log-posterior values. + """ + log_prior = self._log_prior_pdf(samples) + log_posterior = log_likelihoods + log_prior + return log_posterior # noqa: RET504 + + def loocv_measure(self, log_likelihood_approximation): + """ + Calculate the Leave-One-Out Cross-Validation (LOOCV) measure. + + Args: + log_likelihood_approximation (callable): The log-likelihood approximation function. + + Returns + ------- + float: The LOOCV measure. + """ + lls = log_likelihood_approximation(self.inputs) + loo_predictions = self.current_gp_model.loo_predictions(self.outputs) + loocv_measure = convergence_metrics.calculate_loo_nrmse_w( + loo_predictions, lls + ) + return loocv_measure # noqa: RET504 + + def calibrate_gp(self, model_parameters, model_outputs): + """ + Calibrate the Gaussian Process (GP) model using the given parameters and outputs. + + Args: + model_parameters (np.ndarray): The model parameters. + model_outputs (np.ndarray): The model outputs. 
+ """ + latent_outputs = self.current_pca.project_to_latent_space(model_outputs) + self.num_pca_components_list.append(np.shape(latent_outputs)[1]) + self.current_gp_model = GaussianProcessModel( + self.input_dimension, self.num_pca_components_list[-1], ARD=True + ) + self.current_gp_model.fit(model_parameters, latent_outputs) + self.num_recalibration_experiments = self.num_experiments[-1] + + def run_iteration(self, k): + """ + Run a single iteration of the GP-AB Algorithm. + + Args: + k (int): The iteration number. + + Returns + ------- + tuple: A tuple containing a boolean indicating whether to terminate and the current TMCMC results. + """ + self.iteration_number = k + model_parameters = self.inputs + model_outputs = self.outputs + + # Step 1.1: GP calibration + if ( + self.num_experiments[-1] - self.num_recalibration_experiments + >= self.recalibration_ratio * self.num_recalibration_experiments + ): + self.previous_gp_model = self.current_gp_model + self.latent_outputs = self.current_pca.project_to_latent_space( + model_outputs + ) + self.num_pca_components_list.append(np.shape(self.latent_outputs)[1]) + self.current_gp_model = GaussianProcessModel( + self.input_dimension, self.num_pca_components_list[-1], ARD=True + ) + self.current_gp_model.fit(model_parameters, self.latent_outputs) + self.num_recalibration_experiments = self.num_experiments[-1] + else: + self.current_gp_model = self.previous_gp_model + self.latent_outputs = self.current_pca.project_to_latent_space( + model_outputs + ) + self.num_pca_components_list.append(np.shape(self.latent_outputs)[1]) + + # Step 1.2: GP predictive model + gp_prediction_latent_mean, gp_prediction_latent_variance = ( + self.current_gp_model.predict(model_parameters) + ) + gp_prediction_mean = self.current_pca.project_back_to_original_space( # noqa: F841 + gp_prediction_latent_mean + ) + loo_predictions_latent_space = self.current_gp_model.loo_predictions( + self.latent_outputs + ) + loo_predictions = self.current_pca.project_back_to_original_space( # noqa: F841 + loo_predictions_latent_space + ) + + # Step 1.3: Posterior distribution approximation + response_approximation = partial( + _response_approximation, self.current_gp_model, self.current_pca + ) + log_likelihood_approximation = partial( + self._log_likelihood_approximation, response_approximation + ) + log_posterior_approximation = self._log_posterior_approximation + + # Step 2.1: Evaluate warm-starting for TMCMC + tmcmc = TMCMC(log_likelihood_approximation, log_posterior_approximation) + + j_star = 0 + beta = 0 + + samples_dict = {} + betas_dict = {} + log_likelihoods_dict = {} + log_target_density_values_dict = {} + log_evidence_dict = {} + + num_samples_per_stage = 50 + if k > 0: + # loocv_measure = self.loocv_measure() + # if loocv_measure < self.loocv_threshold: + # j_star = self._calculate_warm_start_stage( + # log_likelihood_approximation, self.betas_dict + # ) + j_star = calculate_warm_start_stage( + log_likelihood_approximation, + current_tmcmc_results, # noqa: F821 + ) + previous_log_posterior_approximation = log_posterior_approximation + + # Step 2.2: Sequential MC sampling + if j_star == 0: + samples = self._perform_initial_doe(num_samples_per_stage) + log_target_density_values = np.log( + self.prior_pdf_function(samples) + ).reshape((-1, 1)) + log_likelihood_values = np.ones_like(log_target_density_values) + log_evidence = 0 + beta = 0 + + samples_dict[j_star] = samples + betas_dict[j_star] = beta + log_likelihoods_dict[j_star] = log_likelihood_values + 
log_target_density_values_dict[j_star] = log_target_density_values + log_evidence_dict[j_star] = log_evidence + else: + for j in range(j_star): + samples_dict[j] = self.samples_dict[j] + betas_dict[j] = self.betas_dict[j] + log_likelihoods_dict[j] = self.log_likelihoods_dict[j] + log_target_density_values_dict[j] = ( + self.log_target_density_values_dict[j] + ) + log_evidence_dict[j] = self.log_evidence_dict[j] + + current_tmcmc_results = tmcmc.run( + samples_dict, + betas_dict, + log_likelihoods_dict, + log_target_density_values_dict, + log_evidence_dict, + np.random.default_rng(), + j_star, + num_burn_in=0, + ) + + # Step 3.1: Assess convergence + gkl = convergence_metrics.calculate_gkl( + log_posterior_approximation, + previous_log_posterior_approximation, + samples, + ) + gmap = convergence_metrics.calculate_gmap( + log_posterior_approximation, + previous_log_posterior_approximation, + samples, + self.prior_variances, + ) + self.converged = gkl < self.gkl_threshold and gmap < self.gmap_threshold + + # Step 3.2: Computational budget related termination + self.budget_exceeded = (len(self.inputs) >= self.max_simulations) or ( + time.time() - self.start_time >= self.max_computational_time + ) + self.terminate = self.converged or self.budget_exceeded + if self.terminate: + return self.terminate, current_tmcmc_results + + # Step 4.1: Select variance for DoE + n_training_points = 2 * self.input_dimension + self.exploitation_proportion = 0.5 + n_exploit = np.ceil(self.exploitation_proportion * n_training_points) + current_doe = AdaptiveDesignOfExperiments( + self.current_gp_model, self.current_pca, self.domain + ) + + # Step 4.2: Exploitation DoE + exploitation_training_points = current_doe.select_training_points( + self.inputs, n_exploit, samples, 1000 + ) + self.inputs = np.vstack([self.inputs, exploitation_training_points]) + + # Step 4.3: Exploration DoE + exploration_training_points = current_doe.select_training_points( + self.inputs, n_training_points - n_exploit, samples, 1000 + ) + self.inputs = np.vstack([self.inputs, exploration_training_points]) + + new_training_points = np.vstack( + [exploitation_training_points, exploration_training_points] + ) + self.num_experiments.append(len(self.inputs)) + + # Step 5: Response estimation + new_training_outputs = self._evaluate_in_parallel( + self.model_evaluation_function, new_training_points + ) + self.outputs = np.vstack([self.outputs, new_training_outputs]) + + return self.terminate, current_tmcmc_results + + def run_initial_doe(self, num_initial_doe_per_dim=2): + """ + Run the initial Design of Experiments (DoE). + + Args: + num_initial_doe_per_dim (int, optional): Number of initial DoE samples per dimension. Defaults to 2. + + Returns + ------- + tuple: A tuple containing the inputs, outputs, and number of initial DoE samples. + """ + num_initial_doe_samples = num_initial_doe_per_dim * self.input_dimension + inputs, outputs = self._get_initial_training_set(num_initial_doe_samples) + return inputs, outputs, num_initial_doe_samples + + def write_results(self): + """Write the results of the GP-AB Algorithm to a file.""" + # print(f'{self.iteration_number = }') + # print('Results written to file') + + +def main(input_arguments): + """ + Run the GP-AB Algorithm. + + Args: + input_arguments (InputArguments): The input arguments for the algorithm. 
+ """ + gp_ab = GP_AB_Algorithm(*preprocess(input_arguments)) + inputs, outputs, num_initial_doe_samples = gp_ab.run_initial_doe( + num_initial_doe_per_dim=2 + ) + gp_ab.inputs = inputs + gp_ab.outputs = outputs + gp_ab.num_experiments.append(num_initial_doe_samples) + iteration = 0 + while not gp_ab.terminate: + gp_ab.terminate, gp_ab.results = gp_ab.run_iteration(iteration) + gp_ab.write_results() + iteration += 1 + + +def read_inputs(input_json_file): + """ + Read and parse the input JSON file. + + Args: + input_json_file (str): Path to the input JSON file. + + Returns + ------- + tuple: A tuple containing UQ inputs, random variables, correlation matrix, EDP inputs, and application inputs. + """ + with Path(input_json_file).open(encoding='utf-8') as f: + inputs = json.load(f) + + # application_inputs = inputs["Applications"] + # uq_inputs = inputs["UQ"] + # rv_inputs = inputs["randomVariables"] + # correlation_matrix_inputs = inputs["correlationMatrix"] + # edp_inputs = inputs["EDP"] + + input_data = common_datamodels.Model.model_validate(inputs) + uq_inputs = input_data.UQ + rv_inputs = input_data.randomVariables + correlation_matrix_inputs = input_data.correlationMatrix + edp_inputs = inputs['EDP'] + application_inputs = input_data.Applications + + # application_inputs = common_datamodels.Applications.model_validate( + # inputs["Applications"] + # ) + # uq_inputs = GP_AB_UQData.model_validate(inputs["UQ"]) + # rv_inputs = inputs["randomVariables"] + # correlation_matrix_inputs = common_datamodels.CorrelationMatrix.from_list( + # inputs["correlationMatrix"] + # ) + # edp_inputs = common_datamodels.EDP(inputs["EDP"]) + + return ( + uq_inputs, + rv_inputs, + correlation_matrix_inputs, + edp_inputs, + application_inputs, + ) + + +def preprocess(input_arguments): + """ + Preprocess the input arguments for the GP-AB Algorithm. + + Args: + input_arguments (InputArguments): The input arguments for the algorithm. + + Returns + ------- + tuple: A tuple containing the preprocessed data required for the GP-AB Algorithm. 
+ """ + ( + uq_inputs, + rv_inputs, + correlation_matrix_inputs, + edp_inputs, + application_inputs, + ) = read_inputs(input_arguments.input_json_file) + + data_file = ( + uq_inputs.calibration_data_path / uq_inputs.calibration_data_file_name + ) + with data_file.open() as f: + data = np.genfromtxt(f, delimiter=',') + + log_likelihood_file_name = uq_inputs.log_likelihood_file_name + log_likelihood_path = uq_inputs.log_likelihood_path + log_likelihood_function = log_likelihood + if log_likelihood_file_name: + sys.path.append(str(log_likelihood_path)) + ll_module = importlib.import_module(log_likelihood_file_name) + log_likelihood_function = ll_module.log_likelihood + + joint_distribution = uq_utilities.ERANatafJointDistribution( + rv_inputs, + correlation_matrix_inputs, + ) + domain = [(-3, 3) for _ in range(len(rv_inputs))] + prior_variances = [1 for _ in range(len(rv_inputs))] # TODO(ABS): Validate this + # Transformation function from standard to physical space + sample_transformation_function = joint_distribution.u_to_x + + # Prior logpdf function + prior_pdf_function = joint_distribution.pdf + + main_script_path = str(application_inputs.FEM.ApplicationData.MS_Path) + + model = uq_utilities.get_default_model( + list_of_rv_data=rv_inputs, + edp_data=edp_inputs, + list_of_dir_names_to_copy_files_from=[main_script_path], + run_directory=input_arguments.path_to_working_directory, + driver_filename=str(input_arguments.driver_file_name), + workdir_prefix='workdir', + ) + model_evaluation_function = model.evaluate_model_once + + edp_names_list = [edp['name'] for edp in edp_inputs] + edp_lengths_list = [edp['length'] for edp in edp_inputs] + input_dimension = len(rv_inputs) + output_dimension = sum( + edp_lengths_list + ) # TODO(ABS): Validate this against length of data + + max_simulations = np.inf + max_computational_time = np.inf + pca_threshold = 0.999 + run_type = input_arguments.run_type + loocv_threshold = 0.2 + recalibration_ratio = 0.1 + + return ( + data, + edp_lengths_list, + edp_names_list, + input_dimension, + output_dimension, + domain, + model_evaluation_function, + sample_transformation_function, + prior_pdf_function, + log_likelihood_function, + prior_variances, + max_simulations, + max_computational_time, + pca_threshold, + run_type, + loocv_threshold, + recalibration_ratio, + ) + + +class InputArguments(pydantic.BaseModel): + """ + A class to represent the input arguments for the GP-AB Algorithm. + + Attributes + ---------- + path_to_working_directory : Path + The path to the working directory. + path_to_template_directory : Path + The path to the template directory. + run_type : Literal['runningLocal', 'runningRemote'] + The type of run (local or remote). + driver_file_name : Path + The name of the driver file. + input_json_file : Path + The path to the input JSON file. 
+ """ + + path_to_working_directory: Path + path_to_template_directory: Path + run_type: Literal['runningLocal', 'runningRemote'] + driver_file_name: Path + input_json_file: Path + + model_config = pydantic.ConfigDict(revalidate_instances='always') + + +if __name__ == '__main__': + this = Path(__file__).resolve() + os.chdir('/Users/aakash/Documents/quoFEM/LocalWorkDir/tmp.SimCenter') + args = { + 'path_to_working_directory': Path(sys.argv[1]).resolve(), + 'path_to_template_directory': Path(sys.argv[2]).resolve(), + 'run_type': sys.argv[3], + 'driver_file_name': Path(sys.argv[4]).resolve(), + 'input_json_file': Path(sys.argv[5]).resolve(), + } + input_arguments = InputArguments.model_validate(args) + main(input_arguments) + os.chdir(this.parent) diff --git a/modules/performUQ/common/gp_model.py b/modules/performUQ/common/gp_model.py new file mode 100644 index 000000000..3d8713b3c --- /dev/null +++ b/modules/performUQ/common/gp_model.py @@ -0,0 +1,142 @@ +"""Module for creating and using Gaussian Process models with the GaussianProcessModel class.""" + +import GPy +import numpy as np +from scipy.linalg import cho_solve + + +class GaussianProcessModel: + """ + A class to represent a Gaussian Process Model. + + Attributes + ---------- + input_dimension (int): The input dimension of the model. + output_dimension (int): The output dimension of the model. + ARD (bool): Automatic Relevance Determination flag. + kernel (GPy.kern.Matern52): The kernel used in the model. + model (list[GPy.models.GPRegression]): The list of GP regression models. + """ + + def __init__(self, input_dimension, output_dimension, ARD): # noqa: N803 + """ + Initialize the GaussianProcessModel. + + Args: + input_dimension (int): The input dimension of the model. + output_dimension (int): The output dimension of the model. + ARD (bool): Automatic Relevance Determination flag. + """ + self.input_dimension = input_dimension + self.output_dimension = output_dimension + self.ARD = ARD + + self.kernel = self._create_kernel() + self.model = self._create_surrogate() + # self._add_nugget_parameter() + self._fix_nugget_parameter() + + def _create_kernel(self): + """ + Create the kernel for the Gaussian Process. + + Returns + ------- + GPy.kern.Matern52: The Matern52 kernel. + """ + return GPy.kern.Matern52(input_dim=self.input_dimension, ARD=self.ARD) + + def _create_surrogate(self): + """ + Create the surrogate GP regression models. + + Returns + ------- + list[GPy.models.GPRegression]: The list of GP regression models. + """ + m_list = [] + for _ in range(self.output_dimension): + m_list += [ + GPy.models.GPRegression( + np.zeros((1, self.input_dimension)), + np.zeros((1, 1)), + self.kernel.copy(), + ) + ] + return m_list + + def _add_nugget_parameter(self): + """Add a nugget parameter to the GP models to improve numerical stability.""" + for i in range(self.output_dimension): + self.model[i].Gaussian_noise.variance = 1e-6 + + def _fix_nugget_parameter(self, value=1e-8): + """ + Fix the nugget parameter to a specific value. + + Args: + value (float): The value to fix the nugget parameter to. + """ + for i in range(self.output_dimension): + self.model[i].likelihood.variance = value + self.model[i].likelihood.variance.fix() + + def fit(self, x_train, y_train): + """ + Fit the GP models to the training data. + + Args: + x_train (np.ndarray): The input training data. + y_train (np.ndarray): The output training data. 
+ """ + for i in range(self.output_dimension): + self.model[i].set_XY(x_train, np.reshape(y_train[:, i], (-1, 1))) + self.model[i].optimize() + + def predict(self, x_predict): + """ + Predict the output for the given input data. + + Args: + x_predict (np.ndarray): The input data for prediction. + + Returns + ------- + tuple: A tuple containing the mean and variance of the predictions. + """ + y_mean = np.zeros((x_predict.shape[0], self.output_dimension)) + y_var = np.zeros((x_predict.shape[0], self.output_dimension)) + for i in range(self.output_dimension): + mean, var = self.model[i].predict(x_predict) + y_mean[:, i] = mean.flatten() + y_var[:, i] = var.flatten() + + return y_mean, y_var + + def loo_predictions(self, y_train): + """ + Calculate the Leave-One-Out (LOO) predictions. + + Args: + y_train (np.ndarray): The output training data. + + Returns + ------- + np.ndarray: The LOO predictions. + """ + loo_pred = np.zeros_like(y_train) + for i in range(self.output_dimension): + cholesky_factor = self.model[i].posterior.K_chol + alpha = cho_solve( + (cholesky_factor, True), np.reshape(y_train[:, i], (-1, 1)) + ) + cholesky_factor_inverse = cho_solve( + (cholesky_factor, True), np.eye(cholesky_factor.shape[0]) + ) + k_inv_diag = np.reshape( + np.sum(cholesky_factor_inverse**2, axis=1), (-1, 1) + ) + loo_pred[:, i] = ( + np.reshape(y_train[:, i], (-1, 1)) - alpha / k_inv_diag + ).flatten() + return loo_pred diff --git a/modules/performUQ/common/principal_component_analysis.py b/modules/performUQ/common/principal_component_analysis.py new file mode 100644 index 000000000..55f80a100 --- /dev/null +++ b/modules/performUQ/common/principal_component_analysis.py @@ -0,0 +1,94 @@ +"""Provides a class for performing Principal Component Analysis (PCA) for dimensionality reduction.""" + +import numpy as np +from sklearn.decomposition import PCA + + +class PrincipalComponentAnalysis: + """ + A class to perform Principal Component Analysis (PCA) for dimensionality reduction. + + Attributes + ---------- + pca_threshold (float): The threshold for the cumulative explained variance ratio to determine the number of components. + pca (PCA): The PCA model. + mean_vec (np.ndarray): The mean vector of the original data. + proj_matrix (np.ndarray): The projection matrix (PCA components). + eigenvalues (np.ndarray): The eigenvalues of the PCA components. + explained_variance_ratio (np.ndarray): The explained variance ratio of the PCA components. + n_components (int): The number of components to retain. + """ + + def __init__(self, pca_threshold) -> None: + """ + Initialize the PrincipalComponentAnalysis class. + + Args: + pca_threshold (float): The threshold for the cumulative explained variance ratio to determine the number of components. + """ + self.pca_threshold = pca_threshold + + self.pca = None + self.mean_vec = None + self.proj_matrix = None + self.eigenvalues = None + self.explained_variance_ratio = None + self.n_components = None + + def project_to_latent_space(self, outputs): + """ + Perform PCA on the output data to reduce the dimensionality. + + Args: + outputs (np.ndarray): The output data to be projected to the latent space. + + Returns + ------- + np.ndarray: The latent outputs after PCA transformation. 
+ """ + # Perform PCA on the output data to reduce the dimensionality + pca = PCA() + pca.fit(outputs) + + # Determine the number of components to retain based on the threshold value + explained_variance_ratio = pca.explained_variance_ratio_ + self.n_components = ( + np.argmax(np.cumsum(explained_variance_ratio) >= self.pca_threshold) + 1 + ) + + # Perform PCA with the specified number of components and transform the output data + # TODO (ABS): Reimplement without the second PCA fit + self.pca = PCA(n_components=self.n_components) + self.pca.fit(outputs) + latent_outputs = self.pca.transform(outputs) + + # Store the PCA parameters for later use in inverse transformation + self.mean_vec = self.pca.mean_ + self.proj_matrix = self.pca.components_ + self.eigenvalues = self.pca.explained_variance_ + self.explained_variance_ratio = self.pca.explained_variance_ratio_ + + return latent_outputs + + def project_back_to_original_space(self, latent_outputs): + """ + Inverse transform the latent outputs back to the original space using the stored PCA parameters. + + Args: + latent_outputs (np.ndarray): The latent outputs to be projected back to the original space. + + Returns + ------- + np.ndarray: The outputs in the original space. + + Raises + ------ + ValueError: If the PCA model has not been fitted yet. + """ + # Check if the PCA model has been fitted + if self.pca is None: + error_message = 'No PCA model has been fitted yet.' + raise ValueError(error_message) + # Inverse transform the latent outputs using the stored PCA parameters + outputs = self.pca.inverse_transform(latent_outputs) + return outputs # noqa: RET504 diff --git a/modules/performUQ/common/space_filling_doe.py b/modules/performUQ/common/space_filling_doe.py new file mode 100644 index 000000000..becd826fd --- /dev/null +++ b/modules/performUQ/common/space_filling_doe.py @@ -0,0 +1,62 @@ +""" +Provides a class for performing Latin Hypercube Sampling (LHS). + +This module generates space-filling designs. +""" + +from __future__ import annotations + +import numpy as np +from pydantic import BaseModel, PositiveInt +from scipy.stats import qmc + + +class LatinHypercubeSampling(BaseModel, validate_assignment=True): + """ + A class to perform Latin Hypercube Sampling (LHS) for generating space-filling designs. + + Attributes + ---------- + n_samples (PositiveInt): The number of samples to generate. + n_dimensions (PositiveInt): The number of dimensions for each sample. + seed (Optional[PositiveInt]): The seed for random number generation. + """ + + n_samples: PositiveInt + n_dimensions: PositiveInt + seed: PositiveInt | None = None + + def generate(self, domain=None): + """ + Generate samples using Latin Hypercube Sampling. + + Args: + domain (list[tuple[float, float]], optional): The domain for each dimension. Defaults to None. + + Returns + ------- + np.ndarray: The generated samples. + """ + lhs = qmc.LatinHypercube(self.n_dimensions, seed=self.seed) + samples = lhs.random(self.n_samples) + if domain is not None: + # Scale samples to the provided domain + lower_bounds = np.array([bound[0] for bound in domain]) + upper_bounds = np.array([bound[1] for bound in domain]) + samples = qmc.scale(samples, lower_bounds, upper_bounds) + return samples + + +if __name__ == '__main__': + """ + Example usage of the LatinHypercubeSampling class. 
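+
+    A minimal sketch (the sample counts, seed, and domain below are
+    illustrative assumptions; see also the commented example that follows):
+
+        doe = LatinHypercubeSampling(n_samples=10, n_dimensions=2, seed=42)
+        unit_samples = doe.generate()  # points in the unit square [0, 1]^2
+        scaled_samples = doe.generate(domain=[(0.0, 10.0), (-5.0, 5.0)])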
+ """ + # initial_doe = LatinHypercubeSampling(n_samples=10, n_dimensions=2) + # print(repr(initial_doe)) + # samples = initial_doe.generate() + # print(samples) + # domain = [(0, 10), (-5, 5)] # Example of domain for each dimension + # scaled_samples = initial_doe.generate(domain) + # print(scaled_samples) + # # initial_doe.seed = 0 + # # idoe = InitialDesignOfExperiments(n_samples=0, n_dimensions=0, seed=-1) diff --git a/modules/performUQ/common/tmcmc.py b/modules/performUQ/common/tmcmc.py new file mode 100644 index 000000000..a375db6e5 --- /dev/null +++ b/modules/performUQ/common/tmcmc.py @@ -0,0 +1,369 @@ +"""Implementation of the Transitional Markov Chain Monte Carlo (TMCMC) algorithm.""" + +import math + +import numpy as np +from scipy.special import logsumexp + + +def _calculate_weights_warm_start( + beta, current_loglikelihoods, previous_loglikelihoods +): + """ + Calculate the weights for the warm start stage. + + Args: + beta (float): The current beta value. + current_loglikelihoods (np.ndarray): The current log-likelihood values. + previous_loglikelihoods (np.ndarray): The previous log-likelihood values. + + Returns + ------- + np.ndarray: The calculated weights. + """ + log_weights = beta * (current_loglikelihoods - previous_loglikelihoods) + log_sum_weights = logsumexp(log_weights) + normalized_log_weights = log_weights - log_sum_weights + normalized_weights = np.exp(normalized_log_weights) + weights = normalized_weights / np.sum(normalized_weights) + return weights # noqa: RET504 + + +def calculate_warm_start_stage( + current_loglikelihood_approximation, previous_results, threshold_cov=1 +): + """ + Calculate the warm start stage number based on the coefficient of variation of weights. + + Args: + current_loglikelihood_approximation (callable): Function to approximate current log-likelihoods. + previous_results (tuple): The previous results containing samples, betas, and log-likelihoods. + threshold_cov (float, optional): The threshold for the coefficient of variation. Defaults to 1. + + Returns + ------- + int: The stage number for the warm start. + """ + stage_nums = sorted(previous_results[0].keys(), reverse=True) + for stage_num in stage_nums: + current_loglikelihoods = current_loglikelihood_approximation( + previous_results[0][stage_num] + ) + previous_loglikelihoods = previous_results[2][stage_num] + beta = previous_results[1][stage_num] + weights = _calculate_weights_warm_start( + beta, current_loglikelihoods, previous_loglikelihoods + ) + cov_weights = np.nanstd(weights) / np.nanmean(weights) + if cov_weights < threshold_cov: + return stage_num + return 0 + + +def _calculate_weights(beta_increment, log_likelihoods): + """ + Calculate the weights for the given beta increment and log-likelihoods. + + Args: + beta_increment (float): The increment in beta. + log_likelihoods (np.ndarray): The log-likelihood values. + + Returns + ------- + np.ndarray: The calculated weights. + """ + log_weights = beta_increment * log_likelihoods + log_sum_weights = logsumexp(log_weights) + normalized_log_weights = log_weights - log_sum_weights + normalized_weights = np.exp(normalized_log_weights) + weights = normalized_weights / np.sum(normalized_weights) + return weights # noqa: RET504 + + +def _calculate_log_evidence(beta_increment, log_likelihoods): + """ + Calculate the log evidence for the given beta increment and log-likelihoods. + + Args: + beta_increment (float): The increment in beta. + log_likelihoods (np.ndarray): The log-likelihood values. 
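+
+    Example (illustrative; identical log-likelihoods carry no new
+    information, so the estimated log-evidence increment is zero):
+        >>> float(_calculate_log_evidence(0.5, np.zeros(10)))
+        0.0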
+ + Returns + ------- + float: The calculated log evidence. + """ + log_evidence = logsumexp(beta_increment * log_likelihoods) - np.log( + len(log_likelihoods) + ) + return log_evidence # noqa: RET504 + + +def _increment_beta(log_likelihoods, beta, threshold_cov=1): + """ + Increment the beta value based on the coefficient of variation of weights. + + Args: + log_likelihoods (np.ndarray): The log-likelihood values. + beta (float): The current beta value. + threshold_cov (float, optional): The threshold for the coefficient of variation. Defaults to 1. + + Returns + ------- + float: The new beta value. + """ + if beta >= 1: + return 1 + beta_increment = 1 - beta + weights = _calculate_weights(beta_increment, log_likelihoods) + cov_weights = np.nanstd(weights) / np.nanmean(weights) + while cov_weights > threshold_cov: + beta_increment = 0.99 * beta_increment + weights = _calculate_weights(beta_increment, log_likelihoods) + cov_weights = np.nanstd(weights) / np.nanmean(weights) + proposed_beta = beta + beta_increment + new_beta = min(proposed_beta, 1) + return new_beta # noqa: RET504 + + +def _get_scaled_proposal_covariance(samples, weights, scale_factor=0.2): + """ + Calculate the scaled proposal covariance matrix. + + Args: + samples (np.ndarray): The samples. + weights (np.ndarray): The weights. + scale_factor (float, optional): The scale factor. Defaults to 0.2. + + Returns + ------- + np.ndarray: The scaled proposal covariance matrix. + """ + return scale_factor**2 * np.cov( + samples, rowvar=False, aweights=weights.flatten() + ) + + +class TMCMC: + """ + A class to perform Transitional Markov Chain Monte Carlo (TMCMC) sampling. + + Attributes + ---------- + _log_likelihood_approximation (callable): Function to approximate log-likelihoods. + _log_posterior_approximation (callable): Function to approximate log-posterior densities. + num_steps (int): The number of steps for each stage. + threshold_cov (float): The threshold for the coefficient of variation. + thinning_factor (int): The thinning factor for the MCMC chain. + adapt_frequency (int): The frequency of adaptation for the proposal distribution. + """ + + def __init__( + self, + log_likelihood_approximation_function, + log_target_density_approximation_function, + threshold_cov=1, + num_steps=1, + thinning_factor=10, + adapt_frequency=100, + ): + """ + Initialize the TMCMC class. + + Args: + log_likelihood_approximation_function (callable): Function to approximate log-likelihoods. + log_target_density_approximation_function (callable): Function to approximate log-posterior densities. + threshold_cov (float, optional): The threshold for the coefficient of variation. Defaults to 1. + num_steps (int, optional): The number of steps for each stage. Defaults to 1. + thinning_factor (int, optional): The thinning factor for the MCMC chain. Defaults to 10. + adapt_frequency (int, optional): The frequency of adaptation for the proposal distribution. Defaults to 100. 
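+
+        Example (an illustrative sketch; the density functions below are
+        placeholder assumptions, not the approximations used elsewhere):
+            >>> log_like = lambda x: -0.5 * np.sum(x**2, axis=1, keepdims=True)
+            >>> log_post = lambda x, log_l: log_l  # flat prior
+            >>> sampler = TMCMC(log_like, log_post, num_steps=2)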
+        """
+        self._log_likelihood_approximation = log_likelihood_approximation_function
+        self._log_posterior_approximation = log_target_density_approximation_function
+
+        self.num_steps = num_steps
+        self.threshold_cov = threshold_cov
+        self.thinning_factor = thinning_factor
+        self.adapt_frequency = adapt_frequency
+
+    def _run_one_stage(
+        self,
+        samples,
+        log_likelihoods,
+        log_target_density_values,
+        beta,
+        rng,
+        log_likelihood_function,
+        log_target_density_function,
+        scale_factor,
+        target_acceptance_rate,
+        do_thinning=False,  # noqa: FBT002
+        burn_in_steps=0,
+    ):
+        """
+        Run one stage of the TMCMC algorithm.
+
+        Args:
+            samples (np.ndarray): The samples.
+            log_likelihoods (np.ndarray): The log-likelihood values.
+            log_target_density_values (np.ndarray): The log-target density values.
+            beta (float): The current beta value.
+            rng (np.random.Generator): The random number generator.
+            log_likelihood_function (callable): Function to calculate log-likelihoods.
+            log_target_density_function (callable): Function to calculate log-target densities.
+            scale_factor (float): The scale factor for the proposal distribution.
+            target_acceptance_rate (float): The target acceptance rate for the MCMC chain.
+            do_thinning (bool, optional): Whether to perform thinning. Defaults to False.
+            burn_in_steps (int, optional): The number of burn-in steps. Defaults to 0.
+
+        Returns
+        -------
+        tuple: A tuple containing the new samples, new log-likelihoods, new log-target density values, new beta, and log evidence.
+        """
+        new_beta = _increment_beta(log_likelihoods, beta)
+        log_evidence = _calculate_log_evidence(new_beta - beta, log_likelihoods)
+        weights = _calculate_weights(new_beta - beta, log_likelihoods)
+
+        proposal_covariance = _get_scaled_proposal_covariance(
+            samples, weights, scale_factor
+        )
+
+        new_samples = np.zeros_like(samples)
+        new_log_likelihoods = np.zeros_like(log_likelihoods)
+        new_log_target_density_values = np.zeros_like(log_target_density_values)
+
+        current_samples = samples.copy()
+        current_log_likelihoods = log_likelihoods.copy()
+        current_log_target_density_values = log_target_density_values.copy()
+
+        num_samples = samples.shape[0]
+        num_accepts = 0
+        n_adapt = 1
+        step_count = 0
+        for k in range(burn_in_steps + num_samples):
+            # print(f'{k=}')
+            index = rng.choice(num_samples, p=weights.flatten())
+            # After burn-in, thin the chain in the final stage (or when
+            # explicitly requested) by running extra MCMC steps per seed.
+            # Use a local count so the thinning factor is not compounded
+            # across loop iterations or across stages.
+            num_steps = self.num_steps
+            if k >= burn_in_steps and (new_beta == 1 or do_thinning):
+                num_steps = self.num_steps * self.thinning_factor
+            for _ in range(num_steps):
+                step_count += 1
+                if step_count % self.adapt_frequency == 0:
+                    acceptance_rate = num_accepts / self.adapt_frequency
+                    num_accepts = 0
+                    n_adapt += 1
+                    ca = (acceptance_rate - target_acceptance_rate) / (
+                        math.sqrt(n_adapt)
+                    )
+                    scale_factor = scale_factor * np.exp(ca)
+                    proposal_covariance = _get_scaled_proposal_covariance(
+                        current_samples, weights, scale_factor
+                    )
+
+                proposed_state = rng.multivariate_normal(
+                    current_samples[index, :], proposal_covariance
+                ).reshape(1, -1)
+                log_likelihood_at_proposed_state = log_likelihood_function(
+                    proposed_state
+                )
+                log_target_density_at_proposed_state = log_target_density_function(
+                    proposed_state, log_likelihood_at_proposed_state
+                )
+                log_hastings_ratio = (
+                    log_target_density_at_proposed_state
+                    - current_log_target_density_values[index]
+                )
+                u = rng.uniform()
+                accept = np.log(u) <= log_hastings_ratio
+                if accept:
+                    num_accepts += 1
+                    current_samples[index, :] = proposed_state
+                    current_log_likelihoods[index] = log_likelihood_at_proposed_state
+
current_log_target_density_values[index] = ( + log_target_density_at_proposed_state + ) + if k >= burn_in_steps: + weights = _calculate_weights( + new_beta - beta, current_log_likelihoods + ) + if k >= burn_in_steps: + k_prime = k - burn_in_steps + new_samples[k_prime, :] = current_samples[index, :] + new_log_likelihoods[k_prime] = current_log_likelihoods[index] + new_log_target_density_values[k_prime] = ( + current_log_target_density_values[index] + ) + + return ( + new_samples, + new_log_likelihoods, + new_log_target_density_values, + new_beta, + log_evidence, + ) + + def run( + self, + samples_dict, + betas_dict, + log_likelihoods_dict, + log_target_density_values_dict, + log_evidence_dict, + rng, + stage_num, + num_burn_in=0, + ): + """ + Run the TMCMC algorithm. + + Args: + samples_dict (dict): Dictionary of samples for each stage. + betas_dict (dict): Dictionary of beta values for each stage. + log_likelihoods_dict (dict): Dictionary of log-likelihood values for each stage. + log_target_density_values_dict (dict): Dictionary of log-target density values for each stage. + log_evidence_dict (dict): Dictionary of log evidence values for each stage. + rng (np.random.Generator): The random number generator. + stage_num (int): The current stage number. + num_burn_in (int, optional): The number of burn-in steps. Defaults to 0. + + Returns + ------- + tuple: A tuple containing the updated dictionaries for samples, betas, log-likelihoods, log-target density values, and log evidence. + """ + self.num_dimensions = samples_dict[0].shape[1] + self.target_acceptance_rate = 0.23 + 0.21 / self.num_dimensions + self.scale_factor = 2.4 / np.sqrt(self.num_dimensions) + while betas_dict[stage_num] < 1: + # print(f'Stage {stage_num}') + ( + new_samples, + new_log_likelihoods, + new_log_target_density_values, + new_beta, + log_evidence, + ) = self._run_one_stage( + samples_dict[stage_num], + log_likelihoods_dict[stage_num], + log_target_density_values_dict[stage_num], + betas_dict[stage_num], + rng, + self._log_likelihood_approximation, + self._log_posterior_approximation, + self.scale_factor, + self.target_acceptance_rate, + do_thinning=False, + burn_in_steps=num_burn_in, + ) + stage_num += 1 + samples_dict[stage_num] = new_samples + betas_dict[stage_num] = new_beta + log_likelihoods_dict[stage_num] = new_log_likelihoods + log_target_density_values_dict[stage_num] = new_log_target_density_values + log_evidence_dict[stage_num] = log_evidence + + return ( + samples_dict, + betas_dict, + log_likelihoods_dict, + log_target_density_values_dict, + log_evidence_dict, + )
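+
+
+if __name__ == '__main__':
+    """
+    Illustrative usage sketch of the TMCMC class.
+
+    The target density below is a placeholder assumption (standard normal
+    likelihood with a flat prior), not the approximation used elsewhere in
+    the workflow; it only shows how the stage dictionaries are set up.
+    """
+    # rng = np.random.default_rng(42)
+    # num_samples, num_dimensions = 100, 2
+    #
+    # def log_likelihood(x):
+    #     return -0.5 * np.sum(x**2, axis=1, keepdims=True)
+    #
+    # def log_posterior(x, log_likelihoods):
+    #     return log_likelihoods  # flat prior
+    #
+    # initial_samples = rng.uniform(-5.0, 5.0, size=(num_samples, num_dimensions))
+    # initial_log_likelihoods = log_likelihood(initial_samples)
+    # tmcmc = TMCMC(log_likelihood, log_posterior)
+    # results = tmcmc.run(
+    #     samples_dict={0: initial_samples},
+    #     betas_dict={0: 0.0},
+    #     log_likelihoods_dict={0: initial_log_likelihoods},
+    #     log_target_density_values_dict={
+    #         0: log_posterior(initial_samples, initial_log_likelihoods)
+    #     },
+    #     log_evidence_dict={0: 0.0},
+    #     rng=rng,
+    #     stage_num=0,
+    # )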