Added uniform priors feature.
AdityaSavara committed Aug 1, 2020
1 parent 1c7832e commit a5cc92e
Showing 4 changed files with 46 additions and 11 deletions.
50 changes: 42 additions & 8 deletions CheKiPEUQ/InverseProblem.py
@@ -26,7 +26,26 @@ def __init__(self, UserInput = None):
        if self.UserInput.parameter_estimation_settings['verbose']:
            print("Parameter Estimation Object Initialized")
        #Leaving the original dictionary object intact, but making a new object to make covmat_prior.
-       UserInput.InputParametersPriorValuesUncertainties = UserInput.model['InputParametersPriorValuesUncertainties']
+       UserInput.InputParametersPriorValuesUncertainties = np.array(UserInput.model['InputParametersPriorValuesUncertainties'], dtype='float32')
+       UserInput.InputParametersPriorValuesUniformDistributionsIndices = [] #initializing.
+       if -1.0 in UserInput.InputParametersPriorValuesUncertainties: #This means that at least one of the uncertainties has been set to "-1", which signifies a uniform distribution.
+           if len(np.shape(UserInput.InputParametersPriorValuesUncertainties)) != 1:
+               print("A value of '-1' in the uncertainties signifies a uniform distribution for CheKiPEUQ. As of July 1st 2020, the uniform distribution feature is only compatible with a 1D array of uncertainties and is not compatible with providing a full covariance matrix. If you need such a feature, contact the developers, because it could be implemented. Eventually, a more sophisticated back end may be used which would allow such a feature.")
+           #If there is a uniform distribution, two actions need to be taken:
+           #First, we will populate InputParametersPriorValuesUncertainties with the standard deviation of a uniform distribution, so that MCMC steps of the right size can be taken.
+           #Second, we will need to make a custom calculation when calculating the prior probability that effectively excludes this variable. So we'll create an array of indices to help us with that.
+           #We will do both in a loop.
+           UserInput.InputParametersPriorValuesUniformDistributionsKey = UserInput.InputParametersPriorValuesUncertainties*1.0 #Just initializing.
+           for parameterIndex, uncertaintyValue in enumerate(UserInput.InputParametersPriorValuesUncertainties):
+               if uncertaintyValue == -1.0:
+                   UserInput.InputParametersPriorValuesUniformDistributionsKey[parameterIndex] = 1.0 #This sets the parameter as "True" for having a uniform distribution.
+                   UserInput.InputParametersPriorValuesUniformDistributionsIndices.append(parameterIndex)
+                   #In the case of a uniform distribution over [a, b], the standard deviation is given by sigma = (b-a)/sqrt(12):
+                   #See for example https://www.quora.com/What-is-the-standard-deviation-of-a-uniform-distribution-How-is-this-formula-determined
+                   std_prior_single_parameter = (UserInput.model['InputParameterPriorValues_upperBounds'][parameterIndex] - UserInput.model['InputParameterPriorValues_lowerBounds'][parameterIndex])/(12**0.5)
+                   UserInput.InputParametersPriorValuesUncertainties[parameterIndex] = std_prior_single_parameter #Note that going forward the array InputParametersPriorValuesUncertainties cannot be checked to see whether a parameter is from a uniform distribution. Instead, InputParametersPriorValuesUniformDistributionsKey must be checked.

        if len(np.shape(UserInput.InputParametersPriorValuesUncertainties)) == 1 and (len(UserInput.InputParametersPriorValuesUncertainties) > 0): #If it's a 1D array/list that is filled, we'll diagonalize it.
            UserInput.std_prior = np.array(UserInput.InputParametersPriorValuesUncertainties, dtype='float32') #using float32 since not everyone has 64-bit support.
            UserInput.var_prior = np.power(UserInput.InputParametersPriorValuesUncertainties, 2)
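
As a quick sanity check on the (b-a)/sqrt(12) relation used above, the following standalone snippet (not part of the commit; the bounds a and b are invented for illustration) compares the analytic standard deviation with the sample standard deviation of uniform draws:

import numpy as np

a, b = 2.0, 10.0  #hypothetical lower and upper bounds for one parameter
analytic_std = (b - a)/(12**0.5)  #same formula used for std_prior_single_parameter above
samples = np.random.default_rng(0).uniform(low=a, high=b, size=1000000)
print(analytic_std)   #about 2.309
print(samples.std())  #very close to 2.309 for this many samples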
@@ -403,7 +422,7 @@ def doGridSearch(self, searchType='getLogP', exportLog = True, gridSamplingAbsol

    #The below function is a helper function that is used during doeInfoGainMatrix. However, it can certainly be used for other purposes.
    def populateResponsesWithSyntheticData(self, parModulationCombination):
-       #For each paramaeter Modulation Combination we are going to obtain a matrix of info_gains that is based on a grid of the independent_variables.
+       #For each parameter Modulation Combination we are going to obtain a matrix of info_gains that is based on a grid of the independent_variables.
        #First we need to make some synthetic data using parModulationCombination for the discreteParameterVector
        discreteParameterVector = parModulationCombination
        simulationFunction = self.UserInput.simulationFunction #Do NOT use self.UserInput.model['simulateByInputParametersOnlyFunction'] because that won't work with reduced parameter space requests.
@@ -699,7 +718,7 @@ def doOptimizeSSR(self, simulationFunctionAdditionalArgs = (), method = None, op
        self.opt_SSR = optimizeResult.fun #This is the best fit SSR.
        if printOptimum == True:
            print("Final results from doOptimizeSSR:", self.opt_parameter_set, "final SSR:", self.opt_SSR)
-       #FIXME: Right now, the createAllPlots command will not work unless we populate the map parmaeter set, so that is what we are doing. But a better longterm solution needs to be made. In which the graph says "opt" rather than "MAP" and uses the appropriate variables.
+       #FIXME: Right now, the createAllPlots command will not work unless we populate the map parameter set, so that is what we are doing. But a better long-term solution is needed, in which the graph says "opt" rather than "MAP" and uses the appropriate variables.
        #TODO: Also need to add things like WSSR based on magnitude and variance weightings.
        self.map_parameter_set = self.opt_parameter_set
        return [self.opt_parameter_set, self.opt_SSR]
@@ -951,6 +970,9 @@ def getLogPrior(self,discreteParameterVector):
        if type(self.UserInput.model['custom_logPrior']) != type(None):
            logPrior = self.UserInput.model['custom_logPrior'](discreteParameterVector)
            return logPrior
+       boundsChecksPassed = self.doInputParameterBoundsChecks(discreteParameterVector)
+       if boundsChecksPassed == False: #If False, return a 'zero probability' type result; else, continue getting the log of the prior.
+           return float('-inf') #This approximates zero probability.
        if self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'] == "off":
            discreteParameterVector_scaled = np.array(discreteParameterVector)*1.0
        elif self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'] != "off":
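
The doInputParameterBoundsChecks helper called above is not shown in this diff. For orientation, a minimal sketch of the kind of check it performs, treating None entries as unbounded (per the comments in UserInput.py), might look like the following; this is a sketch under those assumptions, not the actual CheKiPEUQ implementation:

def doInputParameterBoundsChecks(self, discreteParameterVector):
    #Sketch only: return False as soon as any parameter violates one of its bounds.
    upperBounds = self.UserInput.model['InputParameterPriorValues_upperBounds']
    lowerBounds = self.UserInput.model['InputParameterPriorValues_lowerBounds']
    for parameterIndex, parameterValue in enumerate(discreteParameterVector):
        if len(upperBounds) > 0 and upperBounds[parameterIndex] is not None:
            if parameterValue > upperBounds[parameterIndex]:
                return False
        if len(lowerBounds) > 0 and lowerBounds[parameterIndex] is not None:
            if parameterValue < lowerBounds[parameterIndex]:
                return False
    return True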
@@ -959,7 +981,19 @@
            else: #TODO: If we're in the else statement, then the scaling uncertainties is a covariance matrix, for which we plan to do row and column scaling, which has not yet been implemented. #We could probably just use the diagonal in the short term.
                print("WARNING: There is an error in your self.UserInput.scaling_uncertainties. Contact the developers with a bug report.")
                discreteParameterVector_scaled = np.array(discreteParameterVector)*1.0
-       logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled,mean=self.UserInput.mu_prior_scaled,cov=self.UserInput.covmat_prior_scaled)
+
+       if len(self.UserInput.InputParametersPriorValuesUniformDistributionsIndices) == 0: #This is the normal case: no uniform distributions.
+           logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled,mean=self.UserInput.mu_prior_scaled,cov=self.UserInput.covmat_prior_scaled)
+       else: #This means that at least one variable has a uniform prior distribution, so we need to remove that parameter before calling multivariate_normal.logpdf.
+           #Note that this if-statement is intentionally after the scaling of uncertainties because that feature can be compatible with the uniform distribution.
+           discreteParameterVector_scaled_truncated = np.delete(discreteParameterVector_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices) #np.delete does not change the original array.
+           mu_prior_scaled_truncated = np.delete(self.UserInput.mu_prior_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices)
+           var_prior_scaled_truncated = np.delete(self.UserInput.var_prior_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices)
+           #Presently, we don't have full covmat support with uniform distributions. In principle, it would be better to use covmat_prior_scaled and delete the relevant rows and columns, since that would retain covariance support.
+           #For now, we just make the truncated covmat from var_prior_scaled_truncated.
+           covmat_prior_scaled_truncated = np.diagflat(var_prior_scaled_truncated)
+           logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled_truncated,mean=mu_prior_scaled_truncated,cov=covmat_prior_scaled_truncated)
+       #Note: The below code should be okay regardless of whether there are uniform distributions, since it only adjusts logPrior by a scalar.
        if self.UserInput.parameter_estimation_settings['undo_scaling_uncertainties_type'] == True:
            try:
                scaling_factor = float(self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'])
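
To make the truncation logic concrete, here is a self-contained illustration (with invented numbers) of how np.delete and np.diagflat reduce the mean and variances to only the gaussian-prior parameters before multivariate_normal.logpdf is evaluated:

import numpy as np
from scipy.stats import multivariate_normal

mu_prior = np.array([41.5, 13.0, 0.1])     #hypothetical prior means
var_prior = np.array([4.0, 1.0, 0.01])     #hypothetical prior variances
uniformIndices = [1]                       #pretend the second parameter has a uniform prior

x = np.array([42.0, 12.5, 0.12])           #a hypothetical parameter sample
x_truncated = np.delete(x, uniformIndices) #np.delete returns a copy; originals are unchanged
mu_truncated = np.delete(mu_prior, uniformIndices)
covmat_truncated = np.diagflat(np.delete(var_prior, uniformIndices))
print(multivariate_normal.logpdf(x=x_truncated, mean=mu_truncated, cov=covmat_truncated))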
@@ -1268,17 +1302,17 @@ def callback(self, discreteParameterVector, *extraArgs):
        if self.FirstCall == True:
            parameterNamesString = ""
            for parameterIndex in range(len(discreteParameterVector)):
-               parmeterName = f"Par-{parameterIndex+1}"
-               parameterNamesString += f"{parmeterName:10s}\t"
+               parameterName = f"Par-{parameterIndex+1}"
+               parameterNamesString += f"{parameterName:10s}\t"
            headerString = "Iter " + parameterNamesString + "ObjectiveF"
            print(headerString)
            self.FirstCall = False

        iterationNumberString = "{0:4d} ".format(self.iterationNumber)
        discreteParameterVector = self.lastTrialDiscreteParameterVector #We take the stored one rather than the one provided to make sure that we're getting the same one as the stored objective function.
        parameterValuesString = ""
-       for paramaterValue in discreteParameterVector:
-           parameterValuesString += f"{paramaterValue:10.5e}\t"
+       for parameterValue in discreteParameterVector:
+           parameterValuesString += f"{parameterValue:10.5e}\t"
        currentObjectiveFunctionValue = f"{self.lastTrialObjectiveFunction:10.5e}"
        iterationOutputString = iterationNumberString + parameterValuesString + currentObjectiveFunctionValue
        print(iterationOutputString)
3 changes: 2 additions & 1 deletion CheKiPEUQ/UserInput.py
@@ -16,6 +16,7 @@
model = {}
model['InputParameterPriorValues'] = [] #Should be like: [41.5, 41.5, 13.0, 13.0, 0.1, 0.1] # Ea1_mean, Ea2_mean, log_A1_mean, log_A2_mean, gamma_1_mean, gamma_2_mean
model['InputParametersPriorValuesUncertainties'] = [] #Should be like: [200, 200, 13, 13, 0.1, 0.1] #If the user wants to use a prior with covariance, then this must be a 2D array/list. To assume no covariance, use a 1D array/list.
+#A value of -1 in the uncertainties indicates that the parameter in question is described by a uniform distribution. In this case, InputParameterPriorValues_upperBounds and InputParameterPriorValues_lowerBounds must be defined for each parameter (they can be defined as None for the non-uniform parameters).
model['parameterNamesAndMathTypeExpressionsDict'] = {} #This must be provided. It can be as simple as {"Param1":"1"} etc., but it must be a dictionary with strings as keys and as values. The next line is a comment with a more complicated example.
#Example: model['parameterNamesAndMathTypeExpressionsDict'] = {'Ea_1':r'$E_{a1}$','Ea_2':r'$E_{a2}$','log_A1':r'$log(A_{1})$','log_A2':r'$log(A_{2})$','gamma1':r'$\gamma_{1}$','gamma2':r'$\gamma_{2}$'}
model['populateIndependentVariablesFunction'] = None
@@ -24,7 +25,7 @@
model['reducedParameterSpace'] = [] #This is to keep parameters as 'constants'. Any parameter index in this list will be allowed to change; the rest will be held as constants.
model['responses_simulation_uncertainties'] = None #Can be None, a list/vector, or a function that returns the uncertainties after each simulation is done. The easiest way would be to have a function that extracts a list that gets updated in another namespace after each simulation.
model['custom_logLikelihood'] = None #This should point to a function that takes the discrete parameter values as an argument and returns "logLikelihood, simulatedResponses". So the function returns a value for the logLikelihood (or something proportional to it). The function must *also* return the simulated response output, though technically it can just return the number 0 as the second return. The function can be as simple as accessing a global dictionary. This feature is intended for cases where the likelihood cannot be described by a normal/gaussian distribution.
-model['custom_logPrior'] = None #This feature has not been implemented yet, but is intended for cases where the prior distribution is not described by a normal distribution. The user will provide a function that takes in the parameters and returns a logPrior (or something proportional to a logPrior).
+model['custom_logPrior'] = None #This feature has been implemented but not tested; it is intended for cases where the prior distribution is not described by a normal distribution. The user will provide a function that takes in the parameters and returns a logPrior (or something proportional to a logPrior). If MCMC will be performed, the user will still need to fill out InputParametersPriorValuesUncertainties with standard deviations or a covariance matrix, since that is used to decide the MCMC steps.
model['InputParameterPriorValues_upperBounds'] = [] #This should be a list/array of the same shape as InputParameterPriorValues. Use a value of "None" for any parameter that should not be bounded in this direction. The code then truncates any distribution to have a probability of ~0 when any of the parameters go outside of their bounds. ##As of May 4th 2020, this has only been checked for scaling_uncertainties_type = 'off'
model['InputParameterPriorValues_lowerBounds'] = [] #This should be a list/array of the same shape as InputParameterPriorValues. Use a value of "None" for any parameter that should not be bounded in this direction. The code then truncates any distribution to have a probability of ~0 when any of the parameters go outside of their bounds. ##As of May 4th 2020, this has only been checked for scaling_uncertainties_type = 'off'
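
To show how the new '-1' convention combines with the bounds fields, here is a hypothetical configuration (parameter values invented for illustration): the second parameter gets a uniform prior between 5.0 and 20.0, while the first keeps a gaussian prior with a standard deviation of 200.

model['InputParameterPriorValues'] = [41.5, 13.0]
model['InputParametersPriorValuesUncertainties'] = [200, -1]  #-1 requests a uniform prior for the second parameter
model['InputParameterPriorValues_upperBounds'] = [None, 20.0] #None leaves the first parameter unbounded
model['InputParameterPriorValues_lowerBounds'] = [None, 5.0]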

2 changes: 1 addition & 1 deletion Examples/Example07/mcmc_log_file.txt
@@ -3,7 +3,7 @@ self.map_index:1196
self.map_parameter_set:[1.4727147 3.95779236]
self.mu_AP_parameter_set:[1.46310408 3.94220832]
self.stdap_parameter_set:[0.66791756 0.70516203]
-self.info_gain:19.176120481200073
+self.info_gain:14.030743614130037
evidence:1.9764225761475367e-05
posterior_cov_matrix:
[[0.44634878 0.02351822]
2 changes: 1 addition & 1 deletion Examples/Example24/mcmc_log_file.txt
@@ -3,7 +3,7 @@ self.map_index:1175
self.map_parameter_set:[1.15763372e+01 1.31694180e+05]
self.mu_AP_parameter_set:[1.16042481e+01 1.31646291e+05]
self.stdap_parameter_set:[9.11484052e-01 1.00766352e+04]
-self.info_gain:1.7157813446368657
+self.info_gain:1.4825573634364662
evidence:3.0665615155128266e-15
posterior_cov_matrix:
[[8.30895499e-01 7.44773417e+03]
