Added uniform priors feature.
AdityaSavara committed Aug 1, 2020
1 parent 1c7832e commit a5cc92e
Showing 4 changed files with 46 additions and 11 deletions.
50 changes: 42 additions & 8 deletions CheKiPEUQ/InverseProblem.py
@@ -26,7 +26,26 @@ def __init__(self, UserInput = None):
        if self.UserInput.parameter_estimation_settings['verbose']:
            print("Parameter Estimation Object Initialized")
        #Leaving the original dictionary object intact, but making a new object to make covmat_prior.
-       UserInput.InputParametersPriorValuesUncertainties = UserInput.model['InputParametersPriorValuesUncertainties']
+       UserInput.InputParametersPriorValuesUncertainties = np.array(UserInput.model['InputParametersPriorValuesUncertainties'], dtype='float32')
+       UserInput.InputParametersPriorValuesUniformDistributionsIndices = [] #initializing.
+       if -1.0 in UserInput.InputParametersPriorValuesUncertainties: #This means that at least one of the uncertainties has been set to "-1", which signifies a uniform distribution.
+           if len(np.shape(UserInput.InputParametersPriorValuesUncertainties)) != 1:
+               print("A value of '-1' in the uncertainties signifies a uniform distribution for CheKiPEUQ. As of July 1st 2020, the uniform distribution feature is only compatible with a 1D array of uncertainties and is not compatible with providing a full covariance matrix. If you need such a feature, contact the developers, because it could be implemented. Eventually, a more sophisticated back end may be used which would allow such a feature.")
+           #If there is a uniform distribution, two actions need to be taken:
+           #First, we will populate InputParametersPriorValuesUncertainties with the standard deviation of a uniform distribution, so that MCMC steps of the right size can be taken.
+           #Second, we will need to make a custom calculation when calculating the prior probability that effectively excludes this variable. So we'll create an array of indices to help us with that.
+           #We will do both in a loop.
+           UserInput.InputParametersPriorValuesUniformDistributionsKey = UserInput.InputParametersPriorValuesUncertainties*1.0 #Just initializing.
+           for parameterIndex, uncertaintyValue in enumerate(UserInput.InputParametersPriorValuesUncertainties):
+               if uncertaintyValue == -1.0:
+                   UserInput.InputParametersPriorValuesUniformDistributionsKey[parameterIndex] = 1.0 #This sets the parameter as "True" for having a uniform distribution.
+                   UserInput.InputParametersPriorValuesUniformDistributionsIndices.append(parameterIndex)
+                   #In the case of a uniform distribution over [a, b], the standard deviation is given by sigma = (b-a)/sqrt(12):
+                   #See for example https://www.quora.com/What-is-the-standard-deviation-of-a-uniform-distribution-How-is-this-formula-determined
+                   std_prior_single_parameter = (UserInput.model['InputParameterPriorValues_upperBounds'][parameterIndex] - UserInput.model['InputParameterPriorValues_lowerBounds'][parameterIndex])/(12**0.5)
+                   UserInput.InputParametersPriorValuesUncertainties[parameterIndex] = std_prior_single_parameter #Note that going forward the array InputParametersPriorValuesUncertainties cannot be checked to see whether a parameter is from a uniform distribution. Instead, InputParametersPriorValuesUniformDistributionsKey must be checked.

        if len(np.shape(UserInput.InputParametersPriorValuesUncertainties)) == 1 and (len(UserInput.InputParametersPriorValuesUncertainties) > 0): #If it's a 1D array/list that is filled, we'll diagonalize it.
            UserInput.std_prior = np.array(UserInput.InputParametersPriorValuesUncertainties, dtype='float32') #using float32 since not everyone has 64-bit support.
            UserInput.var_prior = np.power(UserInput.InputParametersPriorValuesUncertainties, 2)
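
As a quick sanity check on the (b-a)/sqrt(12) relation used above, the following standalone snippet (not part of the commit; the bounds a and b are invented for illustration) compares the analytic standard deviation with the sample standard deviation of uniform draws:

import numpy as np

a, b = 2.0, 10.0  #hypothetical lower and upper bounds for one parameter
analytic_std = (b - a)/(12**0.5)  #same formula used for std_prior_single_parameter above
samples = np.random.default_rng(0).uniform(low=a, high=b, size=1000000)
print(analytic_std)   #about 2.309
print(samples.std())  #very close to 2.309 for this many samples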
@@ -403,7 +422,7 @@ def doGridSearch(self, searchType='getLogP', exportLog = True, gridSamplingAbsol

    #The below function is a helper function that is used during doeInfoGainMatrix. However, it can certainly be used for other purposes.
    def populateResponsesWithSyntheticData(self, parModulationCombination):
-       #For each paramaeter Modulation Combination we are going to obtain a matrix of info_gains that is based on a grid of the independent_variables.
+       #For each parameter Modulation Combination we are going to obtain a matrix of info_gains that is based on a grid of the independent_variables.
        #First we need to make some synthetic data using parModulationCombination for the discreteParameterVector
        discreteParameterVector = parModulationCombination
        simulationFunction = self.UserInput.simulationFunction #Do NOT use self.UserInput.model['simulateByInputParametersOnlyFunction'] because that won't work with reduced parameter space requests.
@@ -699,7 +718,7 @@ def doOptimizeSSR(self, simulationFunctionAdditionalArgs = (), method = None, op
        self.opt_SSR = optimizeResult.fun #This is the best fit SSR.
        if printOptimum == True:
            print("Final results from doOptimizeSSR:", self.opt_parameter_set, "final SSR:", self.opt_SSR)
-       #FIXME: Right now, the createAllPlots command will not work unless we populate the map parmaeter set, so that is what we are doing. But a better longterm solution needs to be made. In which the graph says "opt" rather than "MAP" and uses the appropriate variables.
+       #FIXME: Right now, the createAllPlots command will not work unless we populate the map parameter set, so that is what we are doing. But a better long-term solution is needed, in which the graph says "opt" rather than "MAP" and uses the appropriate variables.
        #TODO: Also need to add things like WSSR based on magnitude and variance weightings.
        self.map_parameter_set = self.opt_parameter_set
        return [self.opt_parameter_set, self.opt_SSR]
@@ -951,6 +970,9 @@ def getLogPrior(self,discreteParameterVector):
        if type(self.UserInput.model['custom_logPrior']) != type(None):
            logPrior = self.UserInput.model['custom_logPrior'](discreteParameterVector)
            return logPrior
+       boundsChecksPassed = self.doInputParameterBoundsChecks(discreteParameterVector)
+       if boundsChecksPassed == False: #If False, return a 'zero probability' type result; else, continue getting the log of the prior.
+           return float('-inf') #This approximates zero probability.
        if self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'] == "off":
            discreteParameterVector_scaled = np.array(discreteParameterVector)*1.0
        elif self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'] != "off":
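
The doInputParameterBoundsChecks helper called above is not shown in this diff. For orientation, a minimal sketch of the kind of check it performs, treating None entries as unbounded (per the comments in UserInput.py), might look like the following; this is a sketch under those assumptions, not the actual CheKiPEUQ implementation:

def doInputParameterBoundsChecks(self, discreteParameterVector):
    #Sketch only: return False as soon as any parameter violates one of its bounds.
    upperBounds = self.UserInput.model['InputParameterPriorValues_upperBounds']
    lowerBounds = self.UserInput.model['InputParameterPriorValues_lowerBounds']
    for parameterIndex, parameterValue in enumerate(discreteParameterVector):
        if len(upperBounds) > 0 and upperBounds[parameterIndex] is not None:
            if parameterValue > upperBounds[parameterIndex]:
                return False
        if len(lowerBounds) > 0 and lowerBounds[parameterIndex] is not None:
            if parameterValue < lowerBounds[parameterIndex]:
                return False
    return True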
@@ -959,7 +981,19 @@
            else: #TODO: If we're in the else statement, then the scaling uncertainties is a covariance matrix, for which we plan to do row and column scaling, which has not yet been implemented. #We could probably just use the diagonal in the short term.
                print("WARNING: There is an error in your self.UserInput.scaling_uncertainties. Contact the developers with a bug report.")
                discreteParameterVector_scaled = np.array(discreteParameterVector)*1.0
-       logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled,mean=self.UserInput.mu_prior_scaled,cov=self.UserInput.covmat_prior_scaled)
+
+       if len(self.UserInput.InputParametersPriorValuesUniformDistributionsIndices) == 0: #This is the normal case: no uniform distributions.
+           logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled,mean=self.UserInput.mu_prior_scaled,cov=self.UserInput.covmat_prior_scaled)
+       else: #This means that at least one variable has a uniform prior distribution, so we need to remove that parameter before calling multivariate_normal.logpdf.
+           #Note that this if-statement is intentionally after the scaling of uncertainties because that feature can be compatible with the uniform distribution.
+           discreteParameterVector_scaled_truncated = np.delete(discreteParameterVector_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices) #np.delete does not change the original array.
+           mu_prior_scaled_truncated = np.delete(self.UserInput.mu_prior_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices)
+           var_prior_scaled_truncated = np.delete(self.UserInput.var_prior_scaled, self.UserInput.InputParametersPriorValuesUniformDistributionsIndices)
+           #Presently, we don't have full covmat support with uniform distributions. In principle, it would be better to use covmat_prior_scaled and delete the relevant rows and columns, since that would retain covariance support.
+           #For now, we just make the truncated covmat from var_prior_scaled_truncated.
+           covmat_prior_scaled_truncated = np.diagflat(var_prior_scaled_truncated)
+           logPrior = multivariate_normal.logpdf(x=discreteParameterVector_scaled_truncated,mean=mu_prior_scaled_truncated,cov=covmat_prior_scaled_truncated)
+       #Note: The below code should be okay regardless of whether there are uniform distributions, since it only adjusts logPrior by a scalar.
        if self.UserInput.parameter_estimation_settings['undo_scaling_uncertainties_type'] == True:
            try:
                scaling_factor = float(self.UserInput.parameter_estimation_settings['scaling_uncertainties_type'])
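
To make the truncation logic concrete, here is a self-contained illustration (with invented numbers) of how np.delete and np.diagflat reduce the mean and variances to only the gaussian-prior parameters before multivariate_normal.logpdf is evaluated:

import numpy as np
from scipy.stats import multivariate_normal

mu_prior = np.array([41.5, 13.0, 0.1])     #hypothetical prior means
var_prior = np.array([4.0, 1.0, 0.01])     #hypothetical prior variances
uniformIndices = [1]                       #pretend the second parameter has a uniform prior

x = np.array([42.0, 12.5, 0.12])           #a hypothetical parameter sample
x_truncated = np.delete(x, uniformIndices) #np.delete returns a copy; originals are unchanged
mu_truncated = np.delete(mu_prior, uniformIndices)
covmat_truncated = np.diagflat(np.delete(var_prior, uniformIndices))
print(multivariate_normal.logpdf(x=x_truncated, mean=mu_truncated, cov=covmat_truncated))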
@@ -1268,17 +1302,17 @@ def callback(self, discreteParameterVector, *extraArgs):
        if self.FirstCall == True:
            parameterNamesString = ""
            for parameterIndex in range(len(discreteParameterVector)):
-               parmeterName = f"Par-{parameterIndex+1}"
-               parameterNamesString += f"{parmeterName:10s}\t"
+               parameterName = f"Par-{parameterIndex+1}"
+               parameterNamesString += f"{parameterName:10s}\t"
            headerString = "Iter " + parameterNamesString + "ObjectiveF"
            print(headerString)
            self.FirstCall = False

        iterationNumberString = "{0:4d} ".format(self.iterationNumber)
        discreteParameterVector = self.lastTrialDiscreteParameterVector #We take the stored one rather than the one provided to make sure that we're getting the same one as the stored objective function.
        parameterValuesString = ""
-       for paramaterValue in discreteParameterVector:
-           parameterValuesString += f"{paramaterValue:10.5e}\t"
+       for parameterValue in discreteParameterVector:
+           parameterValuesString += f"{parameterValue:10.5e}\t"
        currentObjectiveFunctionValue = f"{self.lastTrialObjectiveFunction:10.5e}"
        iterationOutputString = iterationNumberString + parameterValuesString + currentObjectiveFunctionValue
        print(iterationOutputString)
3 changes: 2 additions & 1 deletion CheKiPEUQ/UserInput.py
@@ -16,6 +16,7 @@
model = {}
model['InputParameterPriorValues'] = [] #Should be like: [41.5, 41.5, 13.0, 13.0, 0.1, 0.1] # Ea1_mean, Ea2_mean, log_A1_mean, log_A2_mean, gamma_1_mean, gamma_2_mean
model['InputParametersPriorValuesUncertainties'] = [] #Should be like: [200, 200, 13, 13, 0.1, 0.1] #If the user wants to use a prior with covariance, then this must be a 2D array/list. To assume no covariance, use a 1D array/list.
+#A value of -1 in the uncertainties indicates that the parameter in question is described by a uniform distribution. In this case, InputParameterPriorValues_upperBounds and InputParameterPriorValues_lowerBounds must be defined for each parameter (they can be defined as None for the non-uniform parameters).
model['parameterNamesAndMathTypeExpressionsDict'] = {} #This must be provided. It can be as simple as {"Param1":"1"} etc., but it must be a dictionary with strings as keys and as values. The next line is a comment with a more complicated example.
#Example: model['parameterNamesAndMathTypeExpressionsDict'] = {'Ea_1':r'$E_{a1}$','Ea_2':r'$E_{a2}$','log_A1':r'$log(A_{1})$','log_A2':r'$log(A_{2})$','gamma1':r'$\gamma_{1}$','gamma2':r'$\gamma_{2}$'}
model['populateIndependentVariablesFunction'] = None
@@ -24,7 +25,7 @@
model['reducedParameterSpace'] = [] #This is to keep parameters as 'constants'. Any parameter index in this list will be allowed to change; the rest will be held as constants.
model['responses_simulation_uncertainties'] = None #Can be None, a list/vector, or a function that returns the uncertainties after each simulation is done. The easiest way would be to have a function that extracts a list that gets updated in another namespace after each simulation.
model['custom_logLikelihood'] = None #This should point to a function that takes the discrete parameter values as an argument and returns "logLikelihood, simulatedResponses". So the function returns a value for the logLikelihood (or something proportional to it). The function must *also* return the simulated response output, though technically it can just return the number 0 as the second return. The function can be as simple as accessing a global dictionary. This feature is intended for cases where the likelihood cannot be described by a normal/gaussian distribution.
-model['custom_logPrior'] = None #This feature has not been implemented yet, but is intended for cases where the prior distribution is not described by a normal distribution. The user will provide a function that takes in the parameters and returns a logPrior (or something proportional to a logPrior).
+model['custom_logPrior'] = None #This feature has been implemented but not tested; it is intended for cases where the prior distribution is not described by a normal distribution. The user will provide a function that takes in the parameters and returns a logPrior (or something proportional to a logPrior). If MCMC will be performed, the user will still need to fill out InputParametersPriorValuesUncertainties with standard deviations or a covariance matrix, since that is used to decide the MCMC steps.
model['InputParameterPriorValues_upperBounds'] = [] #This should be a list/array of the same shape as InputParameterPriorValues. Use a value of "None" for any parameter that should not be bounded in this direction. The code then truncates any distribution to have a probability of ~0 when any of the parameters go outside of their bounds. ##As of May 4th 2020, this has only been checked for scaling_uncertainties_type = 'off'
model['InputParameterPriorValues_lowerBounds'] = [] #This should be a list/array of the same shape as InputParameterPriorValues. Use a value of "None" for any parameter that should not be bounded in this direction. The code then truncates any distribution to have a probability of ~0 when any of the parameters go outside of their bounds. ##As of May 4th 2020, this has only been checked for scaling_uncertainties_type = 'off'
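
To show how the new '-1' convention combines with the bounds fields, here is a hypothetical configuration (parameter values invented for illustration): the second parameter gets a uniform prior between 5.0 and 20.0, while the first keeps a gaussian prior with a standard deviation of 200.

model['InputParameterPriorValues'] = [41.5, 13.0]
model['InputParametersPriorValuesUncertainties'] = [200, -1]  #-1 requests a uniform prior for the second parameter
model['InputParameterPriorValues_upperBounds'] = [None, 20.0] #None leaves the first parameter unbounded
model['InputParameterPriorValues_lowerBounds'] = [None, 5.0]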

2 changes: 1 addition & 1 deletion Examples/Example07/mcmc_log_file.txt
@@ -3,7 +3,7 @@ self.map_index:1196
self.map_parameter_set:[1.4727147 3.95779236]
self.mu_AP_parameter_set:[1.46310408 3.94220832]
self.stdap_parameter_set:[0.66791756 0.70516203]
-self.info_gain:19.176120481200073
+self.info_gain:14.030743614130037
evidence:1.9764225761475367e-05
posterior_cov_matrix:
[[0.44634878 0.02351822]
2 changes: 1 addition & 1 deletion Examples/Example24/mcmc_log_file.txt
@@ -3,7 +3,7 @@ self.map_index:1175
self.map_parameter_set:[1.15763372e+01 1.31694180e+05]
self.mu_AP_parameter_set:[1.16042481e+01 1.31646291e+05]
self.stdap_parameter_set:[9.11484052e-01 1.00766352e+04]
-self.info_gain:1.7157813446368657
+self.info_gain:1.4825573634364662
evidence:3.0665615155128266e-15
posterior_cov_matrix:
[[8.30895499e-01 7.44773417e+03]
