From 6f764586a188dacd850360db949deaa9f25b5398 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Mon, 19 Aug 2024 17:17:25 -0700
Subject: [PATCH] format

---
 examples/Darcy/calibrate.jl      | 284 ++++++++++++++++---------
 examples/Darcy/emulate_sample.jl |   9 +-
 src/MarkovChainMonteCarlo.jl     |   4 +-
 3 files changed, 156 insertions(+), 141 deletions(-)
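Reviewer note on the calibrate.jl hunks below: they only re-indent the example body into `main()`; the prior construction itself is unchanged. For orientation, a condensed sketch of that construction, reusing the names and values that appear in the diff (this assumes the example's packages, GaussianRandomFields.jl and EnsembleKalmanProcesses.jl, are loaded, and is a sketch of the existing example rather than a new API):

    import GaussianRandomFields                # aliased to GRF, as in the example
    const GRF = GaussianRandomFields
    using EnsembleKalmanProcesses.ParameterDistributions

    pts = LinRange(0, 1.0, 80)                 # spatial discretization (N = 80, L = 1.0)
    grf = GRF.GaussianRandomField(
        GRF.CovarianceFunction(2, GRF.Matern(1.0, 0.25)),
        GRF.KarhunenLoeve(5),                  # truncate the expansion to dofs = 5
        pts,
        pts,
    )
    distribution = GaussianRandomFieldInterface(grf, GRFJL())  # the EKP wrapper used in the diff
    pd = ParameterDistribution(
        Dict("distribution" => distribution, "name" => "kappa", "constraint" => bounded_below(0)),
    )
    κ = transform_unconstrained_to_constrained(pd, -1.5 * ones(5, 1))  # positive permeability field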
diff --git a/examples/Darcy/calibrate.jl b/examples/Darcy/calibrate.jl
index 3043239d..4e2ca883 100644
--- a/examples/Darcy/calibrate.jl
+++ b/examples/Darcy/calibrate.jl
@@ -45,146 +45,164 @@ rng = Random.MersenneTwister(seed)
 function main()
 
-# Define the spatial domain and discretization
-dim = 2
-N, L = 80, 1.0
-pts_per_dim = LinRange(0, L, N)
-obs_ΔN = 10
-
-# To provide a simple test case, we assume that the true function parameter is a particular sample from the function space we set up to define our prior. More precisely, we choose a value of the truth that doesn't have a vanishingly small probability under the prior, which is defined by a probability distribution over functions; here taken as a family of Gaussian Random Fields (GRF). The function distribution is characterized by a covariance function - here a Matérn kernel, which assumes a level of smoothness over the samples from the distribution. We define an appropriate expansion of this distribution, here based on the Karhunen-Loève expansion (similar to an eigenvalue-eigenfunction expansion), truncated to a finite number of terms, known as the degrees of freedom (`dofs`). The `dofs` define the effective dimension of the learning problem, decoupled from the spatial discretization. In particular, larger `dofs` may be required to represent multiscale functions, but they come at the cost of a higher-dimensional parameter space and therefore a typically harder and more expensive learning problem.
-
-smoothness = 1.0
-corr_length = 0.25
-dofs = 5
-
-grf = GRF.GaussianRandomField(
-    GRF.CovarianceFunction(dim, GRF.Matern(smoothness, corr_length)),
-    GRF.KarhunenLoeve(dofs),
-    pts_per_dim,
-    pts_per_dim,
-)
-
-# We define a wrapper around the GRF, and as the permeability field must be positive we introduce a domain constraint into the function distribution. Henceforth, the GRF is interfaced in the same manner as any other parameter distribution.
-pkg = GRFJL()
-distribution = GaussianRandomFieldInterface(grf, pkg) # our wrapper from EKP
-domain_constraint = bounded_below(0) # make κ positive
-pd = ParameterDistribution(Dict("distribution" => distribution, "name" => "kappa", "constraint" => domain_constraint)) # the fully constrained parameter distribution
-
-# Now that we have a function distribution, we sample a reasonably high-probability value from it as the true value (here all degrees of freedom are set to `u_{\mathrm{true}} = -1.5`). We use the EKP transform function to build the corresponding instance of ``\kappa_{\mathrm{true}}``.
-u_true = -1.5 * ones(dofs, 1) # the truth parameter
-println("True coefficients: ")
-println(u_true)
-κ_true = transform_unconstrained_to_constrained(pd, u_true) # builds and constrains the function.
-κ_true = reshape(κ_true, N, N)
-
-# Now we generate the data sample for the truth in a perfect-model setting by evaluating the model here, observing it by subsampling every `obs_ΔN` points in each dimension, and adding some observational noise
-darcy = Setup_Param(pts_per_dim, obs_ΔN, κ_true)
-println(" Number of observation points: $(darcy.N_y)")
-h_2d_true = solve_Darcy_2D(darcy, κ_true)
-y_noiseless = compute_obs(darcy, h_2d_true)
-obs_noise_cov = 0.05^2 * I(length(y_noiseless)) * (maximum(y_noiseless) - minimum(y_noiseless))
-truth_sample = vec(y_noiseless + rand(rng, MvNormal(zeros(length(y_noiseless)), obs_noise_cov)))
-
-
-# Now we set up the Bayesian inversion algorithm. The prior is the function distribution we already defined and used to construct our truth
-prior = pd
-
-
-# We define some algorithm parameters; here we take a number of ensemble members larger than the dimension of the parameter space
-N_ens = 50 # number of ensemble members
-N_iter = 5 # number of EKI iterations
-
-# We sample the initial ensemble from the prior, and create the EKP object as an EKI algorithm using the `Inversion()` keyword
-initial_params = construct_initial_ensemble(rng, prior, N_ens)
-ekiobj = EKP.EnsembleKalmanProcess(initial_params, truth_sample, obs_noise_cov, Inversion())
-
-# We perform the inversion loop. Remember that within calls to `get_ϕ_final` the EKP transformations are applied; thus the returned ensemble is the positively-bounded permeability field evaluated at all the discretization points.
-println("Begin inversion")
-err = []
-final_it = [N_iter]
-for i in 1:N_iter
-    params_i = get_ϕ_final(prior, ekiobj)
-    g_ens = run_G_ensemble(darcy, params_i)
-    terminate = EKP.update_ensemble!(ekiobj, g_ens)
-    push!(err, get_error(ekiobj)[end]) #mean((params_true - mean(params_i,dims=2)).^2)
-    println("Iteration: " * string(i) * ", Error: " * string(err[i]))
-    if !isnothing(terminate)
-        final_it[1] = i - 1
-        break
-    end
-end
-n_iter = final_it[1]
-# We first plot the prior ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean. Each ensemble member is stored as a column, so for uses such as plotting one needs to reshape it to the desired dimensions.
-if PLOT_FLAG
-    gr(size = (1500, 400), legend = false)
-    prior_κ_ens = get_ϕ(prior, ekiobj, 1)
-    κ_ens_mean = reshape(mean(prior_κ_ens, dims = 2), N, N)
-    p1 = contour(pts_per_dim, pts_per_dim, κ_ens_mean', fill = true, levels = 15, title = "kappa mean", colorbar = true)
-    κ_ens_ptw_var = reshape(var(prior_κ_ens, dims = 2), N, N)
-    p2 = contour(
-        pts_per_dim,
-        pts_per_dim,
-        κ_ens_ptw_var',
-        fill = true,
-        levels = 15,
-        title = "kappa var",
-        colorbar = true,
-    )
-    h_2d = solve_Darcy_2D(darcy, κ_ens_mean)
-    p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true)
-    l = @layout [a b c]
-    plt = plot(p1, p2, p3, layout = l)
-    savefig(plt, joinpath(fig_save_directory, "output_prior.png")) # pre update
+    # Define the spatial domain and discretization
+    dim = 2
+    N, L = 80, 1.0
+    pts_per_dim = LinRange(0, L, N)
+    obs_ΔN = 10
 
-end
+    # To provide a simple test case, we assume that the true function parameter is a particular sample from the function space we set up to define our prior. More precisely, we choose a value of the truth that doesn't have a vanishingly small probability under the prior, which is defined by a probability distribution over functions; here taken as a family of Gaussian Random Fields (GRF). The function distribution is characterized by a covariance function - here a Matérn kernel, which assumes a level of smoothness over the samples from the distribution. We define an appropriate expansion of this distribution, here based on the Karhunen-Loève expansion (similar to an eigenvalue-eigenfunction expansion), truncated to a finite number of terms, known as the degrees of freedom (`dofs`). The `dofs` define the effective dimension of the learning problem, decoupled from the spatial discretization. In particular, larger `dofs` may be required to represent multiscale functions, but they come at the cost of a higher-dimensional parameter space and therefore a typically harder and more expensive learning problem.
 
-# Now we plot the final ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean.
-if PLOT_FLAG
-    gr(size = (1500, 400), legend = false)
-    final_κ_ens = get_ϕ_final(prior, ekiobj) # the `ϕ` indicates that the `params_i` are in the constrained space
-    κ_ens_mean = reshape(mean(final_κ_ens, dims = 2), N, N)
-    p1 = contour(pts_per_dim, pts_per_dim, κ_ens_mean', fill = true, levels = 15, title = "kappa mean", colorbar = true)
-    κ_ens_ptw_var = reshape(var(final_κ_ens, dims = 2), N, N)
-    p2 = contour(
+    smoothness = 1.0
+    corr_length = 0.25
+    dofs = 5
+
+    grf = GRF.GaussianRandomField(
+        GRF.CovarianceFunction(dim, GRF.Matern(smoothness, corr_length)),
+        GRF.KarhunenLoeve(dofs),
         pts_per_dim,
         pts_per_dim,
-        κ_ens_ptw_var',
-        fill = true,
-        levels = 15,
-        title = "kappa var",
-        colorbar = true,
     )
-    h_2d = solve_Darcy_2D(darcy, κ_ens_mean)
-    p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true)
-    l = @layout [a b c]
-    plt = plot(p1, p2, p3; layout = l)
-    savefig(plt, joinpath(fig_save_directory, "output_it_" * string(n_iter) * ".png")) # pre update
-end
-println("Final coefficients (ensemble mean):")
-println(get_u_mean_final(ekiobj))
+    # We define a wrapper around the GRF, and as the permeability field must be positive we introduce a domain constraint into the function distribution. Henceforth, the GRF is interfaced in the same manner as any other parameter distribution.
+    pkg = GRFJL()
+    distribution = GaussianRandomFieldInterface(grf, pkg) # our wrapper from EKP
+    domain_constraint = bounded_below(0) # make κ positive
+    pd = ParameterDistribution(
+        Dict("distribution" => distribution, "name" => "kappa", "constraint" => domain_constraint),
+    ) # the fully constrained parameter distribution
+
+    # Now that we have a function distribution, we sample a reasonably high-probability value from it as the true value (here all degrees of freedom are set to `u_{\mathrm{true}} = -1.5`). We use the EKP transform function to build the corresponding instance of ``\kappa_{\mathrm{true}}``.
+    u_true = -1.5 * ones(dofs, 1) # the truth parameter
+    println("True coefficients: ")
+    println(u_true)
+    κ_true = transform_unconstrained_to_constrained(pd, u_true) # builds and constrains the function.
+    κ_true = reshape(κ_true, N, N)
+
+    # Now we generate the data sample for the truth in a perfect-model setting by evaluating the model here, observing it by subsampling every `obs_ΔN` points in each dimension, and adding some observational noise
+    darcy = Setup_Param(pts_per_dim, obs_ΔN, κ_true)
+    println(" Number of observation points: $(darcy.N_y)")
+    h_2d_true = solve_Darcy_2D(darcy, κ_true)
+    y_noiseless = compute_obs(darcy, h_2d_true)
+    obs_noise_cov = 0.05^2 * I(length(y_noiseless)) * (maximum(y_noiseless) - minimum(y_noiseless))
+    truth_sample = vec(y_noiseless + rand(rng, MvNormal(zeros(length(y_noiseless)), obs_noise_cov)))
+
+
+    # Now we set up the Bayesian inversion algorithm. The prior is the function distribution we already defined and used to construct our truth
+    prior = pd
+
+
+    # We define some algorithm parameters; here we take a number of ensemble members larger than the dimension of the parameter space
+    N_ens = 50 # number of ensemble members
+    N_iter = 5 # number of EKI iterations
+
+    # We sample the initial ensemble from the prior, and create the EKP object as an EKI algorithm using the `Inversion()` keyword
+    initial_params = construct_initial_ensemble(rng, prior, N_ens)
+    ekiobj = EKP.EnsembleKalmanProcess(initial_params, truth_sample, obs_noise_cov, Inversion())
+
+    # We perform the inversion loop. Remember that within calls to `get_ϕ_final` the EKP transformations are applied; thus the returned ensemble is the positively-bounded permeability field evaluated at all the discretization points.
+    println("Begin inversion")
+    err = []
+    final_it = [N_iter]
+    for i in 1:N_iter
+        params_i = get_ϕ_final(prior, ekiobj)
+        g_ens = run_G_ensemble(darcy, params_i)
+        terminate = EKP.update_ensemble!(ekiobj, g_ens)
+        push!(err, get_error(ekiobj)[end]) #mean((params_true - mean(params_i,dims=2)).^2)
+        println("Iteration: " * string(i) * ", Error: " * string(err[i]))
+        if !isnothing(terminate)
+            final_it[1] = i - 1
+            break
+        end
+    end
+    n_iter = final_it[1]
+    # We first plot the prior ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean. Each ensemble member is stored as a column, so for uses such as plotting one needs to reshape it to the desired dimensions.
+    if PLOT_FLAG
+        gr(size = (1500, 400), legend = false)
+        prior_κ_ens = get_ϕ(prior, ekiobj, 1)
+        κ_ens_mean = reshape(mean(prior_κ_ens, dims = 2), N, N)
+        p1 = contour(
+            pts_per_dim,
+            pts_per_dim,
+            κ_ens_mean',
+            fill = true,
+            levels = 15,
+            title = "kappa mean",
+            colorbar = true,
+        )
+        κ_ens_ptw_var = reshape(var(prior_κ_ens, dims = 2), N, N)
+        p2 = contour(
+            pts_per_dim,
+            pts_per_dim,
+            κ_ens_ptw_var',
+            fill = true,
+            levels = 15,
+            title = "kappa var",
+            colorbar = true,
+        )
+        h_2d = solve_Darcy_2D(darcy, κ_ens_mean)
+        p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true)
+        l = @layout [a b c]
+        plt = plot(p1, p2, p3, layout = l)
+        savefig(plt, joinpath(fig_save_directory, "output_prior.png")) # pre update
 
-# We can compare this with the true permeability and pressure field:
-if PLOT_FLAG
-    gr(size = (1000, 400), legend = false)
-    p1 = contour(pts_per_dim, pts_per_dim, κ_true', fill = true, levels = 15, title = "kappa true", colorbar = true)
-    p2 = contour(
-        pts_per_dim,
-        pts_per_dim,
-        h_2d_true',
-        fill = true,
-        levels = 15,
-        title = "pressure true",
-        colorbar = true,
-    )
-    l = @layout [a b]
-    plt = plot(p1, p2, layout = l)
-    savefig(plt, joinpath(fig_save_directory, "output_true.png"))
-end
+    end
+
+    # Now we plot the final ensemble mean and pointwise variance of the permeability field, and also the pressure field solved with the ensemble mean.
+    if PLOT_FLAG
+        gr(size = (1500, 400), legend = false)
+        final_κ_ens = get_ϕ_final(prior, ekiobj) # the `ϕ` indicates that the `params_i` are in the constrained space
+        κ_ens_mean = reshape(mean(final_κ_ens, dims = 2), N, N)
+        p1 = contour(
+            pts_per_dim,
+            pts_per_dim,
+            κ_ens_mean',
+            fill = true,
+            levels = 15,
+            title = "kappa mean",
+            colorbar = true,
+        )
+        κ_ens_ptw_var = reshape(var(final_κ_ens, dims = 2), N, N)
+        p2 = contour(
+            pts_per_dim,
+            pts_per_dim,
+            κ_ens_ptw_var',
+            fill = true,
+            levels = 15,
+            title = "kappa var",
+            colorbar = true,
+        )
+        h_2d = solve_Darcy_2D(darcy, κ_ens_mean)
+        p3 = contour(pts_per_dim, pts_per_dim, h_2d', fill = true, levels = 15, title = "pressure", colorbar = true)
+        l = @layout [a b c]
+        plt = plot(p1, p2, p3; layout = l)
+        savefig(plt, joinpath(fig_save_directory, "output_it_" * string(n_iter) * ".png")) # pre update
+
+    end
+    println("Final coefficients (ensemble mean):")
+    println(get_u_mean_final(ekiobj))
+
+    # We can compare this with the true permeability and pressure field:
+    if PLOT_FLAG
+        gr(size = (1000, 400), legend = false)
+        p1 = contour(pts_per_dim, pts_per_dim, κ_true', fill = true, levels = 15, title = "kappa true", colorbar = true)
+        p2 = contour(
+            pts_per_dim,
+            pts_per_dim,
+            h_2d_true',
+            fill = true,
+            levels = 15,
+            title = "pressure true",
+            colorbar = true,
+        )
+        l = @layout [a b]
+        plt = plot(p1, p2, layout = l)
+        savefig(plt, joinpath(fig_save_directory, "output_true.png"))
+    end
 
-# Finally, the data is saved
-u_stored = get_u(ekiobj, return_array = false)
-g_stored = get_g(ekiobj, return_array = false)
+    # Finally, the data is saved
+    u_stored = get_u(ekiobj, return_array = false)
+    g_stored = get_g(ekiobj, return_array = false)
 
     save(
         joinpath(data_save_directory, "calibrate_results.jld2"),
@@ -201,7 +219,7 @@ g_stored = get_g(ekiobj, return_array = false)
         "truth_input_constrained", # the discrete true parameter field
         κ_true,
         "truth_input_unconstrained", # the discrete true KL coefficients
-        u_true, 
+        u_true,
     )
 end
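Aside: the inversion loop reformatted above follows EKP's generic calibrate pattern. A self-contained toy version of the same loop, with a hypothetical two-parameter forward map `G` standing in for the Darcy solver behind `run_G_ensemble` (toy observation, noise level, and ensemble size throughout; not the example's setup):

    using LinearAlgebra, Random
    using EnsembleKalmanProcesses
    using EnsembleKalmanProcesses.ParameterDistributions

    rng = Random.MersenneTwister(42)
    prior = constrained_gaussian("u", 0.0, 1.0, -Inf, Inf, repeats = 2)
    G(u) = [u[1]^2 + u[2], u[1] - u[2]]   # hypothetical toy forward map
    y = [1.0, 0.5]                        # synthetic observation
    Γ = 0.01 * Matrix(I, 2, 2)            # observational noise covariance

    initial = construct_initial_ensemble(rng, prior, 30)
    eki = EnsembleKalmanProcess(initial, y, Γ, Inversion(); rng = rng)
    for _ in 1:5
        ϕ = get_ϕ_final(prior, eki)       # ensemble mapped to the constrained space
        g = reduce(hcat, [G(ϕ[:, j]) for j in 1:size(ϕ, 2)])
        update_ensemble!(eki, g)
    end
    println(get_ϕ_mean_final(prior, eki)) # final ensemble mean (constrained)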
diff --git a/examples/Darcy/emulate_sample.jl b/examples/Darcy/emulate_sample.jl
index 655b46bd..7557eb56 100644
--- a/examples/Darcy/emulate_sample.jl
+++ b/examples/Darcy/emulate_sample.jl
@@ -39,7 +39,7 @@ function main()
         "cov_sample_multiplier" => 0.5,
         "n_iteration" => 20,
         "n_ensemble" => 120,
-        "localization" => Localizers.SECNice(1000,0.1,1.0),
+        "localization" => Localizers.SECNice(1000, 0.1, 1.0),
     )
 
     # we do not want termination, as our priors have relatively little interpretation
@@ -90,12 +90,9 @@ function main()
     # choice of machine-learning tool in the emulation stage
     nugget = 1e-12
     if case == "GP"
-# gppackage = Emulators.SKLJL()
+        # gppackage = Emulators.SKLJL()
         gppackage = Emulators.GPJL()
-        mlt = GaussianProcess(
-            gppackage;
-            noise_learn = false,
-        )
+        mlt = GaussianProcess(gppackage; noise_learn = false)
     elseif case ∈ ["RF-vector-svd-nonsep"]
         kernel_structure = NonseparableKernel(LowRankFactor(1, nugget))
         n_features = 100
diff --git a/src/MarkovChainMonteCarlo.jl b/src/MarkovChainMonteCarlo.jl
index 50be13c9..dd84f8be 100644
--- a/src/MarkovChainMonteCarlo.jl
+++ b/src/MarkovChainMonteCarlo.jl
@@ -610,12 +610,12 @@ function get_posterior(mcmc::MCMCWrapper, chain::MCMCChains.Chains)
     # live in same space as prior
     # checks if a function distribution, by looking at if the distribution is nested
     p_constraints = [
-        !isa(get_distribution(mcmc.prior)[pn],ParameterDistribution) ? # if not func-dist
+        !isa(get_distribution(mcmc.prior)[pn], ParameterDistribution) ? # if not func-dist
         flat_constraints[slice] : # constraints are slice
         get_all_constraints(get_distribution(mcmc.prior)[pn]) # get constraints of nested dist
         for (pn, slice) in zip(p_names, p_slices)
     ]
-    
+
     # distributions created as atoms and pieced together
     posterior_distribution = combine_distributions([
         ParameterDistribution(ps, pc, pn) for (ps, pc, pn) in zip(p_samples, p_constraints, p_names)
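Closing note: the final hunk is whitespace-only, but the comprehension it touches is worth a gloss. Per parameter, get_posterior takes constraints from the flat list unless the parameter's distribution is itself a nested (function) distribution, in which case it asks that distribution for its own constraints. Schematically, with toy stand-ins (a Vector plays the role of a nested distribution; none of these names are the package's):

    flat_constraints = ["c1", "c2", "c3"]
    dists = Dict("a" => :plain, "b" => ["nested_c1", "nested_c2"])
    p_names = ["a", "b"]
    p_slices = [1:1, 2:3]

    p_constraints = [
        !isa(dists[pn], Vector) ?     # if not a nested distribution
        flat_constraints[slice] :     # take the flat constraint slice
        dists[pn]                     # else use the nested distribution's own constraints
        for (pn, slice) in zip(p_names, p_slices)
    ]
    # => [["c1"], ["nested_c1", "nested_c2"]]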