From 90adbe56e876a15c6f7f51133dd32756e5c0df5a Mon Sep 17 00:00:00 2001
From: Oliver Dunbar <47412152+odunbar@users.noreply.github.com>
Date: Tue, 13 Feb 2024 15:57:33 -0800
Subject: [PATCH] Orad/fix emulator bug repeat call (#281)

* bugfix repeated calls, by skipping build
* typo in test plots, repeats, more tweaks
format
varying dimension is easier
ens propto dim
adds recompute_cov and inflation/localization for scalar RF
different plots and configs with localization etc.
exp to fit paper results
add statsbase
add plotting when no repeats
add coeffs, hardcode some restart options in scalar RF
add coeffs, hardcode some restart options in scalar RF
kron
add regularization matrix
save states and replotting from file
prior plots for l63
added save+plot for GFunc
add prior case
add save+plot to ishigami
updated plotting for EDMF
remove coeffs from loss again
JLD2 in projects
config update
format
tests passing
add info statement
---
 examples/EDMF_data/plot_posterior.jl        | 120 +++++++-
 examples/EDMF_data/uq_for_edmf.jl           |  58 ++--
 examples/Emulator/G-function/Project.toml   |  13 +
 examples/Emulator/G-function/emulate.jl     | 317 ++++++++++++++++++++
 examples/Emulator/G-function/plot_result.jl | 136 +++++++++
 examples/Emulator/Ishigami/emulate.jl       |  16 +
 examples/Emulator/Ishigami/plot_result.jl   |  92 ++++++
 examples/Emulator/L63/Project.toml          |   2 +
 examples/Emulator/L63/emulate.jl            |  51 +++-
 examples/Emulator/L63/plot_results.jl       | 112 +++++++
 src/Emulator.jl                             |   4 +-
 src/RandomFeature.jl                        |  79 ++++-
 src/ScalarRandomFeature.jl                  |  93 ++++--
 src/VectorRandomFeature.jl                  |  34 +--
 test/RandomFeature/runtests.jl              |   2 -
 15 files changed, 1022 insertions(+), 107 deletions(-)
 create mode 100644 examples/Emulator/G-function/Project.toml
 create mode 100644 examples/Emulator/G-function/emulate.jl
 create mode 100644 examples/Emulator/G-function/plot_result.jl
 create mode 100644 examples/Emulator/Ishigami/plot_result.jl
 create mode 100644 examples/Emulator/L63/plot_results.jl

diff --git a/examples/EDMF_data/plot_posterior.jl b/examples/EDMF_data/plot_posterior.jl
index 79ac4d409..eab0eb71a 100644
--- a/examples/EDMF_data/plot_posterior.jl
+++ b/examples/EDMF_data/plot_posterior.jl
@@ -9,23 +9,112 @@ using Dates
 # CES
 using CalibrateEmulateSample.ParameterDistributions

+#####
+# Creates 3 plots: one for a specific case, one with two cases, and one with all cases (the final case being the prior).
+
 # date = Date(year,month,day)
 # 2-parameter calibration exp
 #exp_name = "ent-det-calibration"
-#date_of_run = Date(2023, 10, 5)

 # 5-parameter calibration exp
 exp_name = "ent-det-tked-tkee-stab-calibration"
-date_of_run = Date(2024, 2, 2)
+date_of_run = Date(2024, 06, 14)

 # Output figure read/write directory
 figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
 data_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))

+# case:
+cases = [
+    "GP", # diagonalize, train scalar GP, assume diag inputs
+    "RF-prior",
+    "RF-vector-svd-nonsep",
+]
+case_rf = cases[3]
+
+# load
+posterior_filepath = joinpath(data_save_directory, "$(case_rf)_posterior.jld2")
+if !isfile(posterior_filepath)
+    throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date"))
+else
+    println("Loading posterior distribution from: " * posterior_filepath)
+    posterior = load(posterior_filepath)["posterior"]
+end
+# get samples explicitly (may be easier to work with)
+posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...)
#samples are columns +transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + +# histograms +nparam_plots = sum(get_dimensions(posterior)) - 1 +density_filepath = joinpath(figure_save_directory, "$(case_rf)_posterior_dist_comp.png") +transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_posterior_dist_phys.png") +labels = get_name(posterior) + +burnin = 50_000 + +data_rf = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) +transformed_data_rf = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + +p = pairplot(data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) +trans_p = pairplot(transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) + +save(density_filepath, p) +save(transformed_density_filepath, trans_p) + +# +# +# + +case_gp = cases[1] +# load +posterior_filepath = joinpath(data_save_directory, "$(case_gp)_posterior.jld2") +if !isfile(posterior_filepath) + throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) +else + println("Loading posterior distribution from: " * posterior_filepath) + posterior = load(posterior_filepath)["posterior"] +end +# get samples explicitly (may be easier to work with) +posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns +transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + +# histograms +nparam_plots = sum(get_dimensions(posterior)) - 1 +density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_posterior_dist_comp.png") +transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_posterior_dist_phys.png") +labels = get_name(posterior) +data_gp = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) +transformed_data_gp = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) +# +# +# +gp_smoothing = 1 # >1 = smoothing KDE in plotting + +p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), +) +trans_p = pairplot( + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), +) + +save(density_filepath, p) +save(transformed_density_filepath, trans_p) + + + +# Finally include the prior too +case_prior = cases[2] # load -posterior_filepath = joinpath(data_save_directory, "posterior.jld2") +posterior_filepath = joinpath(data_save_directory, "$(case_prior)_posterior.jld2") if !isfile(posterior_filepath) throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) else @@ -39,15 +128,28 @@ transformed_posterior_samples = # histograms nparam_plots = sum(get_dimensions(posterior)) - 1 -density_filepath = joinpath(figure_save_directory, "posterior_dist_comp.png") -transformed_density_filepath = joinpath(figure_save_directory, "posterior_dist_phys.png") +density_filepath = joinpath(figure_save_directory, "all_posterior_dist_comp.png") +transformed_density_filepath = joinpath(figure_save_directory, "all_posterior_dist_phys.png") labels = get_name(posterior) -data = (; [(Symbol(labels[i]), posterior_samples[i, :]) for i in 1:length(labels)]...) 
-transformed_data = (; [(Symbol(labels[i]), transformed_posterior_samples[i, :]) for i in 1:length(labels)]...) +data_prior = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) +transformed_data_prior = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) +# +# +# + +p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + data_prior => (PairPlots.Scatter(),), +) +trans_p = pairplot( + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + transformed_data_prior => (PairPlots.Scatter(),), +) -p = pairplot(data => (PairPlots.Scatter(),)) -trans_p = pairplot(transformed_data => (PairPlots.Scatter(),)) save(density_filepath, p) save(transformed_density_filepath, trans_p) diff --git a/examples/EDMF_data/uq_for_edmf.jl b/examples/EDMF_data/uq_for_edmf.jl index ea2929278..3c1fe10e6 100644 --- a/examples/EDMF_data/uq_for_edmf.jl +++ b/examples/EDMF_data/uq_for_edmf.jl @@ -34,6 +34,12 @@ function main() # 5-parameter calibration exp exp_name = "ent-det-tked-tkee-stab-calibration" + cases = [ + "GP", # diagonalize, train scalar GP, assume diag inputs + "RF-prior", + "RF-vector-svd-nonsep", + ] + case = cases[1] # Output figure save directory figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today())) @@ -120,8 +126,7 @@ function main() println("plotting ensembles...") for plot_i in 1:size(outputs, 1) p = scatter(inputs_constrained[1, :], inputs_constrained[2, :], zcolor = outputs[plot_i, :]) - savefig(p, joinpath(figure_save_directory, "output_" * string(plot_i) * ".png")) - savefig(p, joinpath(figure_save_directory, "output_" * string(plot_i) * ".pdf")) + savefig(p, joinpath(figure_save_directory, "$(case)_output_" * string(plot_i) * ".png")) end println("finished plotting ensembles.") end @@ -200,17 +205,24 @@ function main() println("Begin Emulation stage") # Create GP object - cases = [ - "GP", # diagonalize, train scalar GP, assume diag inputs - "RF-vector-svd-nonsep", - "RF-vector-nosvd-nonsep", # don't perform decorrelation - ] - case = cases[3] - n_repeats = 2 - - opt_diagnostics = [] - emulators = [] - for rep_idx in 1:n_repeats + overrides = Dict( + "verbose" => true, + "train_fraction" => 0.85, + "scheduler" => DataMisfitController(terminate_at = 100), + "cov_sample_multiplier" => 1.0, + "n_iteration" => 15, + "n_features_opt" => 200, + "localization" => SEC(0.05), + ) + if case == "RF-prior" + overrides = Dict("verbose" => true, "cov_sample_multiplier" => 0.01, "n_iteration" => 0) + end + nugget = 1e-6 + rng_seed = 99330 + rng = Random.MersenneTwister(rng_seed) + input_dim = size(get_inputs(input_output_pairs), 1) + output_dim = size(get_outputs(input_output_pairs), 1) + if case == "GP" overrides = Dict( "verbose" => true, @@ -222,13 +234,9 @@ function main() # "n_ensemble" => 20, # "localization" => SEC(1.0, 0.01), # localization / sample error correction for small ensembles ) - nugget = 1e-10#1e-12#0.01 - rng_seed = 99330 - rng = Random.MersenneTwister(rng_seed) - input_dim = size(get_inputs(input_output_pairs), 1) - output_dim = size(get_outputs(input_output_pairs), 1) - decorrelate = true - if case == "GP" + elseif case ∈ ["RF-vector-svd-nonsep", "RF-prior"] + kernel_structure = NonseparableKernel(LowRankFactor(1, nugget)) + n_features = 500 gppackage = Emulators.SKLJL() pred_type = 
Emulators.YType() @@ -312,7 +320,7 @@ function main() end - emulator_filepath = joinpath(data_save_directory, "emulator.jld2") + emulator_filepath = joinpath(data_save_directory, "$(case)_emulator.jld2") save(emulator_filepath, "emulator", emulator) println("Finished Emulation stage") @@ -328,17 +336,17 @@ function main() # determine a good step size yt_sample = y_truth mcmc = MCMCWrapper(RWMHSampling(), yt_sample, prior, emulator; init_params = u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 5000, discard_initial = 0) # Now begin the actual MCMC println("Begin MCMC - with step size ", new_step) - chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - mcmc_filepath = joinpath(data_save_directory, "mcmc_and_chain.jld2") + mcmc_filepath = joinpath(data_save_directory, "$(case)_mcmc_and_chain.jld2") save(mcmc_filepath, "mcmc", mcmc, "chain", chain) - posterior_filepath = joinpath(data_save_directory, "posterior.jld2") + posterior_filepath = joinpath(data_save_directory, "$(case)_posterior.jld2") save(posterior_filepath, "posterior", posterior) println("Finished Sampling stage") diff --git a/examples/Emulator/G-function/Project.toml b/examples/Emulator/G-function/Project.toml new file mode 100644 index 000000000..7d7a6fa85 --- /dev/null +++ b/examples/Emulator/G-function/Project.toml @@ -0,0 +1,13 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +GlobalSensitivityAnalysis = "1b10255b-6da3-57ce-9089-d24e8517b87e" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFeatures = "36c3bae2-c0c3-419d-b3b4-eebadd35c5e5" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/examples/Emulator/G-function/emulate.jl b/examples/Emulator/G-function/emulate.jl new file mode 100644 index 000000000..84706b2c1 --- /dev/null +++ b/examples/Emulator/G-function/emulate.jl @@ -0,0 +1,317 @@ + +using GlobalSensitivityAnalysis +const GSA = GlobalSensitivityAnalysis +using Distributions +using DataStructures +using Random +using LinearAlgebra +import StatsBase: percentile +using JLD2 + +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers + +using CairoMakie, ColorSchemes #for plots +seed = 2589436 + +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + + +inner_func(x::AV, a::AV) where {AV <: AbstractVector} = prod((abs.(4 * x .- 2) + a) ./ (1 .+ a)) + +"G-Function taken from https://www.sfu.ca/~ssurjano/gfunc.html" +function GFunction(x::AM, a::AV) where {AM <: AbstractMatrix, AV <: AbstractVector} + @assert size(x, 1) == length(a) + return mapslices(y -> inner_func(y, a), x; dims = 1) #applys the map to columns +end + +function GFunction(x::AM) where {AM <: AbstractMatrix} + a = 
[(i - 1.0) / 2.0 for i in 1:size(x, 1)] + return GFunction(x, a) +end + +function main() + + rng = MersenneTwister(seed) + + n_repeats = 3 # repeat exp with same data. + n_dimensions = 3 + # To create the sampling + n_data_gen = 800 + + data = + SobolData(params = OrderedDict([Pair(Symbol("x", i), Uniform(0, 1)) for i in 1:n_dimensions]), N = n_data_gen) + + # To perform global analysis, + # one must generate samples using Sobol sequence (i.e. creates more than N points) + samples = GSA.sample(data) + n_data = size(samples, 1) # [n_samples x n_dim] + println("number of sobol points: ", n_data) + # run model (example) + y = GFunction(samples')' # G is applied to columns + # perform Sobol Analysis + result = analyze(data, y) + + # plot the first 3 dimensions + plot_dim = n_dimensions >= 3 ? 3 : n_dimensions + f1 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax = Axis(f1[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax, samples[:, i], y[:], color = :orange) + end + + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).png"), f1, px_per_unit = 3) + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).pdf"), f1, px_per_unit = 3) + + n_train_pts = n_dimensions * 250 + ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts] + # now subsample the samples data + n_tp = length(ind) + input = zeros(n_dimensions, n_tp) + output = zeros(1, n_tp) + Γ = 1e-3 + noise = rand(rng, Normal(0, Γ), n_tp) + for i in 1:n_tp + input[:, i] = samples[ind[i], :] + output[i] = y[ind[i]] + noise[i] + end + iopairs = PairedDataContainer(input, output) + + # analytic sobol indices taken from + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8989694/pdf/main.pdf + a = [(i - 1.0) / 2.0 for i in 1:n_dimensions] # a_i < a_j => a_i more sensitive + prod_tmp = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1 + V = [(1 / (3 * (1 + ai)^2)) / prod_tmp for ai in a] + prod_tmp2 = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dimensions] + TV = [(1 / (3 * (1 + ai)^2)) * prod_tmp2[i] / prod_tmp for (i, ai) in enumerate(a)] + + + + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] + decorrelate = true + nugget = Float64(1e-12) + + overrides = Dict( + "verbose" => true, + "scheduler" => DataMisfitController(terminate_at = 1e2), + "n_features_opt" => 300, + "train_fraction" => 0.8,#0.7 + "n_iteration" => 20, # (=multiple of recompute_cov_at - 1 is most efficient) + "cov_sample_multiplier" => 1.0, + # "localization" => SEC(0.1),#,Doesnt help much tbh + # "accelerator" => NesterovAccelerator(), + "n_ensemble" => 200, #40*n_dimensions, + # THIS currently needs to be set in src if used: "recompute_cov_at" => 10, + ) + if case == "Prior" + # don't do anything + overrides["n_iteration"] = 0 + overrides["cov_sample_multiplier"] = 0.1 + end + + y_preds = [] + result_preds = [] + + for rep_idx in 1:n_repeats + @info "Repeat: $(rep_idx)" + # Build ML tools + if case == "GP" + gppackage = Emulators.SKLJL() + pred_type = Emulators.YType() + mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + + elseif case ∈ ["RF-scalar", "Prior"] + rank = n_dimensions #<= 10 ? n_dimensions : 10 + kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) + n_features = n_dimensions <= 10 ? 
n_dimensions * 100 : 1000 + if (n_features / n_train_pts > 0.9) && (n_features / n_train_pts < 1.1) + @warn "The number of features similar to the number of training points, poor performance expected, change one or other of these" + end + mlt = ScalarRandomFeatureInterface( + n_features, + n_dimensions, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = deepcopy(overrides), + ) + end + + # Emulate + emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) + optimize_hyperparameters!(emulator) + + # predict on all Sobol points with emulator (example) + y_pred, y_var = predict(emulator, samples', transform_to_real = true) + + # obtain emulated Sobol indices + result_pred = analyze(data, y_pred') + println("First order: ", result_pred[:firstorder]) + println("Total order: ", result_pred[:totalorder]) + + push!(y_preds, y_pred) + push!(result_preds, result_pred) + GC.gc() #collect garbage + + # PLotting: + if rep_idx == 1 + f3, ax3, plt3 = scatter( + 1:n_dimensions, + result_preds[1][:firstorder]; + color = :red, + markersize = 8, + marker = :cross, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + result_preds[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + else + # get percentiles: + fo_mat = zeros(n_dimensions, rep_idx) + to_mat = zeros(n_dimensions, rep_idx) + + for (idx, rp) in enumerate(result_preds) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3, ax3, plt3 = errorbars( + 1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = 
:xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + + end + # plots - first 3 dimensions + if rep_idx == 1 + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax2, samples[:, i], y_preds[1][:], color = :blue) + scatter!(ax2, samples[ind, i], y[ind] + noise, color = :red, markersize = 8) + end + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), + f2, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), + f2, + px_per_unit = 3, + ) + end + end + + println(" ") + println("True Sobol Indices") + println("******************") + println(" firstorder: ", V) + println(" totalorder: ", TV) + println(" ") + println("Sampled truth Sobol Indices (# points $n_data)") + println("***************************") + println(" firstorder: ", result[:firstorder]) + println(" totalorder: ", result[:totalorder]) + println(" ") + + println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)") + println("***************************************************************") + + + jldsave( + joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2"); + sobol_pts = samples, + train_idx = ind, + analytic_V = V, + analytic_TV = TV, + estimated_sobol = result, + mlt_sobol = result_preds, + mlt_pred_y = y_preds, + true_y = y, + noise_y = Γ, + observed_y = output, + ) + + + return y_preds, result_preds +end + + +main() diff --git a/examples/Emulator/G-function/plot_result.jl b/examples/Emulator/G-function/plot_result.jl new file mode 100644 index 000000000..17cc02308 --- /dev/null +++ b/examples/Emulator/G-function/plot_result.jl @@ -0,0 +1,136 @@ +using CairoMakie, ColorSchemes #for plots + + + +function main() + + output_directory = "output" + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] + n_dimensions = 3 + filename = joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2") + + ( + sobol_pts, + train_idx, + mlt_pred_y, + mlt_sobol, + analytic_V, + analytic_TV, + true_y, + noise_y, + observed_y, + estimated_sobol, + ) = load( + filename, + "sobol_pts", + "train_idx", + "mlt_pred_y", + "mlt_sobol", + "analytic_V", + "analytic_TV", + "true_y", + "noise_y", + "observed_y", + "estimated_sobol", + ) + + n_repeats = length(mlt_sobol) + + if n_repeats == 1 + f3, ax3, plt3 = scatter( + 1:n_dimensions, + mlt_sobol[1][:firstorder]; + color = :red, + markersize = 8, + marker = :cross, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, estimated_sobol[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, analytic_V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + mlt_sobol[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, estimated_sobol[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, analytic_TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save(joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), f3, px_per_unit = 3) + 
CairoMakie.save(joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), f3, px_per_unit = 3) + else + # get percentiles: + fo_mat = zeros(n_dimensions, n_repeats) + to_mat = zeros(n_dimensions, n_repeats) + + for (idx, rp) in enumerate(mlt_sobol) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3, ax3, plt3 = errorbars( + 1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, estimated_sobol[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, analytic_V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, estimated_sobol[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, analytic_TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save(joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), f3, px_per_unit = 3) + CairoMakie.save(joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), f3, px_per_unit = 3) + + end + # plots - first 3 dimensions + plot_dim = n_dimensions >= 3 ? 
3 : n_dimensions
+    f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4)
+    for i in 1:plot_dim
+        ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f")
+        scatter!(ax2, sobol_pts[:, i], mlt_pred_y[1][:], color = :blue)
+        scatter!(ax2, sobol_pts[train_idx, i], observed_y[:], color = :red, markersize = 8)
+    end
+    CairoMakie.save(joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), f2, px_per_unit = 3)
+    CairoMakie.save(joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), f2, px_per_unit = 3)
+end
+
+
+main()
diff --git a/examples/Emulator/Ishigami/emulate.jl b/examples/Emulator/Ishigami/emulate.jl
index aff3d457b..d404c36cc 100644
--- a/examples/Emulator/Ishigami/emulate.jl
+++ b/examples/Emulator/Ishigami/emulate.jl
@@ -1,4 +1,5 @@
+
 using GlobalSensitivityAnalysis
 const GSA = GlobalSensitivityAnalysis
 using Distributions
@@ -11,6 +12,8 @@ using CalibrateEmulateSample.Emulators
 using CalibrateEmulateSample.DataContainers
 using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers

+using JLD2
+
 using CairoMakie, ColorSchemes #for plots
 seed = 2589456
 #=
@@ -136,8 +139,10 @@ function main()

         push!(y_preds, y_pred)
         push!(result_preds, result_pred)
+        jldsave(joinpath(output_directory, "emulator_repeat_$(rep_idx)_$(case).jld2"); emulator)
     end

+
     # analytic sobol indices
     a = 7
     b = 0.1
@@ -149,6 +154,17 @@ function main()
     VT2 = a^2 / 8
     VT3 = 8 * b^2 * π^8 / 225

+    jldsave(
+        joinpath(output_directory, "results_$case.jld2");
+        sobol_pts = samples,
+        train_idx = ind,
+        mlt_pred_y = y_preds,
+        mlt_sobol = result_preds,
+        analytic_sobol = [V, V1, V2, V3, VT1, VT2, VT3], # include total variance V: plot_result.jl unpacks 7 values
+        true_y = y,
+        noise_y = Γ,
+        observed_y = output, # noisy training outputs, needed for replotting (as in the G-function example)
+        estimated_sobol = result,
+    )

     println(" ")
     println("True Sobol Indices")
diff --git a/examples/Emulator/Ishigami/plot_result.jl b/examples/Emulator/Ishigami/plot_result.jl
new file mode 100644
index 000000000..a86f1f96f
--- /dev/null
+++ b/examples/Emulator/Ishigami/plot_result.jl
@@ -0,0 +1,92 @@
+using CairoMakie, ColorSchemes #for plots
+using JLD2 # provides load
+import Statistics: mean, std
+
+
+function main()
+
+    output_directory = "output"
+    cases = ["Prior", "GP", "RF-scalar"]
+    case = cases[3]
+    filename = joinpath(output_directory, "results_$case.jld2")
+
+    (sobol_pts, train_idx, mlt_pred_y, mlt_sobol, analytic_sobol, true_y, noise_y, observed_y, estimated_sobol) = load(
+        filename,
+        "sobol_pts",
+        "train_idx",
+        "mlt_pred_y",
+        "mlt_sobol",
+        "analytic_sobol",
+        "true_y",
+        "noise_y",
+        "observed_y",
+        "estimated_sobol",
+    )
+
+    n_repeats = length(mlt_sobol)
+    (V, V1, V2, V3, VT1, VT2, VT3) = analytic_sobol
+
+    f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4)
+    axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f")
+    axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f")
+    axz = Axis(f1[1, 3], xlabel = "x3", ylabel = "f")
+
+    scatter!(axx, sobol_pts[:, 1], true_y[:], color = :orange)
+    scatter!(axy, sobol_pts[:, 2], true_y[:], color = :orange)
+    scatter!(axz, sobol_pts[:, 3], true_y[:], color = :orange)
+
+    save(joinpath(output_directory, "ishigami_slices_truth.png"), f1, px_per_unit = 3)
+    save(joinpath(output_directory, "ishigami_slices_truth.pdf"), f1, px_per_unit = 3)
+
+
+    # display some info
+    println(" ")
+    println("True Sobol Indices")
+    println("******************")
+    println(" firstorder: ", [V1 / V, V2 / V, V3 / V])
+    println(" totalorder: ", [VT1 / V, VT2 / V, VT3 / V])
+    println(" ")
+    println("Sampled truth Sobol Indices (# points $(size(sobol_pts, 1)))")
+    println("***************************")
+    println(" firstorder: ", estimated_sobol[:firstorder])
+    println(" totalorder: ",
estimated_sobol[:totalorder])
+    println(" ")
+
+    println("Sampled Emulated Sobol Indices (# obs $(length(train_idx)), noise var $noise_y)")
+    println("***************************************************************")
+    if n_repeats == 1
+        println(" firstorder: ", mlt_sobol[1][:firstorder])
+        println(" totalorder: ", mlt_sobol[1][:totalorder])
+    else
+        firstorder_mean = mean([ms[:firstorder] for ms in mlt_sobol])
+        firstorder_std = std([ms[:firstorder] for ms in mlt_sobol])
+        totalorder_mean = mean([ms[:totalorder] for ms in mlt_sobol])
+        totalorder_std = std([ms[:totalorder] for ms in mlt_sobol])
+
+        println("(mean) firstorder: ", firstorder_mean)
+        println("(std) firstorder: ", firstorder_std)
+        println("(mean) totalorder: ", totalorder_mean)
+        println("(std) totalorder: ", totalorder_std)
+    end
+
+    # plots
+
+    f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4)
+    axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f")
+    axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f")
+    axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f")
+    scatter!(axx_em, sobol_pts[:, 1], mlt_pred_y[1][:], color = :blue)
+    scatter!(axy_em, sobol_pts[:, 2], mlt_pred_y[1][:], color = :blue)
+    scatter!(axz_em, sobol_pts[:, 3], mlt_pred_y[1][:], color = :blue)
+    scatter!(axx_em, sobol_pts[train_idx, 1], observed_y[:], color = :red, markersize = 8)
+    scatter!(axy_em, sobol_pts[train_idx, 2], observed_y[:], color = :red, markersize = 8)
+    scatter!(axz_em, sobol_pts[train_idx, 3], observed_y[:], color = :red, markersize = 8)
+
+    save(joinpath(output_directory, "ishigami_slices_$(case).png"), f2, px_per_unit = 3)
+    save(joinpath(output_directory, "ishigami_slices_$(case).pdf"), f2, px_per_unit = 3)
+
+
+end
+
+
+main()
diff --git a/examples/Emulator/L63/Project.toml b/examples/Emulator/L63/Project.toml
index e2c9baa08..a7d2c0a6d 100644
--- a/examples/Emulator/L63/Project.toml
+++ b/examples/Emulator/L63/Project.toml
@@ -3,7 +3,9 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl
index 17b7864c0..dae19eb7c 100644
--- a/examples/Emulator/L63/emulate.jl
+++ b/examples/Emulator/L63/emulate.jl
@@ -1,3 +1,4 @@
+
 using OrdinaryDiffEq
 using Random, Distributions, LinearAlgebra
 ENV["GKSwstype"] = "100"
@@ -8,6 +9,7 @@ using JLD2
 using CalibrateEmulateSample.Emulators
 using CalibrateEmulateSample.EnsembleKalmanProcesses
 using CalibrateEmulateSample.DataContainers
+using EnsembleKalmanProcesses.Localizers

 function lorenz(du, u, p, t)
     du[1] = 10.0 * (u[2] - u[1])
@@ -90,9 +92,9 @@ function main()

     # Emulate
-    cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"]
+    cases = ["GP", "RF-prior", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"]

-    case = cases[5]
+    case = cases[6]

     nugget = Float64(1e-12)
     u_test = []
@@ -104,17 +106,37 @@ function main()

         rf_optimizer_overrides = Dict(
             "scheduler" => DataMisfitController(terminate_at = 1e4),
-            "cov_sample_multiplier" => 1.0,
-            "n_features_opt" => 200,
-
"n_iteration" => 10, #30 + "cov_sample_multiplier" => 5.0, + "n_features_opt" => 150, + "n_iteration" => 20, "accelerator" => NesterovAccelerator(), + # "localization" => EnsembleKalmanProcesses.Localizers.SECNice(0.05,1.0), # localization / s + "n_ensemble" => 200, + "verbose" => true, ) + # Build ML tools if case == "GP" gppackage = Emulators.GPJL() pred_type = Emulators.YType() mlt = GaussianProcess(gppackage; prediction_type = pred_type, noise_learn = false) + elseif case == "RF-prior" + #No optimization + rf_optimizer_overrides["n_iteration"] = 0 + rf_optimizer_overrides["cov_sample_multiplier"] = 0.1 + # put in whatever you want to reflect + kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget)) + n_features = 500 + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = rf_optimizer_overrides, + ) + elseif case ∈ ["RF-scalar", "RF-scalar-diagin"] n_features = 10 * Int(floor(sqrt(3 * n_tp))) kernel_structure = @@ -128,7 +150,7 @@ function main() optimizer_options = rf_optimizer_overrides, ) elseif case ∈ ["RF-svd-nonsep"] - kernel_structure = NonseparableKernel(LowRankFactor(6, nugget)) + kernel_structure = NonseparableKernel(LowRankFactor(4, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( @@ -163,6 +185,19 @@ function main() ) end + #save config for RF + if !(case == "GP") && (rep_idx == 1) + JLD2.save( + joinpath(output_directory, case * "_l63_config.jld2"), + "rf_optimizer_overrides", + rf_optimizer_overrides, + "n_features", + n_features, + "kernel_structure", + kernel_structure, + ) + end + # Emulate if case ∈ ["RF-nosvd-nonsep", "RF-nosvd-sep"] decorrelate = false @@ -209,7 +244,7 @@ function main() # plots for the first repeat if rep_idx == 1 # plotting trace - f = Figure(resolution = (900, 450)) + f = Figure(size = (900, 450)) axx = Axis(f[1, 1], xlabel = "time", ylabel = "x") axy = Axis(f[2, 1], xlabel = "time", ylabel = "y") axz = Axis(f[3, 1], xlabel = "time", ylabel = "z") @@ -292,7 +327,7 @@ function main() push!(u_cdf, u_cdf_tmp) end - f4 = Figure(resolution = (900, Int(floor(900 / 1.618)))) + f4 = Figure(size = (900, Int(floor(900 / 1.618)))) axx = Axis(f4[1, 1], xlabel = "", ylabel = "x") axy = Axis(f4[1, 2], xlabel = "", ylabel = "y") axz = Axis(f4[1, 3], xlabel = "", ylabel = "z") diff --git a/examples/Emulator/L63/plot_results.jl b/examples/Emulator/L63/plot_results.jl new file mode 100644 index 000000000..1ae772d93 --- /dev/null +++ b/examples/Emulator/L63/plot_results.jl @@ -0,0 +1,112 @@ +using Random, Distributions, LinearAlgebra +ENV["GKSwstype"] = "100" +using CairoMakie, ColorSchemes #for plots +using JLD2 + +function main() + # filepaths + output_directory = joinpath(@__DIR__, "output") + cases = ["GP", "RF-prior", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep"] + case = cases[2] + + # Load from saved files + #= + config_file = JLD2.load(joinpath(output_directory, case * "_l63_config.jld2")) + rf_optimizer_overrides = config_file["rf_optimizer_overrides"] + n_features = config_file["n_features"] + kernel_structure = config_file["kernel_structure"] + =# + + histogram_data = JLD2.load(joinpath(output_directory, case * "_l63_histdata.jld2")) + solhist = histogram_data["solhist"] + uhist = histogram_data["uhist"] + trajectory_data = JLD2.load(joinpath(output_directory, case * "_l63_testdata.jld2")) + solplot = trajectory_data["solplot"] + uplot = trajectory_data["uplot"] + + # plotting trajectories for 
just first repeat + uplot_tmp = uplot[1] + uhist_tmp = uhist[1] + + # copied from emulate.jl + dt = 0.01 + tmax_test = 20 #100 + xx = 0.0:dt:tmax_test + + + f = Figure(size = (900, 450)) + axx = Axis(f[1, 1], xlabel = "time", ylabel = "x") + axy = Axis(f[2, 1], xlabel = "time", ylabel = "y") + axz = Axis(f[3, 1], xlabel = "time", ylabel = "z") + + tt = 1:length(xx) + lines!(axx, xx, uplot_tmp[1, tt], color = :blue) + lines!(axy, xx, uplot_tmp[2, tt], color = :blue) + lines!(axz, xx, uplot_tmp[3, tt], color = :blue) + + lines!(axx, xx, solplot[1, tt], color = :orange) + lines!(axy, xx, solplot[2, tt], color = :orange) + lines!(axz, xx, solplot[3, tt], color = :orange) + + current_figure() + # save + save(joinpath(output_directory, case * "_l63_test.png"), f, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) + + # plot attractor + f3 = Figure() + lines(f3[1, 1], uplot_tmp[1, :], uplot_tmp[3, :], color = :blue) + lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) + + # save + save(joinpath(output_directory, case * "_l63_attr.png"), f3, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) + + # plotting histograms + f2 = Figure() + hist(f2[1, 1], uhist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist(f2[1, 2], uhist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + hist(f2[1, 3], uhist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) + + hist!(f2[1, 1], solhist[1, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(f2[1, 2], solhist[2, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + hist!(f2[1, 3], solhist[3, :], bins = 50, normalization = :pdf, color = (:orange, 0.5)) + + # save + save(joinpath(output_directory, case * "_l63_pdf.png"), f2, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_pdf.pdf"), f2, pt_per_unit = 3) + + + + # compare marginal histograms to truth - rough measure of fit + solcdf = sort(solhist, dims = 2) + + ucdf = [] + for u in uhist + ucdf_tmp = sort(u, dims = 2) + push!(ucdf, ucdf_tmp) + end + + f4 = Figure(size = (900, Int(floor(900 / 1.618)))) + axx = Axis(f4[1, 1], xlabel = "", ylabel = "x") + axy = Axis(f4[1, 2], xlabel = "", ylabel = "y") + axz = Axis(f4[1, 3], xlabel = "", ylabel = "z") + + unif_samples = (1:size(solcdf, 2)) / size(solcdf, 2) + + for u in ucdf + lines!(axx, u[1, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + lines!(axy, u[2, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + lines!(axz, u[3, :], unif_samples, color = (:blue, 0.2), linewidth = 4) + end + + lines!(axx, solcdf[1, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + lines!(axy, solcdf[2, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + lines!(axz, solcdf[3, :], unif_samples, color = (:orange, 1.0), linewidth = 4) + + # save + save(joinpath(output_directory, case * "_l63_cdfs.png"), f4, px_per_unit = 3) + save(joinpath(output_directory, case * "_l63_cdfs.pdf"), f4, pt_per_unit = 3) +end + +main() diff --git a/src/Emulator.jl b/src/Emulator.jl index e4d58e94a..5d716c14c 100644 --- a/src/Emulator.jl +++ b/src/Emulator.jl @@ -220,10 +220,9 @@ function predict( # [1.] normalize normalized_new_inputs = normalize(emulator, new_inputs) - # [2.] predict. Note: ds = decorrelated, standard ds_outputs, ds_output_var = predict(emulator.machine_learning_tool, normalized_new_inputs, mlt_kwargs...) - + # [3.] 
transform back to real coordinates or remain in decorrelated coordinates
     if !isa(get_machine_learning_tool(emulator), VectorRandomFeatureInterface)
         if transform_to_real && emulator.decomposition === nothing
@@ -312,6 +311,7 @@ function calculate_normalization(inputs::VOrM) where {VOrM <: AbstractVecOrMat}
         svd_in = svd(input_cov)
         sqrt_inv_sv = 1 ./ sqrt.(svd_in.S[1:rank(input_cov)])
         normalization = Diagonal(sqrt_inv_sv) * svd_in.Vt[1:rank(input_cov), :] #non-square
+        @info "reducing input dimension from $(size(input_cov,1)) to $(rank(input_cov)) during low-rank normalization"
     end
     return normalization
 end
diff --git a/src/RandomFeature.jl b/src/RandomFeature.jl
index 3caa6117e..9faf6cd70 100644
--- a/src/RandomFeature.jl
+++ b/src/RandomFeature.jl
@@ -3,6 +3,7 @@ using RandomFeatures
 const RF = RandomFeatures
 using EnsembleKalmanProcesses
 const EKP = EnsembleKalmanProcesses
+using EnsembleKalmanProcesses.Localizers
 using ..ParameterDistributions
 using ..Utilities
 using StableRNGs
@@ -14,7 +15,7 @@ export SeparableKernel, NonseparableKernel
 export get_input_cov_structure, get_output_cov_structure, get_cov_structure
 export get_eps
 export rank
-export shrinkage_cov
+export shrinkage_cov, nice_cov

 abstract type RandomFeatureInterface <: MachineLearningTool end

@@ -488,20 +489,19 @@ function calculate_mean_cov_and_coeffs(
     # sizes (output_dim x n_test), (output_dim x output_dim x n_test)

     ## TODO - the theory states that the following should be set:
-    # scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features)
+    # scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features)
+    scaled_coeffs = 1e-3 * rand(n_features) # overwrite with noise...
     # However the convergence is much improved with setting this to zero:
-    scaled_coeffs = 0
-
-
+    #scaled_coeffs = 0
     if decomp_type == "cholesky"
         chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L
+
         complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1))
     else
         svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S
         complexity = sum(log, svd_singval) # note this is log(abs(det))
     end
-    complexity = sqrt(abs(complexity)) #abs can introduce nonconvexity,
-
+    complexity = sqrt(complexity)
     return scaled_coeffs, complexity
 end

@@ -540,6 +540,55 @@ function shrinkage_cov(sample_mat::AA) where {AA <: AbstractMatrix}
 end


+"""
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+Calculate the empirical covariance, additionally applying the Noise Informed Covariance Estimator (NICE) of Vishny et al. (2024).
+"""
+function nice_cov(sample_mat::AA, n_samples = 400, δ::FT = 1.0) where {AA <: AbstractMatrix, FT <: Real}
+
+    n_sample_cov = size(sample_mat, 2)
+    Γ = cov(sample_mat, dims = 2)
+
+    bd_tol = 1e8 * eps()
+
+    v = sqrt.(diag(Γ))
+    V = Diagonal(v) #stds
+    V_inv = inv(V)
+    corr = clamp.(V_inv * Γ * V_inv, -1 + bd_tol, 1 - bd_tol) # full corr
+
+    # parameter sweep over the exponents
+    max_exponent = 2 * 5 # must be even
+    interp_steps = 100
+    # used to find the variability in the correlation coefficient matrix entries
+    std_corrs = approximate_corr_std.(corr, n_sample_cov, n_samples) # Found in EKP.Localizers !!
slowest part of code -> could speed up by precomputing an interpolation of [-1,1] + + std_tol = sqrt(sum(std_corrs .^ 2)) + α_min_exceeded = [max_exponent] + for α in 2:2:max_exponent # even exponents give a PSD + corr_psd = corr .^ (α + 1) # abs not needed as α even + # find the first exponent that exceeds the noise tolerance in norm + if norm(corr_psd - corr) > δ * std_tol + α_min_exceeded[1] = α + break + end + end + corr_psd = corr .^ α_min_exceeded[1] + corr_psd_prev = corr .^ (α_min_exceeded[1] - 2) # previous PSD correction + + for α in LinRange(1.0, 0.0, interp_steps) + corr_interp = ((1 - α) * (corr_psd_prev) + α * corr_psd) .* corr + if norm(corr_interp - corr) < δ * std_tol + corr[:, :] = corr_interp #update the correlation matrix block + break + end + end + + return posdef_correct(V * corr * V) # rebuild the cov matrix + +end + + """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -625,9 +674,13 @@ function estimate_mean_and_coeffnorm_covariance( end sample_mat = vcat(blockmeans, coeffl2norm, complexity) - shrinkage = true - if shrinkage + + correction = "shrinkage" + # correction = "nice" + if correction == "shrinkage" Γ = shrinkage_cov(sample_mat) + elseif correction == "nice" + Γ = nice_cov(sample_mat) else Γ = cov(sample_mat, dims = 2) end @@ -833,9 +886,13 @@ function estimate_mean_and_coeffnorm_covariance( sample_mat = vcat(blockmeans, coeffl2norm, complexity) - shrinkage = true - if shrinkage + + correction = "shrinkage" + # correction = "nice" + if correction == "shrinkage" Γ = shrinkage_cov(sample_mat) + elseif correction == "nice" + Γ = nice_cov(sample_mat) else Γ = cov(sample_mat, dims = 2) end diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl index 2547164b8..c7cfc50ed 100644 --- a/src/ScalarRandomFeature.jl +++ b/src/ScalarRandomFeature.jl @@ -163,6 +163,8 @@ function ScalarRandomFeatureInterface( "multithread" => "ensemble", # instead of "tullio" "verbose" => false, # verbose optimizer statements "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum + "recompute_cov_at" => Inf, # whether to recompute the output_covariance + "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles ) if !isnothing(optimizer_options) @@ -201,6 +203,7 @@ function hyperparameter_distribution_from_flat( M = zeros(input_dim) #scalar output U = hyperparameters_from_flat(x, input_dim, kernel_structure) + if !isposdef(U) println("U not posdef - correcting") U = posdef_correct(U) @@ -327,7 +330,6 @@ function build_models!( optimizer_options = get_optimizer_options(srfi) optimizer = get_optimizer(srfi) # empty vector - # Optimize features with EKP for each output dim # [1.] 
Split data into test/train 80/20 train_fraction = optimizer_options["train_fraction"] @@ -347,8 +349,6 @@ function build_models!( n_iteration = optimizer_options["n_iteration"] diagnostics = zeros(n_iteration, n_rfms) for i in 1:n_rfms - - io_pairs_opt = PairedDataContainer( input_values[:, idx_shuffle], reshape(output_values[i, idx_shuffle], 1, size(output_values, 2)), @@ -362,7 +362,7 @@ function build_models!( else throw( ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl), or \"ensemble\" (threading is done over the ensemble)", ), ) end @@ -409,28 +409,72 @@ function build_models!( opt_verbose_flag = optimizer_options["verbose"] scheduler = optimizer_options["scheduler"] accelerator = optimizer_options["accelerator"] + recompute_cov_at = optimizer_options["recompute_cov_at"] + localization = optimizer_options["localization"] initial_params = construct_initial_ensemble(rng, prior, n_ensemble) - min_complexity = n_features_opt * log(regularization.λ) - min_complexity = sqrt(abs(min_complexity)) - data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, min_complexity) - ekiobj = EKP.EnsembleKalmanProcess( - initial_params, - data, - Γ, - Inversion(), - scheduler = scheduler, - rng = rng, - accelerator = accelerator, - verbose = opt_verbose_flag, - ) + data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, 0.0) + ekiobj = [ + EKP.EnsembleKalmanProcess( + initial_params, + data, + Γ, + Inversion(), + scheduler = scheduler, + rng = rng, + accelerator = accelerator, + verbose = opt_verbose_flag, + localization_method = localization, + ), + ] err = zeros(n_iteration) # [4.] optimize with EKP - for it in 1:n_iteration + Γ_old = [Γ] + for i in 1:n_iteration #get parameters: - lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) + lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj[1])) + + if i % recompute_cov_at == 0 + + internal_Γ_new, approx_σ2_new = estimate_mean_and_coeffnorm_covariance( + srfi, + rng, + transform_unconstrained_to_constrained(prior, mean(get_u_final(ekiobj[1]), dims = 2)), + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + ) + + Γ_new = internal_Γ_new + Γ_new[1:n_test, 1:n_test] += regularization # + approx_σ2 + Γ_new[(n_test + 1):end, (n_test + 1):end] += I + if opt_verbose_flag + @info "updated algorithm covariance difference: $(norm(Γ_new - Γ_old[1]))" + end + Γ_old[1] = Γ_new + # update eki. (the reason why it was a vector) + # TODO: This restart is horrible. 
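+                    # Rebuilding is necessary because an EKP object's covariance is fixed at
+                    # construction; we therefore restart a fresh process from the current
+                    # (unconstrained) ensemble with the recomputed covariance Γ_new. The scheduler,
+                    # accelerator, and localization below are hardcoded restart choices (see the
+                    # commit message) and should be treated as provisional defaults.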
+ ekiobj[1] = EKP.EnsembleKalmanProcess( + get_u_final(ekiobj[1]), # unconstrained + data, + Γ_new, + Inversion(), + scheduler = DataMisfitController(terminate_at = 1e4), + rng = rng, + accelerator = DefaultAccelerator(), + localization_method = Localizers.SECNice(200), + verbose = opt_verbose_flag, + ) + + end g_ens, _ = calculate_ensemble_mean_and_coeffnorm( srfi, @@ -445,24 +489,23 @@ function build_models!( decomp_type, multithread_type, ) + inflation = optimizer_options["inflation"] if inflation > 0 - terminated = - EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + terminated = EKP.update_ensemble!(ekiobj[1], g_ens, additive_inflation = true, s = inflation) # small regularizing inflation else - terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation + terminated = EKP.update_ensemble!(ekiobj[1], g_ens) # small regularizing inflation end if !isnothing(terminated) break # if the timestep was terminated due to timestepping condition end - err[it] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - + err[i] = get_error(ekiobj[1])[end] #mean((params_true - mean(params_i,dims=2)).^2) end diagnostics[:, i] = copy(err) # [5.] extract optimal hyperparameters - hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] + hp_optimal = get_ϕ_mean_final(prior, ekiobj[1])[:] if opt_verbose_flag names = get_name(prior) diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl index 465074e5d..6434cad0f 100644 --- a/src/VectorRandomFeature.jl +++ b/src/VectorRandomFeature.jl @@ -418,17 +418,15 @@ function build_models!( regularization = I else - reg_mat = regularization_matrix + # think of the regularization_matrix as the observational noise covariance, or a related quantity if !isposdef(regularization_matrix) regularization = posdef_correct(regularization_matrix) println("RF regularization matrix is not positive definite, correcting") else - # think of the regularization_matrix as the observational noise covariance, or a related quantity - regularization = exp((1 / output_dim) * sum(log.(eigvals(reg_mat)))) * I #i.e. det(M)^{1/output_dim} I - - #regularization = reg_mat #using the full p.d. tikhonov exp. EXPENSIVE, and challenge get complexity terms + regularization = regularization_matrix end + end idx_shuffle = randperm(rng, n_data) @@ -472,38 +470,25 @@ function build_models!( if tikhonov_opt_val == 0 # Build the covariance Γ = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += regularization # + approx_σ2 + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += + isa(regularization, UniformScaling) ? regularization : kron(I(n_test), regularization) # + approx_σ2 Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I - - #in diag case we have data logdet = λ^m, in non diag case we have logdet(Λ^) to match the different reg matrices. - min_complexity = - isa(regularization, UniformScaling) ? 
n_features_opt * log(regularization.λ) : - n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) - min_complexity = sqrt(abs(min_complexity)) - - - data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, min_complexity) #flatten data + data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, 0.0) #flatten data elseif tikhonov_opt_val > 0 # augment the state to add tikhonov outsize = size(internal_Γ, 1) Γ = zeros(outsize + n_hp, outsize + n_hp) Γ[1:outsize, 1:outsize] = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + regularization + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += kron(I(n_test), regularization) # block diag regularization Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) - #TODO the min complexity here is not the correct object in the non-diagonal case - min_complexity = - isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : - n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) - min_complexity = sqrt(abs(min_complexity)) - data = vcat( reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, - min_complexity, + 0.0, zeros(size(Γ, 1) - outsize, 1), ) #flatten data with additional zeros else @@ -572,8 +557,7 @@ function build_models!( end inflation = optimizer_options["inflation"] if inflation > 0 - terminated = - EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation else terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation end diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl index 75110bfa3..470b860ba 100644 --- a/test/RandomFeature/runtests.jl +++ b/test/RandomFeature/runtests.jl @@ -165,8 +165,6 @@ rng = Random.MersenneTwister(seed) @test get_input_dim(srfi) == input_dim @test get_rng(srfi) == rng @test get_kernel_structure(srfi) == kernel_structure - @test get_optimizer_options(srfi) == optimizer_options - # check defaults srfi2 = ScalarRandomFeatureInterface(n_features, input_dim) @test get_batch_sizes(srfi2) === nothing
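For reference, the options exercised throughout this patch ("scheduler", "localization", "inflation", "recompute_cov_at", ...) all flow through the `optimizer_options` dictionary of the random-feature interfaces. A minimal sketch of the construction pattern used in the examples above; the dimension, feature count, and option values here are illustrative only and not taken from this patch:

using CalibrateEmulateSample.Emulators
using CalibrateEmulateSample.EnsembleKalmanProcesses
using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers

input_dim = 3    # illustrative input dimension
n_features = 300 # illustrative feature count
nugget = 1e-12
# scalar RF: separable kernel with a low-rank input factor (as in the G-function example)
kernel_structure = SeparableKernel(LowRankFactor(input_dim, nugget), OneDimFactor())
optimizer_options = Dict(
    "n_iteration" => 20,
    "n_ensemble" => 200,
    "scheduler" => DataMisfitController(terminate_at = 100),
    "localization" => SEC(0.05), # sample-error correction, newly plumbed through for scalar RF
    "verbose" => true,
)
mlt = ScalarRandomFeatureInterface(
    n_features,
    input_dim,
    kernel_structure = kernel_structure,
    optimizer_options = optimizer_options,
)

Setting "recompute_cov_at" to a finite iteration interval additionally triggers the covariance-recomputation restart added to build_models! above.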