From e847d04973374736692a9718622e037f307271db Mon Sep 17 00:00:00 2001
From: odunbar
Date: Thu, 19 Oct 2023 14:12:32 -0700
Subject: [PATCH 1/4] add ishigami example for RF and GP - GP scalar fails

ability to run repeated RF training
bugfix shuffled indices for training data
add accelerators & can fix n_features_opt
remove comments
format runtests
---
 examples/Emulator/Ishigami/Project.toml |   9 ++
 examples/Emulator/Ishigami/emulate.jl   | 195 ++++++++++++++++++++++++
 examples/Emulator/L63/emulate.jl        |  53 +++++--
 src/ScalarRandomFeature.jl              |  11 +-
 src/VectorRandomFeature.jl              |  21 +--
 test/RandomFeature/runtests.jl          |   6 +-
 6 files changed, 268 insertions(+), 27 deletions(-)
 create mode 100644 examples/Emulator/Ishigami/Project.toml
 create mode 100644 examples/Emulator/Ishigami/emulate.jl

diff --git a/examples/Emulator/Ishigami/Project.toml b/examples/Emulator/Ishigami/Project.toml
new file mode 100644
index 000000000..930071c5b
--- /dev/null
+++ b/examples/Emulator/Ishigami/Project.toml
@@ -0,0 +1,9 @@
+[deps]
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3"
+ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+GlobalSensitivityAnalysis = "1b10255b-6da3-57ce-9089-d24e8517b87e"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
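
The example ships its own environment via the [deps] above. A minimal sketch of
running it from the repository root, assuming the standard Pkg workflow and the
file paths introduced by this patch:

    using Pkg
    Pkg.activate("examples/Emulator/Ishigami")         # environment defined above
    Pkg.instantiate()                                  # install the listed dependencies
    include("examples/Emulator/Ishigami/emulate.jl")   # script added in the next diff
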
diff --git a/examples/Emulator/Ishigami/emulate.jl b/examples/Emulator/Ishigami/emulate.jl
new file mode 100644
index 000000000..b152a9608
--- /dev/null
+++ b/examples/Emulator/Ishigami/emulate.jl
@@ -0,0 +1,195 @@
+
+using GlobalSensitivityAnalysis
+const GSA = GlobalSensitivityAnalysis
+using Distributions
+using DataStructures
+using Random
+using LinearAlgebra
+
+using CalibrateEmulateSample.EnsembleKalmanProcesses
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.DataContainers
+
+using CairoMakie, ColorSchemes #for plots
+seed = 2589456
+#=
+#take in parameters x as [3 x pts] matrix
+# Classical values (a,b) = (7, 0.05) from Sobol, Levitan 1999
+# also (a,b) = (7, 0.1) from Marrel et al 2009
+function ishigami(x::MM; a = 7.0, b = 0.05) where {MM <: AbstractMatrix}
+    @assert size(x, 1) == 3
+    return (1 .+ b * x[3, :] .^ 4) .* sin.(x[1, :]) + a * sin.(x[2, :]) .^ 2
+end
+=#
+function main()
+
+    rng = MersenneTwister(seed)
+
+    n_repeats = 20 # repeat exp with same data.
+
+    # To create the sampling
+    n_data_gen = 2000
+
+    data = SobolData(
+        params = OrderedDict(:x1 => Uniform(-π, π), :x2 => Uniform(-π, π), :x3 => Uniform(-π, π)),
+        N = n_data_gen,
+    )
+
+    # To perform global analysis,
+    # one must generate samples using Sobol sequence (i.e. creates more than N points)
+    samples = GSA.sample(data)
+    n_data = size(samples, 1) # [n_samples x 3]
+    # run model (example)
+    y = GSA.ishigami(samples)
+    # perform Sobol Analysis
+    result = analyze(data, y)
+
+    f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4)
+    axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f")
+    axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f")
+    axz = Axis(f1[1, 3], xlabel = "x3", ylabel = "f")
+
+    scatter!(axx, samples[:, 1], y[:], color = :orange)
+    scatter!(axy, samples[:, 2], y[:], color = :orange)
+    scatter!(axz, samples[:, 3], y[:], color = :orange)
+
+    save("ishigami_slices_truth.png", f1, px_per_unit = 3)
+    save("ishigami_slices_truth.pdf", f1, px_per_unit = 3)
+
+    n_train_pts = 300
+    ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts]
+    # now subsample the samples data
+    n_tp = length(ind)
+    input = zeros(3, n_tp)
+    output = zeros(1, n_tp)
+    Γ = 1e-2
+    noise = rand(rng, Normal(0, Γ), n_tp)
+    for i in 1:n_tp
+        input[:, i] = samples[ind[i], :]
+        output[i] = y[ind[i]] + noise[i]
+    end
+    iopairs = PairedDataContainer(input, output)
+
+    cases = ["Prior", "GP", "RF-scalar"]
+    case = cases[3]
+    decorrelate = true
+    nugget = Float64(1e-12)
+
+    overrides = Dict(
+        "scheduler" => DataMisfitController(terminate_at = 1e4),
+        "cov_sample_multiplier" => 1.0,
+        "n_features_opt" => 100,
+        "n_iteration" => 10,
+    )
+    if case == "Prior"
+        # don't do anything
+        overrides["n_iteration"] = 0
+        overrides["cov_sample_multiplier"] = 0.1
+    end
+
+    y_preds = []
+    result_preds = []
+
+    for rep_idx in 1:n_repeats
+
+        # Build ML tools
+        if case == "GP"
+            gppackage = Emulators.GPJL()
+            pred_type = Emulators.YType()
+            mlt = GaussianProcess(
+                gppackage;
+                kernel = nothing, # use default squared exponential kernel
+                prediction_type = pred_type,
+                noise_learn = false,
+            )
+
+        elseif case ∈ ["RF-scalar", "Prior"]
+
+            kernel_structure = SeparableKernel(LowRankFactor(3, nugget), OneDimFactor())
+            n_features = 500
+            mlt = ScalarRandomFeatureInterface(
+                n_features,
+                3,
+                rng = rng,
+                kernel_structure = kernel_structure,
+                optimizer_options = overrides,
+            )
+        end
+
+        # Emulate
+        emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate)
+        optimize_hyperparameters!(emulator)
+
+        # predict on all Sobol points with emulator (example)
+        y_pred, y_var = predict(emulator, samples', transform_to_real = true)
+
+        # obtain emulated Sobol indices
+        result_pred = analyze(data, y_pred')
+        push!(y_preds, y_pred)
+        push!(result_preds, result_pred)
+
+    end
+
+    # analytic sobol indices
+    a = 7
+    b = 0.1
+    V = a^2 / 8 + b * π^4 / 5 + b^2 * π^8 / 18 + 1 / 2
+    V1 = 0.5 * (1 + b * π^4 / 5)^2
+    V2 = a^2 / 8
+    V3 = 0
+    VT1 = 0.5 * (1 + b * π^4 / 5)^2 + 8 * b^2 * π^8 / 225
+    VT2 = a^2 / 8
+    VT3 = 8 * b^2 * π^8 / 225
+
+
+    println(" ")
+    println("True Sobol Indices")
+    println("******************")
+    println("    firstorder: ", [V1 / V, V2 / V, V3 / V])
+    println("    totalorder: ", [VT1 / V, VT2 / V, VT3 / V])
+    println(" ")
+    println("Sampled truth Sobol Indices (# points $n_data)")
+    println("***************************")
+    println("    firstorder: ", result[:firstorder])
+    println("    totalorder: ", result[:totalorder])
+    println(" ")
+
+    println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)")
+    println("***************************************************************")
+    if n_repeats == 1
+        println("    firstorder: ", result_preds[1][:firstorder])
+        println("    totalorder: ", result_preds[1][:totalorder])
+    else
+        firstorder_mean = mean([rp[:firstorder] for rp in result_preds])
+        firstorder_std = std([rp[:firstorder] for rp in result_preds])
+        totalorder_mean = mean([rp[:totalorder] for rp in result_preds])
+        totalorder_std = std([rp[:totalorder] for rp in result_preds])
+
+        println("(mean) firstorder: ", firstorder_mean)
+        println("(std) firstorder: ", firstorder_std)
+        println("(mean) totalorder: ", totalorder_mean)
+        println("(std) totalorder: ", totalorder_std)
+    end
+
+
+    # plots
+
+    f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4)
+    axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f")
+    axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f")
+    axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f")
+    scatter!(axx_em, samples[:, 1], y_preds[1][:], color = :blue)
+    scatter!(axy_em, samples[:, 2], y_preds[1][:], color = :blue)
+    scatter!(axz_em, samples[:, 3], y_preds[1][:], color = :blue)
+    scatter!(axx_em, samples[ind, 1], y[ind] + noise, color = :red, markersize = 8)
+    scatter!(axy_em, samples[ind, 2], y[ind] + noise, color = :red, markersize = 8)
+    scatter!(axz_em, samples[ind, 3], y[ind] + noise, color = :red, markersize = 8)
+
+    save("ishigami_slices_$(case).png", f2, px_per_unit = 3)
+    save("ishigami_slices_$(case).pdf", f2, px_per_unit = 3)
+
+
+end
+
+
+main()
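
The analytic variances hard-coded in the script above are the standard closed
forms for the Ishigami function, with the (a, b) = (7, 0.1) variant matching
the values set in the script. A standalone sketch of the same computation, with
the index values it should print:

    # closed-form Sobol indices for f(x) = sin(x1) + a*sin(x2)^2 + b*x3^4*sin(x1)
    a, b = 7.0, 0.1
    V = a^2 / 8 + b * π^4 / 5 + b^2 * π^8 / 18 + 1 / 2                # total variance
    V_first = [0.5 * (1 + b * π^4 / 5)^2, a^2 / 8, 0.0]               # first-order variances
    V_total = [V_first[1] + 8 * b^2 * π^8 / 225, a^2 / 8, 8 * b^2 * π^8 / 225]
    println("firstorder: ", V_first ./ V)   # ≈ [0.314, 0.442, 0.0]
    println("totalorder: ", V_total ./ V)   # ≈ [0.558, 0.442, 0.244]
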
diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl
index 289314295..659f2a29d 100644
--- a/examples/Emulator/L63/emulate.jl
+++ b/examples/Emulator/L63/emulate.jl
@@ -5,10 +5,12 @@ using CairoMakie, ColorSchemes #for plots
 using JLD2
 
 # CES
+using CalibrateEmulateSample
 using CalibrateEmulateSample.Emulators
 using CalibrateEmulateSample.ParameterDistributions
 using CalibrateEmulateSample.EnsembleKalmanProcesses
 using CalibrateEmulateSample.DataContainers
+EKP = CalibrateEmulateSample.EnsembleKalmanProcesses
 
 
 function lorenz(du, u, p, t)
@@ -47,30 +49,35 @@ function main()
     # Create training pairs (with noise) from subsampling [burnin,tmax]
     tburn = 10
     burnin = Int(floor(10 / dt))
-    n_train_pts = 400 #600
-    ind = shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts]
+    n_train_pts = 600 #600
+    ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts])
     n_tp = length(ind)
     input = zeros(3, n_tp)
     output = zeros(3, n_tp)
-    Γy = 1e-6 * I(3)
+    Γy = 1e-4 * I(3)
     noise = rand(rng, MvNormal(zeros(3), Γy), n_tp)
     for i in 1:n_tp
-        input[:, i] = sol.u[i]
-        output[:, i] = sol.u[i + 1] + noise[:, i]
+        input[:, i] = sol.u[ind[i]]
+        output[:, i] = sol.u[ind[i] + 1] + noise[:, i]
     end
     iopairs = PairedDataContainer(input, output)
 
     # Emulate
-    cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-vector-svd-nonsep"]
+    cases =
+        ["GP", "RF-scalar", "RF-scalar-diagin", "RF-vector-svd-nonsep", "RF-vector-nosvd-nonsep", "RF-vector-nosvd-sep"]
 
-    case = cases[4]
+    case = cases[1]
     decorrelate = true
     nugget = Float64(1e-12)
 
     overrides = Dict(
         "verbose" => true,
-        "scheduler" => DataMisfitController(terminate_at = 1e4),
-        "cov_sample_multiplier" => 2.0,
-        "n_iteration" => 10,
+        "scheduler" => DefaultScheduler(1.0), #DataMisfitController(terminate_at = 1e4),
+        "cov_sample_multiplier" => 0.5,
+        "n_features_opt" => 200,
+        "n_iteration" => 20,
+        # "n_ensemble" => 20,
+        # "localization" => EKP.Localizers.SEC(1.0, 0.1),
+        "accelerator" => NesterovAccelerator(),
     )
 
     # Build ML tools
@@ -97,9 +104,33 @@ function main()
             optimizer_options = overrides,
         )
     elseif case ∈ ["RF-vector-svd-nonsep"]
-        kernel_structure = NonseparableKernel(LowRankFactor(9, nugget))
+        kernel_structure = NonseparableKernel(LowRankFactor(6, nugget))
         n_features = 500
+        mlt = VectorRandomFeatureInterface(
+            n_features,
+            3,
+            3,
+            rng = rng,
+            kernel_structure = kernel_structure,
+            optimizer_options = overrides,
+        )
+    elseif case ∈ ["RF-vector-nosvd-nonsep"]
+        kernel_structure = NonseparableKernel(LowRankFactor(3, nugget))
+        n_features = 500
+        decorrelate = false # don't do SVD
+        mlt = VectorRandomFeatureInterface(
+            n_features,
+            3,
+            3,
+            rng = rng,
+            kernel_structure = kernel_structure,
+            optimizer_options = overrides,
+        )
+    elseif case ∈ ["RF-vector-nosvd-sep"]
+        kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget))
+        n_features = 500
+        decorrelate = false # don't do SVD
         mlt = VectorRandomFeatureInterface(
             n_features,
             3,
diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl
index 2c9ccc736..4c2fdb22d 100644
--- a/src/ScalarRandomFeature.jl
+++ b/src/ScalarRandomFeature.jl
@@ -116,7 +116,9 @@ Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for
  - "tikhonov": tikhonov regularization parameter if >0
  - "inflation": additive inflation ∈ [0,1] with 0 being no inflation
  - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split
+ - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction)
  - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra
+ - "accelerator": use EKP accelerators (default is no acceleration)
  - "verbose" => false, verbose optimizer statements
 """
 function ScalarRandomFeatureInterface(
@@ -145,11 +147,13 @@ function ScalarRandomFeatureInterface(
         "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble
         "n_iteration" => 5, # number of eki iterations
         "scheduler" => EKP.DataMisfitController(), # Adaptive timestepping,
-        "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme
+        "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme
         "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation
         "train_fraction" => 0.8, # 80:20 train - test split
+        "n_features_opt" => n_features, # number of features for the optimization
         "multithread" => "ensemble", # instead of "tullio"
         "verbose" => false, # verbose optimizer statements
+        "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum
     )
 
     if !isnothing(optimizer_options)
@@ -314,7 +318,8 @@ function build_models!(
     train_fraction = optimizer_options["train_fraction"]
     n_train = Int(floor(train_fraction * n_data))
     n_test = n_data - n_train
-    n_features_opt = min(n_features, Int(floor(10 * n_test))) #Int(floor(2 * n_test))) #we take a specified number of features for optimization.
+    n_features_opt = optimizer_options["n_features_opt"]
+
     idx_shuffle = randperm(rng, n_data)
 
     #regularization = I = 1.0 in scalar case
@@ -386,6 +391,7 @@ function build_models!(
     n_iteration = optimizer_options["n_iteration"]
     opt_verbose_flag = optimizer_options["verbose"]
     scheduler = optimizer_options["scheduler"]
+    accelerator = optimizer_options["accelerator"]
 
     initial_params = construct_initial_ensemble(rng, prior, n_ensemble)
     min_complexity = log(regularization.λ)
@@ -398,6 +404,7 @@ function build_models!(
         Inversion(),
         scheduler = scheduler,
         rng = rng,
+        accelerator = accelerator,
         verbose = opt_verbose_flag,
     )
     err = zeros(n_iteration)
diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl
index 5326d1678..96a0ef231 100644
--- a/src/VectorRandomFeature.jl
+++ b/src/VectorRandomFeature.jl
@@ -144,7 +144,9 @@ Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for
  - "tikhonov": tikhonov regularization parameter if > 0
  - "inflation": additive inflation ∈ [0,1] with 0 being no inflation
  - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split
+ - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction)
  - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra
+ - "accelerator": use EKP accelerators (default is no acceleration)
  - "verbose" => false, verbose optimizer statements to check convergence, priors and optimal parameters.
 
 """
@@ -183,9 +185,11 @@ function VectorRandomFeatureInterface(
         "tikhonov" => 0, # tikhonov regularization parameter if >0
         "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation
         "train_fraction" => 0.8, # 80:20 train - test split
+        "n_features_opt" => n_features, # number of features for the optimization
         "multithread" => "ensemble", # instead of "tullio"
         "verbose" => false, # verbose optimizer statements
         "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles
+        "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum
     )
 
     if !isnothing(optimizer_options)
@@ -384,19 +388,7 @@ function build_models!(
     train_fraction = optimizer_options["train_fraction"]
     n_train = Int(floor(train_fraction * n_data)) # 20% split
     n_test = n_data - n_train
-
-    # there are less hyperparameters when we diagonalize
-    if nameof(typeof(kernel_structure)) == :SeparableKernel
-        if nameof(typeof(get_output_cov_structure(kernel_structure))) == :DiagonalFactor
-            n_features_opt = min(n_features, Int(floor(4 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER
-        else
-            # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx.
-            n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER
-        end
-    else
-        # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx.
-        n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER
-    end
+    n_features_opt = optimizer_options["n_features_opt"]
 
     @info (
         "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features"
     )
@@ -511,6 +503,8 @@ function build_models!(
     opt_verbose_flag = optimizer_options["verbose"]
     scheduler = optimizer_options["scheduler"]
     localization = optimizer_options["localization"]
+    accelerator = optimizer_options["accelerator"]
+
     if !isposdef(Γ)
         Γ = posdef_correct(Γ)
     end
@@ -526,6 +520,7 @@ function build_models!(
         rng = rng,
         verbose = opt_verbose_flag,
         localization_method = localization,
+        accelerator = accelerator,
     )
 
     err = zeros(n_iteration)
diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl
index 3e5b1351c..6867040ee 100644
--- a/test/RandomFeature/runtests.jl
+++ b/test/RandomFeature/runtests.jl
@@ -140,10 +140,12 @@ rng = Random.MersenneTwister(seed)
         "n_ensemble" => max(ndims(prior) + 1, 10),
         "n_iteration" => 5,
         "scheduler" => DataMisfitController(),
-        "cov_sample_multiplier" => 10.0,
+        "n_features_opt" => n_features,
+        "cov_sample_multiplier" => 2.0,
         "inflation" => 1e-4,
         "train_fraction" => 0.8,
         "multithread" => "ensemble",
+        "accelerator" => DefaultAccelerator(),
         "verbose" => false,
     )
 
@@ -201,10 +203,12 @@ rng = Random.MersenneTwister(seed)
         "n_iteration" => 5,
         "scheduler" => DataMisfitController(),
         "cov_sample_multiplier" => 10.0,
+        "n_features_opt" => n_features,
        "tikhonov" => 0,
         "inflation" => 1e-4,
         "train_fraction" => 0.8,
         "multithread" => "ensemble",
+        "accelerator" => DefaultAccelerator(),
         "verbose" => false,
         "localization" => EnsembleKalmanProcesses.Localizers.NoLocalization(),
     )
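
Patch 1 exposes two new optimizer options on both random feature interfaces:
"n_features_opt", which decouples the (cheaper) feature count used during
hyperparameter optimization from the count used for prediction, and
"accelerator", which adds momentum acceleration to the EKI hyperparameter
solve. A minimal usage sketch, using only names that appear in the diffs above:

    using CalibrateEmulateSample.Emulators
    using CalibrateEmulateSample.EnsembleKalmanProcesses # NesterovAccelerator, DefaultAccelerator

    optimizer_options = Dict(
        "n_features_opt" => 200,                # features used while optimizing hyperparameters
        "accelerator" => NesterovAccelerator(), # momentum acceleration of the EKI solve
    )
    mlt = ScalarRandomFeatureInterface(
        500, # n_features retained for prediction
        3,   # input dimension
        optimizer_options = optimizer_options,
    )
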
From a7eff328cb655f4e8d11d01a14f842439ebd1ce8 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Fri, 20 Oct 2023 15:32:40 -0700
Subject: [PATCH 2/4] get latest Julia for buildkite

---
 .buildkite/pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 040ed0e7b..fd82cdefa 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,5 +1,5 @@
 env:
-  JULIA_VERSION: "1.8.0"
+  JULIA_VERSION: "1"
   OPENBLAS_NUM_THREADS: 1
   GKSwstype: nul

From 938745da13fee1c5c7aa963cbdbbb2033facd190 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Fri, 20 Oct 2023 16:15:36 -0700
Subject: [PATCH 3/4] buildkite update

---
 .buildkite/pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index fd82cdefa..e04b64aee 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,5 +1,5 @@
 env:
-  JULIA_VERSION: "1"
+  JULIA_VERSION: "1.9.5"
   OPENBLAS_NUM_THREADS: 1
   GKSwstype: nul

From 651c0aaa79b882f588d7b80856151b22a3461dc1 Mon Sep 17 00:00:00 2001
From: odunbar
Date: Fri, 20 Oct 2023 16:58:16 -0700
Subject: [PATCH 4/4] try 1.9.2

---
 .buildkite/pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index e04b64aee..3df4d232f 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -1,5 +1,5 @@
 env:
-  JULIA_VERSION: "1.9.5"
+  JULIA_VERSION: "1.9.2"
   OPENBLAS_NUM_THREADS: 1
   GKSwstype: nul