diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index cc8952d45..59cc0c134 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,5 +1,5 @@ env: - JULIA_VERSION: "1.8.0" + JULIA_VERSION: "1.9.2" OPENBLAS_NUM_THREADS: 1 GKSwstype: nul diff --git a/examples/Emulator/Ishigami/Project.toml b/examples/Emulator/Ishigami/Project.toml new file mode 100644 index 000000000..930071c5b --- /dev/null +++ b/examples/Emulator/Ishigami/Project.toml @@ -0,0 +1,9 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +GlobalSensitivityAnalysis = "1b10255b-6da3-57ce-9089-d24e8517b87e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/examples/Emulator/Ishigami/emulate.jl b/examples/Emulator/Ishigami/emulate.jl new file mode 100644 index 000000000..b152a9608 --- /dev/null +++ b/examples/Emulator/Ishigami/emulate.jl @@ -0,0 +1,195 @@ + +using GlobalSensitivityAnalysis +const GSA = GlobalSensitivityAnalysis +using Distributions +using DataStructures +using Random +using LinearAlgebra + +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers + +using CairoMakie, ColorSchemes #for plots +seed = 2589456 +#= +#take in parameters x as [3 x pts] matrix +# Classical values (a,b) = (7, 0.05) from Sobol, Levitan 1999 +# also (a,b) = (7, 0.1) from Marrel et al 2009 +function ishigami(x::MM; a = 7.0, b = 0.05) where {MM <: AbstractMatrix} + @assert size(x,1) == 3 + return (1 .+ b * x[3,:].^4) * sin.(x[1,:]) + a * sin.(x[2,:]).^2 +end +=# +function main() + + rng = MersenneTwister(seed) + + n_repeats = 20 # repeat exp with same data. + + # To create the sampling + n_data_gen = 2000 + + data = SobolData( + params = OrderedDict(:x1 => Uniform(-π, π), :x2 => Uniform(-π, π), :x3 => Uniform(-π, π)), + N = n_data_gen, + ) + + # To perform global analysis, + # one must generate samples using Sobol sequence (i.e. creates more than N points) + samples = GSA.sample(data) + n_data = size(samples, 1) # [n_samples x 3] + # run model (example) + y = GSA.ishigami(samples) + # perform Sobol Analysis + result = analyze(data, y) + + f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4) + axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f") + axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f") + axz = Axis(f1[1, 3], xlabel = "x3", ylabel = "f") + + scatter!(axx, samples[:, 1], y[:], color = :orange) + scatter!(axy, samples[:, 2], y[:], color = :orange) + scatter!(axz, samples[:, 3], y[:], color = :orange) + + save("ishigami_slices_truth.png", f1, px_per_unit = 3) + save("ishigami_slices_truth.pdf", f1, px_per_unit = 3) + + n_train_pts = 300 + ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts] + # now subsample the samples data + n_tp = length(ind) + input = zeros(3, n_tp) + output = zeros(1, n_tp) + Γ = 1e-2 + noise = rand(rng, Normal(0, Γ), n_tp) + for i in 1:n_tp + input[:, i] = samples[ind[i], :] + output[i] = y[ind[i]] + noise[i] + end + iopairs = PairedDataContainer(input, output) + + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] + decorrelate = true + nugget = Float64(1e-12) + + overrides = Dict( + "scheduler" => DataMisfitController(terminate_at = 1e4), + "cov_sample_multiplier" => 1.0, + "n_features_opt" => 100, + "n_iteration" => 10, + ) + if case == "Prior" + # don't do anything + overrides["n_iteration"] = 0 + overrides["cov_sample_multiplier"] = 0.1 + end + + y_preds = [] + result_preds = [] + + for rep_idx in 1:n_repeats + + # Build ML tools + if case == "GP" + gppackage = Emulators.GPJL() + pred_type = Emulators.YType() + mlt = GaussianProcess( + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + + elseif case ∈ ["RF-scalar", "Prior"] + + kernel_structure = SeparableKernel(LowRankFactor(3, nugget), OneDimFactor()) + n_features = 500 + mlt = ScalarRandomFeatureInterface( + n_features, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + end + + # Emulate + emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) + optimize_hyperparameters!(emulator) + + # predict on all Sobol points with emulator (example) + y_pred, y_var = predict(emulator, samples', transform_to_real = true) + + # obtain emulated Sobol indices + result_pred = analyze(data, y_pred') + push!(y_preds, y_pred) + push!(result_preds, result_pred) + + end + + # analytic sobol indices + a = 7 + b = 0.1 + V = a^2 / 8 + b * π^4 / 5 + b^2 * π^8 / 18 + 1 / 2 + V1 = 0.5 * (1 + b * π^4 / 5)^2 + V2 = a^2 / 8 + V3 = 0 + VT1 = 0.5 * (1 + b * π^4 / 5)^2 + 8 * b^2 * π^8 / 225 + VT2 = a^2 / 8 + VT3 = 8 * b^2 * π^8 / 225 + + + println(" ") + println("True Sobol Indices") + println("******************") + println(" firstorder: ", [V1 / V, V2 / V, V3 / V]) + println(" totalorder: ", [VT1 / V, VT2 / V, VT3 / V]) + println(" ") + println("Sampled truth Sobol Indices (# points $n_data)") + println("***************************") + println(" firstorder: ", result[:firstorder]) + println(" totalorder: ", result[:totalorder]) + println(" ") + + println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)") + println("***************************************************************") + if n_repeats == 1 + println(" firstorder: ", result_preds[1][:firstorder]) + println(" totalorder: ", result_preds[1][:totalorder]) + else + firstorder_mean = mean([rp[:firstorder] for rp in result_preds]) + firstorder_std = std([rp[:firstorder] for rp in result_preds]) + totalorder_mean = mean([rp[:totalorder] for rp in result_preds]) + totalorder_std = std([rp[:totalorder] for rp in result_preds]) + + println("(mean) firstorder: ", firstorder_mean) + println("(std) firstorder: ", firstorder_std) + println("(mean) totalorder: ", totalorder_mean) + println("(std) totalorder: ", totalorder_std) + end + + + # plots + + f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4) + axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f") + axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f") + axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f") + scatter!(axx_em, samples[:, 1], y_preds[1][:], color = :blue) + scatter!(axy_em, samples[:, 2], y_preds[1][:], color = :blue) + scatter!(axz_em, samples[:, 3], y_preds[1][:], color = :blue) + scatter!(axx_em, samples[ind, 1], y[ind] + noise, color = :red, markersize = 8) + scatter!(axy_em, samples[ind, 2], y[ind] + noise, color = :red, markersize = 8) + scatter!(axz_em, samples[ind, 3], y[ind] + noise, color = :red, markersize = 8) + + save("ishigami_slices_$(case).png", f2, px_per_unit = 3) + save("ishigami_slices_$(case).pdf", f2, px_per_unit = 3) + + +end + + +main() diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl index 289314295..659f2a29d 100644 --- a/examples/Emulator/L63/emulate.jl +++ b/examples/Emulator/L63/emulate.jl @@ -5,10 +5,12 @@ using CairoMakie, ColorSchemes #for plots using JLD2 # CES +using CalibrateEmulateSample using CalibrateEmulateSample.Emulators using CalibrateEmulateSample.ParameterDistributions using CalibrateEmulateSample.EnsembleKalmanProcesses using CalibrateEmulateSample.DataContainers +EKP = CalibrateEmulateSample.EnsembleKalmanProcesses function lorenz(du, u, p, t) @@ -47,30 +49,35 @@ function main() # Create training pairs (with noise) from subsampling [burnin,tmax] tburn = 10 burnin = Int(floor(10 / dt)) - n_train_pts = 400 #600 - ind = shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts] + n_train_pts = 600 #600 + ind = Int.(shuffle!(rng, Vector(burnin:(tmax / dt - 1)))[1:n_train_pts]) n_tp = length(ind) input = zeros(3, n_tp) output = zeros(3, n_tp) - Γy = 1e-6 * I(3) + Γy = 1e-4 * I(3) noise = rand(rng, MvNormal(zeros(3), Γy), n_tp) for i in 1:n_tp - input[:, i] = sol.u[i] - output[:, i] = sol.u[i + 1] + noise[:, i] + input[:, i] = sol.u[ind[i]] + output[:, i] = sol.u[ind[i] + 1] + noise[:, i] end iopairs = PairedDataContainer(input, output) # Emulate - cases = ["GP", "RF-scalar", "RF-scalar-diagin", "RF-vector-svd-nonsep"] + cases = + ["GP", "RF-scalar", "RF-scalar-diagin", "RF-vector-svd-nonsep", "RF-vector-nosvd-nonsep", "RF-vector-nosvd-sep"] - case = cases[4] + case = cases[1] decorrelate = true nugget = Float64(1e-12) overrides = Dict( "verbose" => true, - "scheduler" => DataMisfitController(terminate_at = 1e4), - "cov_sample_multiplier" => 2.0, - "n_iteration" => 10, + "scheduler" => DefaultScheduler(1.0),#DataMisfitController(terminate_at = 1e4), + "cov_sample_multiplier" => 0.5, + "n_features_opt" => 200, + "n_iteration" => 20, + # "n_ensemble" => 20, + # "localization" => EKP.Localizers.SEC(1.0,0.1), + "accelerator" => NesterovAccelerator(), ) # Build ML tools @@ -97,9 +104,33 @@ function main() optimizer_options = overrides, ) elseif case ∈ ["RF-vector-svd-nonsep"] - kernel_structure = NonseparableKernel(LowRankFactor(9, nugget)) + kernel_structure = NonseparableKernel(LowRankFactor(6, nugget)) n_features = 500 + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + elseif case ∈ ["RF-vector-nosvd-nonsep"] + kernel_structure = NonseparableKernel(LowRankFactor(3, nugget)) + n_features = 500 + decorrelate = false # don't do SVD + mlt = VectorRandomFeatureInterface( + n_features, + 3, + 3, + rng = rng, + kernel_structure = kernel_structure, + optimizer_options = overrides, + ) + elseif case ∈ ["RF-vector-nosvd-sep"] + kernel_structure = SeparableKernel(LowRankFactor(3, nugget), LowRankFactor(3, nugget)) + n_features = 500 + decorrelate = false # don't do SVD mlt = VectorRandomFeatureInterface( n_features, 3, diff --git a/examples/Emulator/RandomFeature/Project.toml b/examples/Emulator/RandomFeature/Project.toml index 9d41c3993..5fa1349f4 100644 --- a/examples/Emulator/RandomFeature/Project.toml +++ b/examples/Emulator/RandomFeature/Project.toml @@ -1,5 +1,4 @@ [deps] -CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl index 2c9ccc736..4c2fdb22d 100644 --- a/src/ScalarRandomFeature.jl +++ b/src/ScalarRandomFeature.jl @@ -116,7 +116,9 @@ Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for - "tikhonov": tikhonov regularization parameter if >0 - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split + - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "accelerator": use EKP accelerators (default is no acceleration) - "verbose" => false, verbose optimizer statements """ function ScalarRandomFeatureInterface( @@ -145,11 +147,13 @@ function ScalarRandomFeatureInterface( "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble "n_iteration" => 5, # number of eki iterations "scheduler" => EKP.DataMisfitController(), # Adaptive timestepping, - "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme + "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation "train_fraction" => 0.8, # 80:20 train - test split + "n_features_opt" => n_features, # number of features for the optimization "multithread" => "ensemble", # instead of "tullio" "verbose" => false, # verbose optimizer statements + "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum ) if !isnothing(optimizer_options) @@ -314,7 +318,8 @@ function build_models!( train_fraction = optimizer_options["train_fraction"] n_train = Int(floor(train_fraction * n_data)) n_test = n_data - n_train - n_features_opt = min(n_features, Int(floor(10 * n_test)))#Int(floor(2 * n_test))) #we take a specified number of features for optimization. + n_features_opt = optimizer_options["n_features_opt"] + idx_shuffle = randperm(rng, n_data) #regularization = I = 1.0 in scalar case @@ -386,6 +391,7 @@ function build_models!( n_iteration = optimizer_options["n_iteration"] opt_verbose_flag = optimizer_options["verbose"] scheduler = optimizer_options["scheduler"] + accelerator = optimizer_options["accelerator"] initial_params = construct_initial_ensemble(rng, prior, n_ensemble) min_complexity = log(regularization.λ) @@ -398,6 +404,7 @@ function build_models!( Inversion(), scheduler = scheduler, rng = rng, + accelerator = accelerator, verbose = opt_verbose_flag, ) err = zeros(n_iteration) diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl index 5326d1678..96a0ef231 100644 --- a/src/VectorRandomFeature.jl +++ b/src/VectorRandomFeature.jl @@ -144,7 +144,9 @@ Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for - "tikhonov": tikhonov regularization parameter if > 0 - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split + - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "accelerator": use EKP accelerators (default is no acceleration) - "verbose" => false, verbose optimizer statements to check convergence, priors and optimal parameters. """ @@ -183,9 +185,11 @@ function VectorRandomFeatureInterface( "tikhonov" => 0, # tikhonov regularization parameter if >0 "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation "train_fraction" => 0.8, # 80:20 train - test split + "n_features_opt" => n_features, # number of features for the optimization "multithread" => "ensemble", # instead of "tullio" "verbose" => false, # verbose optimizer statements "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles + "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum ) if !isnothing(optimizer_options) @@ -384,19 +388,7 @@ function build_models!( train_fraction = optimizer_options["train_fraction"] n_train = Int(floor(train_fraction * n_data)) # 20% split n_test = n_data - n_train - - # there are less hyperparameters when we diagonalize - if nameof(typeof(kernel_structure)) == :SeparableKernel - if nameof(typeof(get_output_cov_structure(kernel_structure))) == :DiagonalFactor - n_features_opt = min(n_features, Int(floor(4 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER - else - # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx. - n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train)))#we take a specified number of features for optimization. MAGIC NUMBER - end - else - # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx. - n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train)))#we take a specified number of features for optimization. MAGIC NUMBER - end + n_features_opt = optimizer_options["n_features_opt"] @info ( "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" @@ -511,6 +503,8 @@ function build_models!( opt_verbose_flag = optimizer_options["verbose"] scheduler = optimizer_options["scheduler"] localization = optimizer_options["localization"] + accelerator = optimizer_options["accelerator"] + if !isposdef(Γ) Γ = posdef_correct(Γ) end @@ -526,6 +520,7 @@ function build_models!( rng = rng, verbose = opt_verbose_flag, localization_method = localization, + accelerator = accelerator, ) err = zeros(n_iteration) diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl index 3e5b1351c..6867040ee 100644 --- a/test/RandomFeature/runtests.jl +++ b/test/RandomFeature/runtests.jl @@ -140,10 +140,12 @@ rng = Random.MersenneTwister(seed) "n_ensemble" => max(ndims(prior) + 1, 10), "n_iteration" => 5, "scheduler" => DataMisfitController(), - "cov_sample_multiplier" => 10.0, + "n_features_opt" => n_features, + "cov_sample_multiplier" => 2.0, "inflation" => 1e-4, "train_fraction" => 0.8, "multithread" => "ensemble", + "accelerator" => DefaultAccelerator(), "verbose" => false, ) @@ -201,10 +203,12 @@ rng = Random.MersenneTwister(seed) "n_iteration" => 5, "scheduler" => DataMisfitController(), "cov_sample_multiplier" => 10.0, + "n_features_opt" => n_features, "tikhonov" => 0, "inflation" => 1e-4, "train_fraction" => 0.8, "multithread" => "ensemble", + "accelerator" => DefaultAccelerator(), "verbose" => false, "localization" => EnsembleKalmanProcesses.Localizers.NoLocalization(), )