diff --git a/examples/EDMF_data/emulator-rank-test.jl b/examples/EDMF_data/emulator-rank-test.jl index 16728b35..f26a07ef 100644 --- a/examples/EDMF_data/emulator-rank-test.jl +++ b/examples/EDMF_data/emulator-rank-test.jl @@ -51,7 +51,7 @@ function main() n_iteration = 10 - + # Output figure save directory figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today())) data_save_directory = joinpath(@__DIR__, "output", exp_name, string(Dates.today())) @@ -136,43 +136,43 @@ function main() truth_cov = truth_cov[good_datadim, good_datadim] # split out a training set: - n_test = Int(floor(0.2*length(good_particles))) - n_train = Int(ceil(0.8*length(good_particles))) - + n_test = Int(floor(0.2 * length(good_particles))) + n_train = Int(ceil(0.8 * length(good_particles))) + rng_seed = 14225 rng = Random.MersenneTwister(rng_seed) # random train-set # train_idx = shuffle(rng, collect(1:n_test+n_train))[1:n_train] # final train earlier iteration, test final iteration - train_idx = collect(1:n_test+n_train)[1:n_train] - test_idx = setdiff(collect(1:n_test+n_train),train_idx) - - train_inputs = inputs[:,train_idx] - train_outputs = outputs[:,train_idx] - - test_inputs = inputs[:,test_idx] - test_outputs = outputs[:,test_idx] - + train_idx = collect(1:(n_test + n_train))[1:n_train] + test_idx = setdiff(collect(1:(n_test + n_train)), train_idx) + + train_inputs = inputs[:, train_idx] + train_outputs = outputs[:, train_idx] + + test_inputs = inputs[:, test_idx] + test_outputs = outputs[:, test_idx] + train_pairs = DataContainers.PairedDataContainer(train_inputs, train_outputs) end - + @info "Completed data loading stage" println(" ") ############################################## # [3. ] Build Emulator from calibration data # ############################################## - opt_diagnostics = zeros(length(rank_test),n_repeats,n_iteration) - train_err = zeros(length(rank_test),n_repeats) - test_err = zeros(length(rank_test),n_repeats) - ttt = zeros(length(rank_test),n_repeats) + opt_diagnostics = zeros(length(rank_test), n_repeats, n_iteration) + train_err = zeros(length(rank_test), n_repeats) + test_err = zeros(length(rank_test), n_repeats) + ttt = zeros(length(rank_test), n_repeats) @info "Begin Emulation stage" # Create GP object train_frac = 0.9 n_cross_val_sets = 2 - max_feature_size=size(get_outputs(train_pairs), 2) * size(get_outputs(train_pairs),1) * (1-train_frac) + max_feature_size = size(get_outputs(train_pairs), 2) * size(get_outputs(train_pairs), 1) * (1 - train_frac) for (rank_id, rank_val) in enumerate(rank_test) #test over different ranks for svd-nonsep rng_seed = 99330 rng = Random.MersenneTwister(rng_seed) @@ -186,10 +186,10 @@ function main() "scheduler" => DataMisfitController(terminate_at = 1000), "cov_sample_multiplier" => 0.2, "n_iteration" => n_iteration, - "n_features_opt" => Int(floor((max_feature_size/5))),# here: /5 with rank <= 3 works + "n_features_opt" => Int(floor((max_feature_size / 5))),# here: /5 with rank <= 3 works "localization" => SEC(0.05), "n_ensemble" => 400, - "n_cross_val_sets" => n_cross_val_sets, + "n_cross_val_sets" => n_cross_val_sets, ) if case == "RF-prior" overrides = Dict("verbose" => true, "cov_sample_multiplier" => 0.01, "n_iteration" => 0) @@ -200,7 +200,7 @@ function main() decorrelate = true if case == "GP" - rank_val=0 + rank_val = 0 gppackage = Emulators.SKLJL() pred_type = Emulators.YType() mlt = GaussianProcess( @@ -210,9 +210,9 @@ function main() noise_learn = false, ) elseif case ∈ ["RF-vector-svd-sep"] - 
kernel_structure = SeparableKernel(LowRankFactor(5, nugget),LowRankFactor(rank_val,nugget)) + kernel_structure = SeparableKernel(LowRankFactor(5, nugget), LowRankFactor(rank_val, nugget)) n_features = 500 - + mlt = VectorRandomFeatureInterface( n_features, input_dim, @@ -221,11 +221,11 @@ function main() kernel_structure = kernel_structure, optimizer_options = overrides, ) - + elseif case ∈ ["RF-vector-svd-nonsep", "RF-prior"] kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) n_features = 500 - + mlt = VectorRandomFeatureInterface( n_features, input_dim, @@ -237,7 +237,7 @@ function main() elseif case ∈ ["RF-vector-nosvd-nonsep"] kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) n_features = 500 - + mlt = VectorRandomFeatureInterface( n_features, input_dim, @@ -248,11 +248,11 @@ function main() ) decorrelate = false end - + # Fit an emulator to the data normalized = true - - ttt[rank_id,rep_idx] = @elapsed begin + + ttt[rank_id, rep_idx] = @elapsed begin emulator = Emulator( mlt, train_pairs; @@ -260,7 +260,7 @@ function main() normalize_inputs = normalized, decorrelate = decorrelate, ) - + # Optimize the GP hyperparameters for better fit optimize_hyperparameters!(emulator) end @@ -271,7 +271,7 @@ function main() train_mean, _ = Emulators.predict(emulator, train_inputs[:, i:i], transform_to_real = true) # 3x1 train_err_tmp[1] += norm(train_mean - train_outputs[:, i]) end - train_err[rank_id,rep_idx] = 1 / size(train_inputs, 2) * train_err_tmp[1] + train_err[rank_id, rep_idx] = 1 / size(train_inputs, 2) * train_err_tmp[1] # error of emulator on test points: test_err_tmp = [0.0] @@ -279,32 +279,36 @@ function main() test_mean, _ = Emulators.predict(emulator, test_inputs[:, i:i], transform_to_real = true) # 3x1 test_err_tmp[1] += norm(test_mean - test_outputs[:, i]) end - test_err[rank_id,rep_idx] = 1 / size(test_inputs, 2) * test_err_tmp[1] + test_err[rank_id, rep_idx] = 1 / size(test_inputs, 2) * test_err_tmp[1] @info "train error ($(size(train_inputs,2)) pts): $(train_err[rank_id,rep_idx])" @info "test error ($(size(test_inputs, 2)) pts): $(test_err[rank_id,rep_idx])" if !(case ∈ ["GP", "RF-prior"]) - opt_diagnostics[rank_id,rep_idx,:] = get_optimizer(mlt)[1] #length-1 vec of vec -> vec + opt_diagnostics[rank_id, rep_idx, :] = get_optimizer(mlt)[1] #length-1 vec of vec -> vec eki_conv_filepath = joinpath(data_save_directory, "$(case)_$(rank_val)_eki-conv.jld2") save(eki_conv_filepath, "opt_diagnostics", opt_diagnostics) end -# emulator_filepath = joinpath(data_save_directory, "$(case)_$(rank_val)_emulator.jld2") -# save(emulator_filepath, "emulator", emulator) + # emulator_filepath = joinpath(data_save_directory, "$(case)_$(rank_val)_emulator.jld2") + # save(emulator_filepath, "emulator", emulator) JLD2.save( - joinpath(data_save_directory, case * "_edmf_rank_test_results.jld2"), - "rank_test", collect(rank_test), - "timings", ttt, - "train_err", train_err, - "test_err", test_err, - ) + joinpath(data_save_directory, case * "_edmf_rank_test_results.jld2"), + "rank_test", + collect(rank_test), + "timings", + ttt, + "train_err", + train_err, + "test_err", + test_err, + ) end end - - - + + + @info "Finished Emulation stage" end diff --git a/examples/EDMF_data/plot_posterior.jl b/examples/EDMF_data/plot_posterior.jl index ac44b902..0133c00c 100644 --- a/examples/EDMF_data/plot_posterior.jl +++ b/examples/EDMF_data/plot_posterior.jl @@ -7,176 +7,177 @@ using Dates using CalibrateEmulateSample.ParameterDistributions function main() -##### -# Creates 1 
plots: One for a specific case, One with 2 cases, and One with all cases (final case being the prior).
-
-
-# date = Date(year,month,day)
-
-# 2-parameter calibration exp
-#exp_name = "ent-det-calibration"
-#date_of_run = Date(2023, 10, 5)
-
-# 5-parameter calibration exp
-exp_name = "ent-det-tked-tkee-stab-calibration"
-date_of_run = Date(2024, 11, 03)
-@info "plotting results found in $(date_of_run)"
-# Output figure read/write directory
-figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
-data_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
-
-#case:
-cases = [
-    "GP", # diagonalize, train scalar GP, assume diag inputs
-    "RF-prior",
-    "RF-vector-svd-nonsep",
-    "RF-vector-svd-sep",
-    "RF-vector-nosvd-nonsep",
-]
-case_rf = cases[3]
-kernel_rank = 3
-prior_kernel_rank = 3
-# load
-posterior_filepath = joinpath(data_save_directory, "$(case_rf)_$(kernel_rank)_posterior.jld2")
-if !isfile(posterior_filepath)
-    throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date"))
-else
-    @info "Loading posterior distribution from: " * posterior_filepath
-    posterior = load(posterior_filepath)["posterior"]
-end
-# get samples explicitly (may be easier to work with)
-posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns
-transformed_posterior_samples =
-    mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1)
-
-# histograms
-nparam_plots = sum(get_dimensions(posterior)) - 1
-density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_comp.png")
-transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_phys.png")
-labels = get_name(posterior)
-
-burnin = 50_000
-
-
-data_rf = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...)
-transformed_data_rf =
-    (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...)
-
-
-
-# p = pairplot(data_rf => (PairPlots.Scatter(),))
-
-p = pairplot(data_rf => (PairPlots.Contourf(sigmas = 1:1:3),))
-trans_p = pairplot(transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),))
-
-save(density_filepath, p)
-save(transformed_density_filepath, trans_p)
-@info "saved RF contour plot"
-
-#
-#
-#
+    #####
+    # Creates 3 plots: one for a specific case, one with 2 cases, and one with all cases (the final case being the prior).
+
+
+    # date = Date(year,month,day)
+
+    # 2-parameter calibration exp
+    #exp_name = "ent-det-calibration"
+    #date_of_run = Date(2023, 10, 5)
+
+    # 5-parameter calibration exp
+    exp_name = "ent-det-tked-tkee-stab-calibration"
+    date_of_run = Date(2024, 11, 03)
+    @info "plotting results found in $(date_of_run)"
+    # Output figure read/write directory
+    figure_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
+    data_save_directory = joinpath(@__DIR__, "output", exp_name, string(date_of_run))
+
+    #case:
+    cases = [
+        "GP", # diagonalize, train scalar GP, assume diag inputs
+        "RF-prior",
+        "RF-vector-svd-nonsep",
+        "RF-vector-svd-sep",
+        "RF-vector-nosvd-nonsep",
+    ]
+    case_rf = cases[3]
+    kernel_rank = 3
+    prior_kernel_rank = 3
+    # load
+    posterior_filepath = joinpath(data_save_directory, "$(case_rf)_$(kernel_rank)_posterior.jld2")
+    if !isfile(posterior_filepath)
+        throw(ArgumentError(posterior_filepath * " not found. 
Please check experiment name and date")) + else + @info "Loading posterior distribution from: " * posterior_filepath + posterior = load(posterior_filepath)["posterior"] + end + # get samples explicitly (may be easier to work with) + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + # histograms + nparam_plots = sum(get_dimensions(posterior)) - 1 + density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_comp.png") + transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(kernel_rank)_posterior_dist_phys.png") + labels = get_name(posterior) + + burnin = 50_000 + + + data_rf = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + transformed_data_rf = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + + + + # p = pairplot(data_rf => (PairPlots.Scatter(),)) + + p = pairplot(data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) + trans_p = pairplot(transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),)) -case_gp = cases[1] -# load -posterior_filepath = joinpath(data_save_directory, "$(case_gp)_0_posterior.jld2") -if !isfile(posterior_filepath) - throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) -else - @info "Loading posterior distribution from: " * posterior_filepath - posterior = load(posterior_filepath)["posterior"] -end -# get samples explicitly (may be easier to work with) -posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns -transformed_posterior_samples = - mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) - -# histograms -nparam_plots = sum(get_dimensions(posterior)) - 1 -density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_comp.png") -transformed_density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_phys.png") -labels = get_name(posterior) -data_gp = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) -transformed_data_gp = - (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) -# -# -# -gp_smoothing = 1 # >1 = smoothing KDE in plotting - -@warn "Contourf in PairPlots.jl for multiple histograms works in v2.8.0. Check versions if not seen correctly." -p = pairplot( - data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), - data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), -) + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + @info "saved RF contour plot" + + # + # + # + + case_gp = cases[1] + # load + posterior_filepath = joinpath(data_save_directory, "$(case_gp)_0_posterior.jld2") + if !isfile(posterior_filepath) + throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) + else + @info "Loading posterior distribution from: " * posterior_filepath + posterior = load(posterior_filepath)["posterior"] + end + # get samples explicitly (may be easier to work with) + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) 
#samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + # histograms + nparam_plots = sum(get_dimensions(posterior)) - 1 + density_filepath = joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_comp.png") + transformed_density_filepath = + joinpath(figure_save_directory, "$(case_rf)_$(case_gp)_$(kernel_rank)_posterior_dist_phys.png") + labels = get_name(posterior) + data_gp = (; [(Symbol(labels[i]), posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + transformed_data_gp = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, burnin:end]) for i in 1:length(labels)]...) + # + # + # + gp_smoothing = 1 # >1 = smoothing KDE in plotting + + @warn "Contourf in PairPlots.jl for multiple histograms works in v2.8.0. Check versions if not seen correctly." + p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + ) trans_p = pairplot( - transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), - transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), -) -#= - trans_p = pairplot( - Series((transformed_data_rf, color=Makie.wong_colors(0.5)[1]) => (PairPlots.Contourf(sigmas = 1:1:3)),), - Series((transformed_data_gp, color=Makie.wong_colors(0.5)[2]) => (PairPlots.Contourf(sigmas = 1:1:3)),), + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), ) -=# + #= + trans_p = pairplot( + Series((transformed_data_rf, color=Makie.wong_colors(0.5)[1]) => (PairPlots.Contourf(sigmas = 1:1:3)),), + Series((transformed_data_gp, color=Makie.wong_colors(0.5)[2]) => (PairPlots.Contourf(sigmas = 1:1:3)),), + ) + =# save(density_filepath, p) -save(transformed_density_filepath, trans_p) + save(transformed_density_filepath, trans_p) + + @info "saved RF/GP contour plot" + + + # Finally include the prior too + case_prior = cases[2] + # load + posterior_filepath = joinpath(data_save_directory, "$(case_prior)_$(prior_kernel_rank)_posterior.jld2") + if !isfile(posterior_filepath) + throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) + else + @info "Loading posterior distribution from: " * posterior_filepath + posterior = load(posterior_filepath)["posterior"] + end + # get samples explicitly (may be easier to work with) + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + # histograms + nparam_plots = sum(get_dimensions(posterior)) - 1 + density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.png") + transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.png") + labels = get_name(posterior) + + prior_burnin = 240000 + + data_prior = (; [(Symbol(labels[i]), posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) + transformed_data_prior = + (; [(Symbol(labels[i]), transformed_posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) 
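For reference, the `(; ...)` splat idiom used throughout this script turns an MCMC sample matrix (parameters by steps) into a NamedTuple keyed by parameter name, after dropping burn-in, which is the shape `pairplot` consumes. A minimal standalone sketch of that pattern; the labels, sample matrix, and burn-in length below are synthetic stand-ins, not values from the experiment:

using Random

labels = ["param_1", "param_2", "param_3"]                # hypothetical parameter names
samples = randn(MersenneTwister(42), length(labels), 1_000) # n_params x n_mcmc_steps
burnin = 200                                              # discard early, unconverged steps

# one (Symbol => sample vector) pair per parameter, keeping only post-burn-in columns
data_nt = (; [(Symbol(labels[i]), samples[i, burnin:end]) for i in 1:length(labels)]...)

# data_nt can then be passed to PairPlots as in the script, e.g.
# using CairoMakie, PairPlots
# p = pairplot(data_nt => (PairPlots.Contourf(sigmas = 1:1:3),))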
+ # + # + # + + p = pairplot( + data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + data_prior => (PairPlots.Scatter(),), + ) + trans_p = pairplot( + transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), + transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), + transformed_data_prior => (PairPlots.Scatter(),), + ) -@info "saved RF/GP contour plot" + save(density_filepath, p) + save(transformed_density_filepath, trans_p) + density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.pdf") + transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.pdf") + save(density_filepath, p) + save(transformed_density_filepath, trans_p) -# Finally include the prior too -case_prior = cases[2] -# load -posterior_filepath = joinpath(data_save_directory, "$(case_prior)_$(prior_kernel_rank)_posterior.jld2") -if !isfile(posterior_filepath) - throw(ArgumentError(posterior_filepath * " not found. Please check experiment name and date")) -else - @info "Loading posterior distribution from: " * posterior_filepath - posterior = load(posterior_filepath)["posterior"] -end -# get samples explicitly (may be easier to work with) -posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns -transformed_posterior_samples = - mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) - -# histograms -nparam_plots = sum(get_dimensions(posterior)) - 1 -density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.png") -transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.png") -labels = get_name(posterior) - -prior_burnin = 240000 - -data_prior = (; [(Symbol(labels[i]), posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) -transformed_data_prior = - (; [(Symbol(labels[i]), transformed_posterior_samples[i, prior_burnin:end]) for i in 1:length(labels)]...) 
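A short aside on the `mapslices(..., dims = 1)` call that precedes each plotting block above: it applies the unconstrained-to-constrained map one sample (one column) at a time. A self-contained sketch of the same columnwise pattern, with a plain `exp` standing in for `transform_unconstrained_to_constrained`, which needs a real `posterior` object:

# Columnwise-transform sketch; `exp` is only a stand-in for the actual
# ParameterDistributions.transform_unconstrained_to_constrained(posterior, x).
unconstrained = randn(5, 100)                      # n_params x n_samples
to_constrained(x) = exp.(x)                        # stand-in map: real line -> (0, Inf)
constrained = mapslices(to_constrained, unconstrained, dims = 1)  # applied per column
@assert size(constrained) == size(unconstrained)
@assert all(constrained .> 0)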
-# -# -# - -p = pairplot( - data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), - data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), - data_prior => (PairPlots.Scatter(),), -) -trans_p = pairplot( - transformed_data_rf => (PairPlots.Contourf(sigmas = 1:1:3),), - transformed_data_gp => (PairPlots.Contourf(sigmas = 1:1:3, bandwidth = gp_smoothing),), - transformed_data_prior => (PairPlots.Scatter(),), -) - -save(density_filepath, p) -save(transformed_density_filepath, trans_p) - -density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_comp.pdf") -transformed_density_filepath = joinpath(figure_save_directory, "all_$(kernel_rank)_posterior_dist_phys.pdf") -save(density_filepath, p) -save(transformed_density_filepath, trans_p) - -@info "saved RF/Prior/GP contour plot" + @info "saved RF/Prior/GP contour plot" end main() diff --git a/examples/EDMF_data/uq_for_edmf.jl b/examples/EDMF_data/uq_for_edmf.jl index 80516cbc..5deb7811 100644 --- a/examples/EDMF_data/uq_for_edmf.jl +++ b/examples/EDMF_data/uq_for_edmf.jl @@ -202,17 +202,18 @@ function main() kernel_rank = 3 # svd-sep should be kr - 5 (as input rank is set to 5) n_cross_val_sets = 2 @info "Kernel rank: $(kernel_rank)" - max_feature_size=size(get_outputs(input_output_pairs), 2) * size(get_outputs(input_output_pairs),1) * (1-train_frac) + max_feature_size = + size(get_outputs(input_output_pairs), 2) * size(get_outputs(input_output_pairs), 1) * (1 - train_frac) overrides = Dict( "verbose" => true, "train_fraction" => train_frac, "scheduler" => DataMisfitController(terminate_at = 1000), "cov_sample_multiplier" => 0.2, "n_iteration" => 15, - "n_features_opt" => Int(floor((max_feature_size/5))),# here: /5 with rank <= 3 works - "localization" => SEC(0.05), + "n_features_opt" => Int(floor((max_feature_size / 5))),# here: /5 with rank <= 3 works + "localization" => SEC(0.05), "n_ensemble" => 400, - "n_cross_val_sets" => n_cross_val_sets, + "n_cross_val_sets" => n_cross_val_sets, ) if case == "RF-prior" overrides = Dict("verbose" => true, "cov_sample_multiplier" => 0.01, "n_iteration" => 0) @@ -225,7 +226,7 @@ function main() decorrelate = true opt_diagnostics = [] if case == "GP" - kernel_rank=0 + kernel_rank = 0 gppackage = Emulators.SKLJL() pred_type = Emulators.YType() mlt = GaussianProcess( @@ -235,7 +236,7 @@ function main() noise_learn = false, ) elseif case ∈ ["RF-vector-svd-sep"] - kernel_structure = SeparableKernel(LowRankFactor(5, nugget),LowRankFactor(kernel_rank,nugget)) + kernel_structure = SeparableKernel(LowRankFactor(5, nugget), LowRankFactor(kernel_rank, nugget)) n_features = 500 mlt = VectorRandomFeatureInterface( @@ -246,7 +247,7 @@ function main() kernel_structure = kernel_structure, optimizer_options = overrides, ) - + elseif case ∈ ["RF-vector-svd-nonsep", "RF-prior"] kernel_structure = NonseparableKernel(LowRankFactor(kernel_rank, nugget)) n_features = 500 @@ -320,7 +321,7 @@ function main() emulator_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_emulator.jld2") save(emulator_filepath, "emulator", emulator) - if length(opt_diagnostics)>0 + if length(opt_diagnostics) > 0 eki_conv_filepath = joinpath(data_save_directory, "$(case)_$(kernel_rank)_eki-conv.jld2") save(eki_conv_filepath, "opt_diagnostics", opt_diagnostics) end diff --git a/examples/Emulator/G-function/emulate-test-n-features.jl b/examples/Emulator/G-function/emulate-test-n-features.jl index 2a3651b9..042694fb 100644 --- a/examples/Emulator/G-function/emulate-test-n-features.jl +++ 
b/examples/Emulator/G-function/emulate-test-n-features.jl @@ -99,16 +99,16 @@ function main() nugget = Float64(1e-12) n_features_vec = [25, 50, 100, 200, 400, 800] # < data points ideally as output dim = 1 - ttt = zeros(length(n_features_vec),n_repeats) - train_err = zeros(length(n_features_vec),n_repeats) - test_err = zeros(length(n_features_vec),n_repeats) + ttt = zeros(length(n_features_vec), n_repeats) + train_err = zeros(length(n_features_vec), n_repeats) + test_err = zeros(length(n_features_vec), n_repeats) n_cross_val_sets = 2 - + for (f_idx, n_features_opt) in enumerate(n_features_vec) y_preds = [] result_preds = [] - + overrides = Dict( #"verbose" => true, "scheduler" => DataMisfitController(terminate_at = 1e3), @@ -125,7 +125,7 @@ function main() overrides["n_iteration"] = 0 overrides["cov_sample_multiplier"] = 0.1 end - + for rep_idx in 1:n_repeats @info "Testing #features = $(n_features_opt) \n repeat $(rep_idx) of $(n_repeats)" # Build ML tools @@ -149,163 +149,170 @@ function main() optimizer_options = deepcopy(overrides), ) end - + # Emulate ttt[f_idx, rep_idx] = @elapsed begin emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) optimize_hyperparameters!(emulator) end - + # errors: # training error y_pred, y_var = predict(emulator, get_inputs(iopairs), transform_to_real = true) - train_err[f_idx,rep_idx] = sqrt(sum((y_pred - get_outputs(iopairs)).^2))/n_train_pts + train_err[f_idx, rep_idx] = sqrt(sum((y_pred - get_outputs(iopairs)) .^ 2)) / n_train_pts # predict on all test points with emulator (example) y_pred, y_var = predict(emulator, samples', transform_to_real = true) #predict on all points - ind_test = ind_total[n_train_pts+1:end] - test_err[f_idx,rep_idx] = sqrt(sum((y_pred[ind_test] - y[ind_test]).^2))/length(ind_test) + ind_test = ind_total[(n_train_pts + 1):end] + test_err[f_idx, rep_idx] = sqrt(sum((y_pred[ind_test] - y[ind_test]) .^ 2)) / length(ind_test) JLD2.save( - joinpath(output_directory, "diff_n_features_GFunction_$(case)_$(n_dimensions)_ntest-$(Int(n_train_pts/5))_cv-$(n_cross_val_sets).jld2"), - "n_features_vec", n_features_vec, - "timings", ttt, - "train_err", train_err, - "test_err", test_err, + joinpath( + output_directory, + "diff_n_features_GFunction_$(case)_$(n_dimensions)_ntest-$(Int(n_train_pts/5))_cv-$(n_cross_val_sets).jld2", + ), + "n_features_vec", + n_features_vec, + "timings", + ttt, + "train_err", + train_err, + "test_err", + test_err, ) #save every iteration for safety - + # obtain emulated Sobol indices result_pred = analyze(data, y_pred') println("First order: ", result_pred[:firstorder]) println("Total order: ", result_pred[:totalorder]) - + push!(y_preds, y_pred) push!(result_preds, result_pred) GC.gc() #collect garbage - - # PLotting: -#= if rep_idx == 1 - f3, ax3, plt3 = scatter( - 1:n_dimensions, - result_preds[1][:firstorder]; - color = :red, - markersize = 8, - marker = :cross, - label = "V-emulate", - title = "input dimension: $(n_dimensions)", - ) - scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") - scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") - scatter!( - ax3, - 1:n_dimensions, - result_preds[1][:totalorder]; - color = :blue, - label = "TV-emulate", - markersize = 8, - marker = :cross, - ) - scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") - scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") - axislegend(ax3) - - CairoMakie.save( - 
joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), - f3, - px_per_unit = 3, - ) - CairoMakie.save( - joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), - f3, - px_per_unit = 3, - ) - else - # get percentiles: - fo_mat = zeros(n_dimensions, rep_idx) - to_mat = zeros(n_dimensions, rep_idx) - - for (idx, rp) in enumerate(result_preds) - fo_mat[:, idx] = rp[:firstorder] - to_mat[:, idx] = rp[:totalorder] - end - - firstorder_med = percentile.(eachrow(fo_mat), 50) - firstorder_low = percentile.(eachrow(fo_mat), 5) - firstorder_up = percentile.(eachrow(fo_mat), 95) - - totalorder_med = percentile.(eachrow(to_mat), 50) - totalorder_low = percentile.(eachrow(to_mat), 5) - totalorder_up = percentile.(eachrow(to_mat), 95) - - println("(50%) firstorder: ", firstorder_med) - println("(5%) firstorder: ", firstorder_low) - println("(95%) firstorder: ", firstorder_up) - - println("(50%) totalorder: ", totalorder_med) - println("(5%) totalorder: ", totalorder_low) - println("(95%) totalorder: ", totalorder_up) - # - f3, ax3, plt3 = errorbars( - 1:n_dimensions, - firstorder_med, - firstorder_med - firstorder_low, - firstorder_up - firstorder_med; - whiskerwidth = 10, - color = :red, - label = "V-emulate", - title = "input dimension: $(n_dimensions)", - ) - scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") - scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") - errorbars!( - ax3, - 1:n_dimensions, - totalorder_med, - totalorder_med - totalorder_low, - totalorder_up - totalorder_med; - whiskerwidth = 10, - color = :blue, - label = "TV-emulate", - ) - scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") - scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") - axislegend(ax3) - - CairoMakie.save( - joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), - f3, - px_per_unit = 3, - ) - CairoMakie.save( - joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), - f3, - px_per_unit = 3, - ) - end - # plots - first 3 dimensions - if rep_idx == 1 - f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) - for i in 1:plot_dim - ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") - scatter!(ax2, samples[:, i], y_preds[1][:], color = :blue) - scatter!(ax2, samples[ind, i], y[ind] + noise, color = :red, markersize = 8) - end - CairoMakie.save( - joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), - f2, - px_per_unit = 3, - ) - CairoMakie.save( - joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), - f2, - px_per_unit = 3, - ) - end - - end - =# + # PLotting: + #= if rep_idx == 1 + f3, ax3, plt3 = scatter( + 1:n_dimensions, + result_preds[1][:firstorder]; + color = :red, + markersize = 8, + marker = :cross, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + scatter!( + ax3, + 1:n_dimensions, + result_preds[1][:totalorder]; + color = :blue, + label = "TV-emulate", + markersize = 8, + marker = :cross, + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, 
"GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + else + # get percentiles: + fo_mat = zeros(n_dimensions, rep_idx) + to_mat = zeros(n_dimensions, rep_idx) + + for (idx, rp) in enumerate(result_preds) + fo_mat[:, idx] = rp[:firstorder] + to_mat[:, idx] = rp[:totalorder] + end + + firstorder_med = percentile.(eachrow(fo_mat), 50) + firstorder_low = percentile.(eachrow(fo_mat), 5) + firstorder_up = percentile.(eachrow(fo_mat), 95) + + totalorder_med = percentile.(eachrow(to_mat), 50) + totalorder_low = percentile.(eachrow(to_mat), 5) + totalorder_up = percentile.(eachrow(to_mat), 95) + + println("(50%) firstorder: ", firstorder_med) + println("(5%) firstorder: ", firstorder_low) + println("(95%) firstorder: ", firstorder_up) + + println("(50%) totalorder: ", totalorder_med) + println("(5%) totalorder: ", totalorder_low) + println("(95%) totalorder: ", totalorder_up) + # + f3, ax3, plt3 = errorbars( + 1:n_dimensions, + firstorder_med, + firstorder_med - firstorder_low, + firstorder_up - firstorder_med; + whiskerwidth = 10, + color = :red, + label = "V-emulate", + title = "input dimension: $(n_dimensions)", + ) + scatter!(ax3, result[:firstorder], color = :red, markersize = 8, label = "V-approx") + scatter!(ax3, V, color = :red, markersize = 12, marker = :xcross, label = "V-true") + errorbars!( + ax3, + 1:n_dimensions, + totalorder_med, + totalorder_med - totalorder_low, + totalorder_up - totalorder_med; + whiskerwidth = 10, + color = :blue, + label = "TV-emulate", + ) + scatter!(ax3, result[:totalorder], color = :blue, markersize = 8, label = "TV-approx") + scatter!(ax3, TV, color = :blue, markersize = 12, marker = :xcross, label = "TV-true") + axislegend(ax3) + + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).png"), + f3, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_sens_$(case)_$(n_dimensions).pdf"), + f3, + px_per_unit = 3, + ) + + end + # plots - first 3 dimensions + if rep_idx == 1 + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax2, samples[:, i], y_preds[1][:], color = :blue) + scatter!(ax2, samples[ind, i], y[ind] + noise, color = :red, markersize = 8) + end + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).png"), + f2, + px_per_unit = 3, + ) + CairoMakie.save( + joinpath(output_directory, "GFunction_slices_$(case)_$(n_dimensions).pdf"), + f2, + px_per_unit = 3, + ) + end + + end + =# end end diff --git a/examples/Emulator/G-function/emulate.jl b/examples/Emulator/G-function/emulate.jl index aa1c117d..23007f5d 100644 --- a/examples/Emulator/G-function/emulate.jl +++ b/examples/Emulator/G-function/emulate.jl @@ -1,110 +1,110 @@ - using GlobalSensitivityAnalysis - const GSA = GlobalSensitivityAnalysis - using Distributions - using DataStructures - using Random - using LinearAlgebra - import StatsBase: percentile - using JLD2 - - using CalibrateEmulateSample.EnsembleKalmanProcesses - using CalibrateEmulateSample.Emulators - using CalibrateEmulateSample.DataContainers - using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers - - using CairoMakie, ColorSchemes #for plots - seed = 2589436 - - output_directory = joinpath(@__DIR__, "output") - if !isdir(output_directory) - mkdir(output_directory) - end 
- - - inner_func(x::AV, a::AV) where {AV <: AbstractVector} = prod((abs.(4 * x .- 2) + a) ./ (1 .+ a)) - - "G-Function taken from https://www.sfu.ca/~ssurjano/gfunc.html" - function GFunction(x::AM, a::AV) where {AM <: AbstractMatrix, AV <: AbstractVector} - @assert size(x, 1) == length(a) - return mapslices(y -> inner_func(y, a), x; dims = 1) #applys the map to columns - end - - function GFunction(x::AM) where {AM <: AbstractMatrix} - a = [(i - 1.0) / 2.0 for i in 1:size(x, 1)] - return GFunction(x, a) - end - - function main() - - rng = MersenneTwister(seed) - - n_repeats = 20# repeat exp with same data. - n_dimensions = 20 - # To create the sampling - n_data_gen = 800 - - data = - SobolData(params = OrderedDict([Pair(Symbol("x", i), Uniform(0, 1)) for i in 1:n_dimensions]), N = n_data_gen) - - # To perform global analysis, - # one must generate samples using Sobol sequence (i.e. creates more than N points) - samples = GSA.sample(data) - n_data = size(samples, 1) # [n_samples x n_dim] - println("number of sobol points: ", n_data) - # run model (example) - y = GFunction(samples')' # G is applied to columns - # perform Sobol Analysis - result = analyze(data, y) - - # plot the first 3 dimensions - plot_dim = n_dimensions >= 3 ? 3 : n_dimensions - f1 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) - for i in 1:plot_dim - ax = Axis(f1[1, i], xlabel = "x" * string(i), ylabel = "f") - scatter!(ax, samples[:, i], y[:], color = :orange) - end - - CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).png"), f1, px_per_unit = 3) - CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).pdf"), f1, px_per_unit = 3) - - n_train_pts = n_dimensions * 250 - ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts] - # now subsample the samples data - n_tp = length(ind) - input = zeros(n_dimensions, n_tp) - output = zeros(1, n_tp) - Γ = 1e-3 - noise = rand(rng, Normal(0, Γ), n_tp) - for i in 1:n_tp - input[:, i] = samples[ind[i], :] - output[i] = y[ind[i]] + noise[i] - end - iopairs = PairedDataContainer(input, output) - - # analytic sobol indices taken from - # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8989694/pdf/main.pdf - a = [(i - 1.0) / 2.0 for i in 1:n_dimensions] # a_i < a_j => a_i more sensitive - prod_tmp = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1 - V = [(1 / (3 * (1 + ai)^2)) / prod_tmp for ai in a] - prod_tmp2 = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dimensions] - TV = [(1 / (3 * (1 + ai)^2)) * prod_tmp2[i] / prod_tmp for (i, ai) in enumerate(a)] - - - - cases = ["Prior", "GP", "RF-scalar"] - case = cases[3] - decorrelate = true - nugget = Float64(1e-12) - - overrides = Dict( - "verbose" => true, - "scheduler" => DataMisfitController(terminate_at = 1e2), - "n_features_opt" => 150, - "n_iteration" => 10, - "cov_sample_multiplier" => 3.0, - #"localization" => SEC(0.1),#,Doesnt help much tbh - #"accelerator" => NesterovAccelerator(), +using GlobalSensitivityAnalysis +const GSA = GlobalSensitivityAnalysis +using Distributions +using DataStructures +using Random +using LinearAlgebra +import StatsBase: percentile +using JLD2 + +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers + +using CairoMakie, ColorSchemes #for plots +seed = 2589436 + +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + + 
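For context on the analytic indices this script compares against: for the G-function with coefficients a_i, the first-order (V) and total-order (TV) Sobol indices have closed forms, and the sketch below mirrors the V / TV expressions used later in main(), with only the dimension shrunk for brevity:

# Closed-form Sobol indices for the G-function (smaller a_i => more sensitive x_i),
# mirroring the V / TV computation in main().
n_dim = 5
a = [(i - 1.0) / 2.0 for i in 1:n_dim]
total_var = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1               # normalizing variance
V = [(1 / (3 * (1 + ai)^2)) / total_var for ai in a]               # first-order indices
partials = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dim]
TV = [(1 / (3 * (1 + ai)^2)) * partials[i] / total_var for (i, ai) in enumerate(a)]
println("first order: ", round.(V, digits = 3))
println("total order: ", round.(TV, digits = 3))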
+inner_func(x::AV, a::AV) where {AV <: AbstractVector} = prod((abs.(4 * x .- 2) + a) ./ (1 .+ a)) + +"G-Function taken from https://www.sfu.ca/~ssurjano/gfunc.html" +function GFunction(x::AM, a::AV) where {AM <: AbstractMatrix, AV <: AbstractVector} + @assert size(x, 1) == length(a) + return mapslices(y -> inner_func(y, a), x; dims = 1) #applys the map to columns +end + +function GFunction(x::AM) where {AM <: AbstractMatrix} + a = [(i - 1.0) / 2.0 for i in 1:size(x, 1)] + return GFunction(x, a) +end + +function main() + + rng = MersenneTwister(seed) + + n_repeats = 20# repeat exp with same data. + n_dimensions = 20 + # To create the sampling + n_data_gen = 800 + + data = + SobolData(params = OrderedDict([Pair(Symbol("x", i), Uniform(0, 1)) for i in 1:n_dimensions]), N = n_data_gen) + + # To perform global analysis, + # one must generate samples using Sobol sequence (i.e. creates more than N points) + samples = GSA.sample(data) + n_data = size(samples, 1) # [n_samples x n_dim] + println("number of sobol points: ", n_data) + # run model (example) + y = GFunction(samples')' # G is applied to columns + # perform Sobol Analysis + result = analyze(data, y) + + # plot the first 3 dimensions + plot_dim = n_dimensions >= 3 ? 3 : n_dimensions + f1 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4) + for i in 1:plot_dim + ax = Axis(f1[1, i], xlabel = "x" * string(i), ylabel = "f") + scatter!(ax, samples[:, i], y[:], color = :orange) + end + + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).png"), f1, px_per_unit = 3) + CairoMakie.save(joinpath(output_directory, "GFunction_slices_truth_$(n_dimensions).pdf"), f1, px_per_unit = 3) + + n_train_pts = n_dimensions * 250 + ind = shuffle!(rng, Vector(1:n_data))[1:n_train_pts] + # now subsample the samples data + n_tp = length(ind) + input = zeros(n_dimensions, n_tp) + output = zeros(1, n_tp) + Γ = 1e-3 + noise = rand(rng, Normal(0, Γ), n_tp) + for i in 1:n_tp + input[:, i] = samples[ind[i], :] + output[i] = y[ind[i]] + noise[i] + end + iopairs = PairedDataContainer(input, output) + + # analytic sobol indices taken from + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8989694/pdf/main.pdf + a = [(i - 1.0) / 2.0 for i in 1:n_dimensions] # a_i < a_j => a_i more sensitive + prod_tmp = prod(1 .+ 1 ./ (3 .* (1 .+ a) .^ 2)) - 1 + V = [(1 / (3 * (1 + ai)^2)) / prod_tmp for ai in a] + prod_tmp2 = [prod(1 .+ 1 ./ (3 .* (1 .+ a[1:end .!== j]) .^ 2)) for j in 1:n_dimensions] + TV = [(1 / (3 * (1 + ai)^2)) * prod_tmp2[i] / prod_tmp for (i, ai) in enumerate(a)] + + + + cases = ["Prior", "GP", "RF-scalar"] + case = cases[3] + decorrelate = true + nugget = Float64(1e-12) + + overrides = Dict( + "verbose" => true, + "scheduler" => DataMisfitController(terminate_at = 1e2), + "n_features_opt" => 150, + "n_iteration" => 10, + "cov_sample_multiplier" => 3.0, + #"localization" => SEC(0.1),#,Doesnt help much tbh + #"accelerator" => NesterovAccelerator(), "n_ensemble" => 100, #40*n_dimensions, "n_cross_val_sets" => 4, ) @@ -147,13 +147,13 @@ emulator = Emulator(mlt, iopairs; obs_noise_cov = Γ * I, decorrelate = decorrelate) optimize_hyperparameters!(emulator) end - + if case == "RF-scalar" diag_tmp = reduce(hcat, get_optimizer(mlt)) # (n_iteration, dim_output=1) convergence for each scalar mode as cols push!(opt_diagnostics, diag_tmp) end - - @info "statistics of training time for case $(case): \n mean(s): $(mean(times[1:rep_idx])) \n var(s) : $(var(times[1:rep_idx]))" + + @info "statistics of training time for case $(case): \n 
mean(s): $(mean(times[1:rep_idx])) \n var(s) : $(var(times[1:rep_idx]))" # predict on all Sobol points with emulator (example) y_pred, y_var = predict(emulator, samples', transform_to_real = true) @@ -169,8 +169,8 @@ # PLotting: fontsize = 24 if rep_idx == 1 - f3 = Figure(markersize = 8,fontsize = fontsize) - ax3 = Axis(f3[1,1]) + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1]) scatter!( ax3, 1:n_dimensions, @@ -230,8 +230,8 @@ println("(5%) totalorder: ", totalorder_low) println("(95%) totalorder: ", totalorder_up) # - f3 = Figure(markersize = 8,fontsize = fontsize) - ax3 = Axis(f3[1,1]) + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1]) errorbars!( ax3, 1:n_dimensions, @@ -316,7 +316,7 @@ end - + println(" ") println("True Sobol Indices") println("******************") @@ -331,8 +331,8 @@ println("Sampled Emulated Sobol Indices (# obs $n_train_pts, noise var $Γ)") println("***************************************************************") - - + + jldsave( joinpath(output_directory, "GFunction_$(case)_$(n_dimensions).jld2"); sobol_pts = samples, diff --git a/examples/Emulator/G-function/plot_result.jl b/examples/Emulator/G-function/plot_result.jl index d66e560b..03455b87 100644 --- a/examples/Emulator/G-function/plot_result.jl +++ b/examples/Emulator/G-function/plot_result.jl @@ -10,7 +10,7 @@ function main() n_dimensions = 3 filename = joinpath(output_directory, "Gfunction_$(case)_$(n_dimensions).jld2") legend = true - + ( sobol_pts, train_idx, @@ -39,8 +39,8 @@ function main() n_repeats = length(mlt_sobol) fontsize = 24 if n_repeats == 1 - f3 = Figure(markersize = 8,fontsize = fontsize) - ax3 = Axis(f3[1,1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel="i") + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel = "i") scatter!( ax3, 1:n_dimensions, @@ -71,7 +71,7 @@ function main() CairoMakie.save(png_out, f3, px_per_unit = 3) CairoMakie.save(pdf_out, f3, px_per_unit = 3) @info "Plotted sensitivities, case dim = $n_dimensions, \n storing plots in: \n $png_out \n $pdf_out" - + else # get percentiles: fo_mat = zeros(n_dimensions, n_repeats) @@ -98,8 +98,8 @@ function main() println("(5%) totalorder: ", totalorder_low) println("(95%) totalorder: ", totalorder_up) # - f3 = Figure(markersize = 8,fontsize = fontsize) - ax3 = Axis(f3[1,1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel = "i") + f3 = Figure(markersize = 8, fontsize = fontsize) + ax3 = Axis(f3[1, 1], xticks = 1:2:n_dimensions, ylabel = "Sobol Index", xlabel = "i") errorbars!( ax3, 1:n_dimensions, @@ -132,11 +132,11 @@ function main() CairoMakie.save(png_out, f3, px_per_unit = 3) CairoMakie.save(pdf_out, f3, px_per_unit = 3) @info "Plotted sensitivities, case dim = $n_dimensions, \n storing plots in: \n $png_out \n $pdf_out" - + end # plots - first 3 dimensions plot_dim = n_dimensions >= 3 ? 
3 : n_dimensions - f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4, fontsize=fontsize) + f2 = Figure(resolution = (1.618 * plot_dim * 300, 300), markersize = 4, fontsize = fontsize) for i in 1:plot_dim ax2 = Axis(f2[1, i], xlabel = "x" * string(i), ylabel = "f") scatter!(ax2, sobol_pts[:, i], mlt_pred_y[1][:], color = :blue) diff --git a/examples/Emulator/Ishigami/emulate.jl b/examples/Emulator/Ishigami/emulate.jl index a02443bb..45e04acd 100644 --- a/examples/Emulator/Ishigami/emulate.jl +++ b/examples/Emulator/Ishigami/emulate.jl @@ -77,7 +77,7 @@ function main() noise = rand(rng, Normal(0, Γ), n_tp) for i in 1:n_tp input[:, i] = samples[ind[i], :] - output[i] = y[ind[i]] + noise[i] + output[i] = y[ind[i]] + noise[i] end iopairs = PairedDataContainer(input, output) @@ -198,7 +198,7 @@ function main() # plots - f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize=28) + f2 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = 28) axx_em = Axis(f2[1, 1], xlabel = "x1", ylabel = "f") axy_em = Axis(f2[1, 2], xlabel = "x2", ylabel = "f") axz_em = Axis(f2[1, 3], xlabel = "x3", ylabel = "f") diff --git a/examples/Emulator/Ishigami/plot_result.jl b/examples/Emulator/Ishigami/plot_result.jl index f6fa2257..18345977 100644 --- a/examples/Emulator/Ishigami/plot_result.jl +++ b/examples/Emulator/Ishigami/plot_result.jl @@ -4,33 +4,34 @@ using JLD2 function main() - - + + output_directory = "output" cases = ["Prior", "GP", "RF-scalar"] case = cases[2] filename = joinpath(output_directory, "results_$case.jld2") - (sobol_pts, train_idx, mlt_pred_y, mlt_sobol, analytic_sobol, true_y, noise_sample, noise_cov, estimated_sobol) = load( - filename, - "sobol_pts", - "train_idx", - "mlt_pred_y", - "mlt_sobol", - "analytic_sobol", - "true_y", - "noise_sample", - "noise_cov", - "estimated_sobol", - ) + (sobol_pts, train_idx, mlt_pred_y, mlt_sobol, analytic_sobol, true_y, noise_sample, noise_cov, estimated_sobol) = + load( + filename, + "sobol_pts", + "train_idx", + "mlt_pred_y", + "mlt_sobol", + "analytic_sobol", + "true_y", + "noise_sample", + "noise_cov", + "estimated_sobol", + ) n_data = size(sobol_pts, 1) n_train_pts = length(train_idx) n_repeats = length(mlt_sobol) (V, V1, V2, V3, VT1, VT2, VT3) = analytic_sobol - fontsize=28 + fontsize = 28 f1 = Figure(resolution = (1.618 * 900, 300), markersize = 4, fontsize = fontsize) axx = Axis(f1[1, 1], xlabel = "x1", ylabel = "f") axy = Axis(f1[1, 2], xlabel = "x2", ylabel = "f") diff --git a/examples/Emulator/L63/emulate.jl b/examples/Emulator/L63/emulate.jl index b63e3a9b..64ee4b6c 100644 --- a/examples/Emulator/L63/emulate.jl +++ b/examples/Emulator/L63/emulate.jl @@ -31,7 +31,7 @@ function main() #for later plots fontsize = 20 - wideticks= WilkinsonTicks(3, k_min=3, k_max=4) # prefer few ticks + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks # Run L63 from 0 -> tmax @@ -96,7 +96,16 @@ function main() # Emulate - cases = ["GP", "RF-prior", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep","RF-svd-sep"] + cases = [ + "GP", + "RF-prior", + "RF-scalar", + "RF-scalar-diagin", + "RF-svd-nonsep", + "RF-nosvd-nonsep", + "RF-nosvd-sep", + "RF-svd-sep", + ] case = cases[7] @@ -212,7 +221,7 @@ function main() optimize_hyperparameters!(emulator) # diagnostics - if case ∈ ["RF-svd-nonsep", "RF-nosvd-nonsep", "RF-svd-sep"]] + if case ∈ ["RF-svd-nonsep", "RF-nosvd-nonsep", "RF-svd-sep"] push!(opt_diagnostics, get_optimizer(mlt)[1]) #length-1 vec of vec -> vec end @@ 
-249,10 +258,10 @@ function main() if rep_idx == 1 # plotting trace - f = Figure(size = (900, 450), fontsize=fontsize) + f = Figure(size = (900, 450), fontsize = fontsize) axx = Axis(f[1, 1], ylabel = "x", yticks = wideticks) axy = Axis(f[2, 1], ylabel = "y", yticks = wideticks) - axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10,30,50]) + axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10, 30, 50]) xx = 0:dt:tmax_test lines!(axx, xx, u_test_tmp[1, :], color = :blue) @@ -269,7 +278,7 @@ function main() save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) # plot attractor - f3 = Figure(fontsize=fontsize) + f3 = Figure(fontsize = fontsize) lines(f3[1, 1], u_test_tmp[1, :], u_test_tmp[3, :], color = :blue) lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) @@ -278,11 +287,11 @@ function main() save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) # plotting histograms - f2 = Figure(fontsize=1.25*fontsize) - axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks=wideticks, yticklabelsvisible = false) - axy = Axis(f2[1, 2], xlabel = "y", xticks=wideticks, yticklabelsvisible = false) - axz = Axis(f2[1, 3], xlabel = "z", xticks= [10,30,50], yticklabelsvisible = false) - + f2 = Figure(fontsize = 1.25 * fontsize) + axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks = wideticks, yticklabelsvisible = false) + axy = Axis(f2[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f2[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) + hist!(axx, u_hist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) hist!(axy, u_hist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) hist!(axz, u_hist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) @@ -336,10 +345,10 @@ function main() push!(u_cdf, u_cdf_tmp) end - f4 = Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5*fontsize) - axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks=wideticks) - axy = Axis(f4[1, 2], xlabel = "y", xticks=wideticks,yticklabelsvisible = false) - axz = Axis(f4[1, 3], xlabel = "z", xticks= [10,30,50],yticklabelsvisible = false) + f4 = Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5 * fontsize) + axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks = wideticks) + axy = Axis(f4[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f4[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) unif_samples = (1:size(sol_cdf, 2)) / size(sol_cdf, 2) diff --git a/examples/Emulator/L63/emulate_diff-rank-test.jl b/examples/Emulator/L63/emulate_diff-rank-test.jl index 2d8f8b3a..222147bf 100644 --- a/examples/Emulator/L63/emulate_diff-rank-test.jl +++ b/examples/Emulator/L63/emulate_diff-rank-test.jl @@ -26,14 +26,14 @@ function main() # rng rng = MersenneTwister(1232435) - n_repeats = 5 # repeat exp with same data. + n_repeats = 1# 5 # repeat exp with same data. 
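Later hunks in this file unpack the single sweep index rank_val in 1:9 into an (input-rank, output-rank) pair for the separable kernel. The arithmetic is easy to misread, so here is a tiny standalone table of that encoding (rank_out steps through 1 1 1 2 2 2 3 3 3 while rank_in cycles 1 2 3):

# Tabulate the rank_val -> (rank_in, rank_out) encoding used by the
# "RF-svd-sep" / "RF-nosvd-sep" branches (rank_test = 1:9).
for rank_val in 1:9
    rank_out = Int(ceil(rank_val / 3))        # 1 1 1 2 2 2 3 3 3
    rank_in = rank_val - 3 * (rank_out - 1)   # 1 2 3 1 2 3 1 2 3
    println("rank_val = $rank_val  =>  (rank_in, rank_out) = ($rank_in, $rank_out)")
end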
println("run experiment $n_repeats times") rank_test = 1:9 # must be 1:k for now n_iteration = 20 #for later plots fontsize = 20 - wideticks= WilkinsonTicks(3, k_min=3, k_max=4) # prefer few ticks + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks # Run L63 from 0 -> tmax @@ -98,18 +98,27 @@ function main() # Emulate - cases = ["GP", "RF-prior", "RF-scalar", "RF-scalar-diagin", "RF-svd-nonsep", "RF-nosvd-nonsep", "RF-nosvd-sep", "RF-svd-sep"] - - case = cases[8] #5 + cases = [ + "GP", + "RF-prior", + "RF-scalar", + "RF-scalar-diagin", + "RF-svd-nonsep", + "RF-nosvd-nonsep", + "RF-nosvd-sep", + "RF-svd-sep", + ] + + case = cases[1] #5 nugget = Float64(1e-8) u_test = [] u_hist = [] - opt_diagnostics = zeros(length(rank_test),n_repeats,n_iteration) - train_err = zeros(length(rank_test),n_repeats) - test_err = zeros(length(rank_test),n_repeats) + opt_diagnostics = zeros(length(rank_test), n_repeats, n_iteration) + train_err = zeros(length(rank_test), n_repeats) + test_err = zeros(length(rank_test), n_repeats) - ttt = zeros(length(rank_test),n_repeats) + ttt = zeros(length(rank_test), n_repeats) for (rank_id, rank_val) in enumerate(rank_test) #test over different ranks for svd-nonsep @info "Test rank: $(rank_val)" for rep_idx in 1:n_repeats @@ -135,7 +144,7 @@ function main() elseif case ∈ ["RF-svd-nonsep"] kernel_structure = NonseparableKernel(LowRankFactor(rank_val, nugget)) n_features = 500 - + mlt = VectorRandomFeatureInterface( n_features, 3, @@ -143,10 +152,10 @@ function main() rng = rng, kernel_structure = kernel_structure, optimizer_options = rf_optimizer_overrides, - ) + ) elseif case ∈ ["RF-svd-sep", "RF-nosvd-sep"] - rank_out = Int(ceil(rank_val/3)) # 1 1 1 2 2 2 - rank_in = rank_val - 3*(rank_out-1) # 1 2 3 1 2 3 + rank_out = Int(ceil(rank_val / 3)) # 1 1 1 2 2 2 + rank_in = rank_val - 3 * (rank_out - 1) # 1 2 3 1 2 3 @info "Test rank in: $(rank_in) out: $(rank_out)" kernel_structure = SeparableKernel(LowRankFactor(rank_in, nugget), LowRankFactor(rank_out, nugget)) n_features = 500 @@ -160,7 +169,7 @@ function main() ) end - + #save config for RF if !(case == "GP") && (rep_idx == 1) JLD2.save( @@ -184,12 +193,12 @@ function main() emulator = Emulator(mlt, iopairs; obs_noise_cov = Γy, decorrelate = decorrelate) optimize_hyperparameters!(emulator) end - + # diagnostics if case != "GP" - opt_diagnostics[rank_id,rep_idx,:] = get_optimizer(mlt)[1] #length-1 vec of vec -> vec + opt_diagnostics[rank_id, rep_idx, :] = get_optimizer(mlt)[1] #length-1 vec of vec -> vec end - + # Predict with emulator u_test_tmp = zeros(3, length(xspan_test)) u_test_tmp[:, 1] = sol_test.u[1] @@ -206,38 +215,43 @@ function main() train_mean, _ = predict(emulator, input[:, i:i], transform_to_real = true) # 3x1 train_err_tmp[1] += norm(train_mean - output[:, i]) end - train_err[rank_id,rep_idx] = 1 / size(input, 2) * train_err_tmp[1] + train_err[rank_id, rep_idx] = 1 / size(input, 2) * train_err_tmp[1] # test error i -> o test_err_tmp = [0.0] for i in 1:(length(xspan_test) - 1) - test_mean, _ = predict(emulator, reshape(sol_test.u[i],:,1), transform_to_real = true) # 3x1 matrix - test_err_tmp[1] += norm(test_mean[:] - sol_test.u[i+1]) + test_mean, _ = predict(emulator, reshape(sol_test.u[i], :, 1), transform_to_real = true) # 3x1 matrix + test_err_tmp[1] += norm(test_mean[:] - sol_test.u[i + 1]) end - test_err[rank_id,rep_idx] = 1/(length(xspan_test) - 1) * test_err_tmp[1] + test_err[rank_id, rep_idx] = 1 / (length(xspan_test) - 1) * test_err_tmp[1] println("normalized L^2 error on training 
data:", 1 / size(input, 2) * train_err_tmp[1]) - + println("normalized L^2 error on test data:", 1 / (length(xspan_test) - 1) * test_err_tmp[1]) + u_hist_tmp = zeros(3, length(xspan_hist)) u_hist_tmp[:, 1] = sol_hist.u[1] # start at end of previous sim - + for i in 1:(length(xspan_hist) - 1) rf_mean, _ = predict(emulator, u_hist_tmp[:, i:i], transform_to_real = true) # 3x1 matrix u_hist_tmp[:, i + 1] = rf_mean end - + push!(u_test, u_test_tmp) push!(u_hist, u_hist_tmp) - + JLD2.save( - joinpath(output_directory, case * "_l63_rank_test_results.jld2"), - "rank_test", collect(rank_test), - "timings", ttt, - "train_err", train_err, - "test_err", test_err, - ) + joinpath(output_directory, case * "_l63_rank_test_results.jld2"), + "rank_test", + collect(rank_test), + "timings", + ttt, + "train_err", + train_err, + "test_err", + test_err, + ) end - + end # save data @@ -247,18 +261,18 @@ function main() # plot eki convergence plot if length(opt_diagnostics) > 0 - - err_cols = mean(opt_diagnostics, dims=2)[:,1,:]' # average error over repeats for each rank as columns + + err_cols = mean(opt_diagnostics, dims = 2)[:, 1, :]' # average error over repeats for each rank as columns err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by the max, so all errors start at 1 - plot_x = collect(1:size(err_cols,1)) + plot_x = collect(1:size(err_cols, 1)) #save error_filepath = joinpath(output_directory, "eki_diff-rank-test_conv_error.jld2") save(error_filepath, "error", err_cols, "trajectory_error", train_err) f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4) ax_conv = Axis(f5[1, 1], xlabel = "Iteration", ylabel = "max-normalized error", yscale = log10) - for (rank_id,rank_val) in enumerate(rank_test) - lines!(ax_conv, plot_x, err_normalized[:,rank_id], label = "rank $(rank_val)") + for (rank_id, rank_val) in enumerate(rank_test) + lines!(ax_conv, plot_x, err_normalized[:, rank_id], label = "rank $(rank_val)") end axislegend(ax_conv) save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).png"), f5, px_per_unit = 3) @@ -266,9 +280,9 @@ function main() end - - - + + + end main() diff --git a/examples/Emulator/L63/plot_results-diff-rank-test.jl b/examples/Emulator/L63/plot_results-diff-rank-test.jl index 23027940..dfe7d0f6 100644 --- a/examples/Emulator/L63/plot_results-diff-rank-test.jl +++ b/examples/Emulator/L63/plot_results-diff-rank-test.jl @@ -6,75 +6,100 @@ using JLD2 function main() # filepaths output_directory = joinpath(@__DIR__, "output") -# case = "RF-svd-nonsep" + # case = "RF-svd-nonsep" case = "RF-svd-sep" rank_test = 1:9 # must be 1:k for now @info "plotting case $case for rank test $(rank_test)" #for later plots fontsize = 18 - wideticks= WilkinsonTicks(3, k_min=3, k_max=4) # prefer few ticks + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks error_filepath = joinpath(output_directory, "eki_diff-rank-test_conv_error.jld2") error_data = JLD2.load(error_filepath) - err_cols = error_data["error"] # exps are columns - - rank_test_filepath = joinpath(output_directory, case*"_l63_rank_test_results.jld2") + err_cols = error_data["error"] # exps are columns + + rank_test_filepath = joinpath(output_directory, case * "_l63_rank_test_results.jld2") rank_test_data = JLD2.load(rank_test_filepath) train_err = rank_test_data["train_err"] test_err = rank_test_data["test_err"] # Plot convergences err_normalized = (err_cols' ./ err_cols[1, :])' # divide each series by the max, so all errors start at 1 - n_iterations = size(err_cols,1) + n_iterations = 
size(err_cols, 1) plot_x = collect(1:n_iterations) - - f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize=fontsize) - ax_conv = Axis(f5[1, 1], xlabel = "Iteration", ylabel = "max-normalized error", yscale = log10, xticks = 2:2:n_iterations, yticks= ([10^-4,10^-2,10^0, 10^2], [L"10^{-4}",L"10^{-2}", L"10^0",L"10^2"])) + + f5 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize = fontsize) + ax_conv = Axis( + f5[1, 1], + xlabel = "Iteration", + ylabel = "max-normalized error", + yscale = log10, + xticks = 2:2:n_iterations, + yticks = ([10^-4, 10^-2, 10^0, 10^2], [L"10^{-4}", L"10^{-2}", L"10^0", L"10^2"]), + ) plot_to = 10 - for (rank_id,rank_val) in enumerate(rank_test) - lines!(ax_conv, plot_x[1:plot_to], err_normalized[1:plot_to,rank_id], label = "rank $(rank_val)", color=(:blue, 0.5*rank_val/10)) + for (rank_id, rank_val) in enumerate(rank_test) + lines!( + ax_conv, + plot_x[1:plot_to], + err_normalized[1:plot_to, rank_id], + label = "rank $(rank_val)", + color = (:blue, 0.5 * rank_val / 10), + ) end - + #axislegend(ax_conv) save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).png"), f5, px_per_unit = 3) - save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).pdf"), f5, px_per_unit = 3) + save(joinpath(output_directory, "l63_diff-rank-test_eki-conv_$(case).pdf"), f5, px_per_unit = 3) # if including GP - either load or just put in average here: - gp_train_test_err=[0.0154,0.00292] - - f6 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize=fontsize) + gp_train_test_err = [0.0154, 0.00292] + + f6 = Figure(resolution = (1.618 * 300, 300), markersize = 4, fontsize = fontsize) if case == "RF-svd-nonsep" - ax_error = Axis(f6[1, 1], xlabel = "rank", xticks=collect(rank_test), ylabel = "L²-error in trajectory", yscale=log10 ) - mean_train_err = mean(train_err[1:length(rank_test),:],dims=2) - mean_test_err = mean(test_err[1:length(rank_test),:],dims=2) - lines!(ax_error, rank_test, mean_train_err[:], label="RF-train", color=:blue, linestyle=:dash) - lines!(ax_error, rank_test, mean_test_err[:], label="RF-test", color=:blue) - hlines!(ax_error, [gp_train_test_err[1]], label="GP-train", color = :orange, linestyle=:dash) - hlines!(ax_error, [gp_train_test_err[2]], label="GP-test", color = :orange, linestyle=:solid) - + ax_error = Axis( + f6[1, 1], + xlabel = "rank", + xticks = collect(rank_test), + ylabel = "L²-error in trajectory", + yscale = log10, + ) + mean_train_err = mean(train_err[1:length(rank_test), :], dims = 2) + mean_test_err = mean(test_err[1:length(rank_test), :], dims = 2) + lines!(ax_error, rank_test, mean_train_err[:], label = "RF-train", color = :blue, linestyle = :dash) + lines!(ax_error, rank_test, mean_test_err[:], label = "RF-test", color = :blue) + hlines!(ax_error, [gp_train_test_err[1]], label = "GP-train", color = :orange, linestyle = :dash) + hlines!(ax_error, [gp_train_test_err[2]], label = "GP-test", color = :orange, linestyle = :solid) + axislegend(ax_error) elseif case == "RF-svd-sep" rank_labels = [] for rank_val in collect(rank_test) - rank_out = Int(ceil(rank_val/3)) # 1 1 1 2 2 2 - rank_in = rank_val - 3*(rank_out-1) # 1 2 3 1 2 3 + rank_out = Int(ceil(rank_val / 3)) # 1 1 1 2 2 2 + rank_in = rank_val - 3 * (rank_out - 1) # 1 2 3 1 2 3 push!(rank_labels, (rank_in, rank_out)) end - idx = [1,4,7,2,5,8,3,6,9] - - ax_error = Axis(f6[1, 1], xlabel = "rank (in,out)", xticks=(rank_test,["$(rank_labels[i])" for i in idx]), ylabel = "L²-error in trajectory", yscale=log10 ) - mean_train_err = 
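For reference, the idx permutation above regroups the sweep (which varies rank_in fastest) so the x-axis is grouped by rank_in instead; a standalone check:

rank_labels = [(rank_val - 3 * (Int(ceil(rank_val / 3)) - 1), Int(ceil(rank_val / 3))) for rank_val in 1:9]
# [(1,1), (2,1), (3,1), (1,2), (2,2), (3,2), (1,3), (2,3), (3,3)]
idx = [1, 4, 7, 2, 5, 8, 3, 6, 9]
rank_labels[idx]
# [(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1), (3,2), (3,3)]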
mean(train_err[idx,:],dims=2) - mean_test_err = mean(test_err[idx,:],dims=2) - lines!(ax_error, rank_test, mean_train_err[:], label="RF-train", color=:blue, linestyle=:dash) - lines!(ax_error, rank_test, mean_test_err[:], label="RF-test", color=:blue) - hlines!(ax_error, [gp_train_test_err[1]], label="GP-train", color = :orange, linestyle=:dash) - hlines!(ax_error, [gp_train_test_err[2]], label="GP-test", color = :orange, linestyle=:solid) + idx = [1, 4, 7, 2, 5, 8, 3, 6, 9] + + ax_error = Axis( + f6[1, 1], + xlabel = "rank (in,out)", + xticks = (rank_test, ["$(rank_labels[i])" for i in idx]), + ylabel = "L²-error in trajectory", + yscale = log10, + ) + mean_train_err = mean(train_err[idx, :], dims = 2) + mean_test_err = mean(test_err[idx, :], dims = 2) + lines!(ax_error, rank_test, mean_train_err[:], label = "RF-train", color = :blue, linestyle = :dash) + lines!(ax_error, rank_test, mean_test_err[:], label = "RF-test", color = :blue) + hlines!(ax_error, [gp_train_test_err[1]], label = "GP-train", color = :orange, linestyle = :dash) + hlines!(ax_error, [gp_train_test_err[2]], label = "GP-test", color = :orange, linestyle = :solid) axislegend(ax_error) else throw(ArgumentError("case $(case) not found, check for typos")) end - + save(joinpath(output_directory, "l63_diff-rank-test_train_err_$(case).png"), f6, px_per_unit = 3) save(joinpath(output_directory, "l63_diff-rank-test_train_err_$(case).pdf"), f6, px_per_unit = 3) end diff --git a/examples/Emulator/L63/plot_results.jl b/examples/Emulator/L63/plot_results.jl index 49253b5c..923eccf6 100644 --- a/examples/Emulator/L63/plot_results.jl +++ b/examples/Emulator/L63/plot_results.jl @@ -11,8 +11,8 @@ function main() @info "plotting case $case" #for later plots fontsize = 20 - wideticks= WilkinsonTicks(3, k_min=3, k_max=4) # prefer few ticks - + wideticks = WilkinsonTicks(3, k_min = 3, k_max = 4) # prefer few ticks + # Load from saved files #= config_file = JLD2.load(joinpath(output_directory, case * "_l63_config.jld2")) @@ -37,11 +37,11 @@ function main() tmax_test = 20 #100 xx = 0.0:dt:tmax_test - f = Figure(size = (900, 450), fontsize=fontsize) + f = Figure(size = (900, 450), fontsize = fontsize) axx = Axis(f[1, 1], ylabel = "x", yticks = wideticks) axy = Axis(f[2, 1], ylabel = "y", yticks = wideticks) - axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10,30,50]) - + axz = Axis(f[3, 1], xlabel = "time", ylabel = "z", yticks = [10, 30, 50]) + tt = 1:length(xx) lines!(axx, xx, uplot_tmp[1, tt], color = :blue) lines!(axy, xx, uplot_tmp[2, tt], color = :blue) @@ -56,9 +56,9 @@ function main() save(joinpath(output_directory, case * "_l63_test.png"), f, px_per_unit = 3) save(joinpath(output_directory, case * "_l63_test.pdf"), f, pt_per_unit = 3) @info "plotted trajectory in \n $(case)_l63_test.png, and $(case)_l63_test.pdf" - + # plot attractor - f3 = Figure(fontsize=fontsize) + f3 = Figure(fontsize = fontsize) lines(f3[1, 1], uplot_tmp[1, :], uplot_tmp[3, :], color = :blue) lines(f3[2, 1], solplot[1, :], solplot[3, :], color = :orange) @@ -66,13 +66,13 @@ function main() save(joinpath(output_directory, case * "_l63_attr.png"), f3, px_per_unit = 3) save(joinpath(output_directory, case * "_l63_attr.pdf"), f3, pt_per_unit = 3) @info "plotted attractor in \n $(case)_l63_attr.png, and $(case)_l63_attr.pdf" - + # plotting histograms - f2 = Figure(fontsize=1.25*fontsize) - axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks=wideticks, yticklabelsvisible = false) - axy = Axis(f2[1, 2], xlabel = "y", xticks=wideticks, yticklabelsvisible 
= false) - axz = Axis(f2[1, 3], xlabel = "z", xticks= [10,30,50], yticklabelsvisible = false) - + f2 = Figure(fontsize = 1.25 * fontsize) + axx = Axis(f2[1, 1], xlabel = "x", ylabel = "pdf", xticks = wideticks, yticklabelsvisible = false) + axy = Axis(f2[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f2[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) + hist!(axx, uhist_tmp[1, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) hist!(axy, uhist_tmp[2, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) hist!(axz, uhist_tmp[3, :], bins = 50, normalization = :pdf, color = (:blue, 0.5)) @@ -95,15 +95,15 @@ function main() ucdf_tmp = sort(u, dims = 2) push!(ucdf, ucdf_tmp) end - - f4 = Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5*fontsize) - axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks=wideticks) - axy = Axis(f4[1, 2], xlabel = "y", xticks=wideticks,yticklabelsvisible = false) - axz = Axis(f4[1, 3], xlabel = "z", xticks= [10,30,50],yticklabelsvisible = false) - + + f4 = Figure(size = (900, Int(floor(900 / 1.618))), fontsize = 1.5 * fontsize) + axx = Axis(f4[1, 1], xlabel = "x", ylabel = "cdf", xticks = wideticks) + axy = Axis(f4[1, 2], xlabel = "y", xticks = wideticks, yticklabelsvisible = false) + axz = Axis(f4[1, 3], xlabel = "z", xticks = [10, 30, 50], yticklabelsvisible = false) + unif_samples = (1:size(solcdf, 2)) / size(solcdf, 2) - + for u in ucdf lines!(axx, u[1, :], unif_samples, color = (:blue, 0.2), linewidth = 4) lines!(axy, u[2, :], unif_samples, color = (:blue, 0.2), linewidth = 4) @@ -113,7 +113,7 @@ function main() lines!(axx, solcdf[1, :], unif_samples, color = (:orange, 1.0), linewidth = 4) lines!(axy, solcdf[2, :], unif_samples, color = (:orange, 1.0), linewidth = 4) lines!(axz, solcdf[3, :], unif_samples, color = (:orange, 1.0), linewidth = 4) - + # save save(joinpath(output_directory, case * "_l63_cdfs.png"), f4, px_per_unit = 3) save(joinpath(output_directory, case * "_l63_cdfs.pdf"), f4, pt_per_unit = 3) diff --git a/examples/Emulator/Regression_2d_2d/compare_regression.jl b/examples/Emulator/Regression_2d_2d/compare_regression.jl index c02b0f17..df9282d3 100644 --- a/examples/Emulator/Regression_2d_2d/compare_regression.jl +++ b/examples/Emulator/Regression_2d_2d/compare_regression.jl @@ -57,8 +57,8 @@ function main() d = 2 # output dim X = 2.0 * π * rand(p, n) # G(x1, x2) - g1(x) = sin.(x[1, :]) .+ 2*cos.(2*x[2, :]) - g2(x) = 3*sin.(3*x[1, :]) .- 4*cos.(4*x[2, :]) + g1(x) = sin.(x[1, :]) .+ 2 * cos.(2 * x[2, :]) + g2(x) = 3 * sin.(3 * x[1, :]) .- 4 * cos.(4 * x[2, :]) g1x = g1(X) g2x = g2(X) gx = zeros(2, n) @@ -147,7 +147,8 @@ function main() # common random feature setup n_features = 300 - optimizer_options = Dict("n_iteration" => 20, "n_features_opt" => 100, "n_ensemble" => 80, "cov_sample_multiplier"=>1.0) + optimizer_options = + Dict("n_iteration" => 20, "n_features_opt" => 100, "n_ensemble" => 80, "cov_sample_multiplier" => 1.0) nugget = 1e-12 diff --git a/src/RandomFeature.jl b/src/RandomFeature.jl index fc65746e..a9e22970 100644 --- a/src/RandomFeature.jl +++ b/src/RandomFeature.jl @@ -461,7 +461,7 @@ function calculate_mean_cov_and_coeffs( input_dim = size(itrain, 1) output_dim = size(otrain, 1) n_test = size(itest, 2) - + # build and fit the RF rfm = RFM_from_hyperparameters( rfi, @@ -625,7 +625,7 @@ function estimate_mean_and_coeffnorm_covariance( output_dim = size(get_outputs(io_pairs), 1) n_test = length(test_idx) - + means = zeros(output_dim, n_samples, 
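The CDF panels above use the standard sorted-sample construction: sort each coordinate's history and pair it with evenly spaced levels. In miniature, with hypothetical data:

u = randn(3, 1000)                                # hypothetical 3 x n_samples history
ucdf = sort(u, dims = 2)                          # sort each coordinate's samples
unif_samples = (1:size(ucdf, 2)) / size(ucdf, 2)  # levels 1/n, 2/n, ..., 1
# lines!(axx, ucdf[1, :], unif_samples) then traces the empirical CDF of x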
n_test) mean_of_covs = zeros(output_dim, output_dim, n_test) moc_tmp = similar(mean_of_covs) @@ -733,7 +733,7 @@ function calculate_ensemble_mean_and_coeffnorm( N_ens = size(lmat, 2) output_dim = size(get_outputs(io_pairs), 1) n_test = length(test_idx) - + means = zeros(output_dim, N_ens, n_test) mean_of_covs = zeros(output_dim, output_dim, n_test) buffer = zeros(n_test, output_dim, n_features) diff --git a/src/RandomFeature_crossval.jl b/src/RandomFeature_crossval.jl deleted file mode 100644 index fc65746e..00000000 --- a/src/RandomFeature_crossval.jl +++ /dev/null @@ -1,1024 +0,0 @@ - -using RandomFeatures -const RF = RandomFeatures -using EnsembleKalmanProcesses -const EKP = EnsembleKalmanProcesses -using EnsembleKalmanProcesses.Localizers -using ..ParameterDistributions -using ..Utilities -using StableRNGs -using Random - -export calculate_n_hyperparameters, hyperparameters_from_flat, build_default_prior, cov_structure_from_string -export CovarianceStructureType, OneDimFactor, DiagonalFactor, CholeskyFactor, LowRankFactor, HierarchicalLowRankFactor -export SeparableKernel, NonseparableKernel -export get_input_cov_structure, get_output_cov_structure, get_cov_structure -export get_eps -export rank -export shrinkage_cov, nice_cov - - -abstract type RandomFeatureInterface <: MachineLearningTool end - -# Different types of threading -abstract type MultithreadType end -struct TullioThreading <: MultithreadType end -struct EnsembleThreading <: MultithreadType end - - -# Different types of covariance representation in the prior - -abstract type CovarianceStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -covariance structure for a one-dimensional space -""" -struct OneDimFactor <: CovarianceStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -builds a diagonal covariance structure -""" -struct DiagonalFactor{FT <: AbstractFloat} <: CovarianceStructureType - "nugget term" - eps::FT -end -DiagonalFactor() = DiagonalFactor(Float64(1.0)) -get_eps(df::DiagonalFactor) = df.eps - -""" -$(DocStringExtensions.TYPEDEF) - -builds a general positive-definite covariance structure -""" -struct CholeskyFactor{FT <: AbstractFloat} <: CovarianceStructureType - "nugget term" - eps::FT -end -CholeskyFactor() = CholeskyFactor(Float64(1.0)) -get_eps(cf::CholeskyFactor) = cf.eps - -""" -$(DocStringExtensions.TYPEDEF) - -builds a covariance structure that deviates from the identity with a low-rank perturbation. 
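For orientation while reading these covariance-structure types: every factor carries a nugget eps, the one-argument constructors default it to 1.0, and get_eps / rank are the accessors. Usage as implied by the definitions:

df = DiagonalFactor(1e-4)     # diagonal structure with nugget 1e-4
lrf = LowRankFactor(3, 1e-8)  # rank-3 perturbation of the identity
rank(lrf)                     # 3
get_eps(lrf)                  # 1.0e-8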
This perturbation is diagonalized in the low-rank space -""" -struct LowRankFactor{FT <: AbstractFloat} <: CovarianceStructureType - "rank of perturbation" - rank::Int - "nugget term" - eps::FT -end -LowRankFactor(r::Int) = LowRankFactor(r, Float64(1.0)) -get_eps(lrf::LowRankFactor) = lrf.eps - - -""" -$(DocStringExtensions.TYPEDEF) - -builds a covariance structure that deviates from the identity with a more general low-rank perturbation -""" -struct HierarchicalLowRankFactor{FT <: AbstractFloat} <: CovarianceStructureType - "rank of perturbation" - rank::Int - "nugget term" - eps::FT -end - -HierarchicalLowRankFactor(r::Int) = HierarchicalLowRankFactor(r, Float64(1.0)) -get_eps(hlrf::HierarchicalLowRankFactor) = hlrf.eps - -import LinearAlgebra: rank -rank(lrf::LowRankFactor) = lrf.rank -rank(hlrf::HierarchicalLowRankFactor) = hlrf.rank - -# To add a new cov type `T` one must define these 3 methods: -# hyperparameters_from_flat(x::V, d::Int, t::T) where {V <: AbstractVector} -# calculate_n_hyperparameters(d::Int, t::T) -# build_default_prior(name::SS, d::Int, t::T) where {SS <: AbstractString} -# and add a string id to cov_structure_from_string - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -creates some simple covariance structures from strings: ["onedim", "diagonal", "cholesky", "lowrank", hierlowrank"]. See the covariance Structures for more details. -""" -function cov_structure_from_string(s::S, d::Int) where {S <: AbstractString} - if s == "onedim" - return OneDimFactor() - elseif s == "diagonal" - return DiagonalFactor() - elseif s == "cholesky" - return CholeskyFactor() - elseif s == "lowrank" - return LowRankFactor(Int(ceil(sqrt(d)))) - elseif s == "hierlowrank" - return HierarchicalLowRankFactor(Int(ceil(sqrt(d)))) - else - throw( - ArgumentError( - "Recieved unknown `input_cov_structure` keyword $(s), \n please choose from [\"onedim\",\"diagonal\",\"cholesky\", \"lowrank\"] or provide a CovarianceStructureType", - ), - ) - end -end -cov_structure_from_string(cst::CST, args...; kwargs...) where {CST <: CovarianceStructureType} = cst - - -# Different types of kernel -abstract type KernelStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -Builds a separable kernel, i.e. one that accounts for input and output covariance structure separately -""" -struct SeparableKernel{CST1 <: CovarianceStructureType, CST2 <: CovarianceStructureType} <: KernelStructureType - input_cov_structure::CST1 - output_cov_structure::CST2 -end -get_input_cov_structure(kernel_structure::SeparableKernel) = kernel_structure.input_cov_structure -get_output_cov_structure(kernel_structure::SeparableKernel) = kernel_structure.output_cov_structure - - -""" -$(DocStringExtensions.TYPEDEF) - -Builds a nonseparable kernel, i.e. 
one that accounts for a joint input and output covariance structure -""" -struct NonseparableKernel{CST <: CovarianceStructureType} <: KernelStructureType - cov_structure::CST -end -get_cov_structure(kernel_structure::NonseparableKernel) = kernel_structure.cov_structure - - -# calculate_n_hyperparameters - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -calculates the number of hyperparameters generated by the choice of covariance structure -""" -calculate_n_hyperparameters(d::Int, odf::OneDimFactor) = 0 -calculate_n_hyperparameters(d::Int, df::DiagonalFactor) = d -calculate_n_hyperparameters(d::Int, cf::CholeskyFactor) = Int(d * (d + 1) / 2) -calculate_n_hyperparameters(d::Int, lrf::LowRankFactor) = Int(rank(lrf) + d * rank(lrf)) -calculate_n_hyperparameters(d::Int, hlrf::HierarchicalLowRankFactor) = - Int(rank(hlrf) * (rank(hlrf) + 1) / 2 + d * rank(hlrf)) - -# build from flat - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -reshapes a list of hyperparameters into a covariance matrix based on the selected structure -""" -hyperparameters_from_flat(x::V, odf::OneDimFactor) where {V <: AbstractVector} = nothing - -function hyperparameters_from_flat(x::V, df::DiagonalFactor) where {V <: AbstractVector} - return convert(Matrix, Diagonal(x) + get_eps(df) * I) -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Converts a flat array into a cholesky factor. -""" -function flat_to_chol(x::V) where {V <: AbstractVector} - choldim = Int(floor(sqrt(2 * length(x)))) - cholmat = zeros(choldim, choldim) - for i in 1:choldim - for j in 1:i - cholmat[i, j] = x[sum(0:(i - 1)) + j] - end - end - return cholmat -end - -function hyperparameters_from_flat(x::V, cf::CholeskyFactor) where {V <: AbstractVector} - chol = flat_to_chol(x) - return chol * permutedims(chol, (2, 1)) + get_eps(cf) * I - -end - -function hyperparameters_from_flat(x::V, lrf::LowRankFactor) where {V <: AbstractVector} - - r = rank(lrf) - d = Int(length(x) / r - 1) # l = r + d*r - - D = Diagonal(x[1:r]) # D acts like sing.vals.^2 - U = reshape(x[(r + 1):end], d, r) - qrU = qr(U) # qrU.Q is semi-orthogonal in first r columns, i.e. Q^T.Q = I - return qrU.Q[:, 1:r] * D * qrU.Q[:, 1:r]' + get_eps(lrf) * I - -end - -function hyperparameters_from_flat(x::V, hlrf::HierarchicalLowRankFactor) where {V <: AbstractVector} - r = rank(hlrf) - d = Int(length(x) / r - (r + 1) / 2) # l = r^2 + d*r - - # Ambikasaran, O'Neil, Singh 2016 - L = flat_to_chol(x[1:Int(r * (r + 1) / 2)]) - U = reshape(x[Int(r * (r + 1) / 2 + 1):end], d, r) - K = L * permutedims(L, (2, 1)) + get_eps(hlrf) * I - - qrU = qr(U) - svdT = svd(I + qrU.R * K * qrU.R') # square so T = Q S Q^t - M = svdT.V * Diagonal(sqrt.(svdT.S)) # M M^t = Q sqrt(S) (Q sqrt(S))^t = Q sqrt(S) sqrt(S) Q^t = T - X = M - I - update_factor = I + qrU.Q * X * qrU.Q' - - return update_factor * update_factor' -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -builds a prior distribution for the kernel hyperparameters to initialize optimization. 
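The hyperparameter counting rules (per-factor above, kernel combinations just below) are easy to sanity-check by hand: a dimension-d block gives d for diagonal, d(d+1)/2 for Cholesky, and r + d*r for rank-r low-rank; a separable kernel adds its input and output counts plus one scaling, while a nonseparable kernel counts on the joint input_dim * output_dim space. A sketch, sized like the L63 problem (3 inputs, 3 outputs):

d = 6
calculate_n_hyperparameters(d, DiagonalFactor())   # 6
calculate_n_hyperparameters(d, CholeskyFactor())   # 6 * 7 / 2 = 21
calculate_n_hyperparameters(d, LowRankFactor(2))   # 2 + 6 * 2 = 14

nugget = 1e-8
sep = SeparableKernel(LowRankFactor(1, nugget), LowRankFactor(1, nugget))
calculate_n_hyperparameters(3, 3, sep)             # (1 + 3) + (1 + 3) + 1 = 9
nonsep = NonseparableKernel(LowRankFactor(2, nugget))
calculate_n_hyperparameters(3, 3, nonsep)          # (2 + 9 * 2) + 1 = 21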
-""" -build_default_prior(name::SS, n_hp::Int, odf::OneDimFactor) where {SS <: AbstractString} = nothing - -function build_default_prior(name::SS, n_hp::Int, df::DiagonalFactor) where {SS <: AbstractString} - return ParameterDistribution( - Dict( - "name" => "$(name)_diagonal", - "distribution" => VectorOfParameterized(repeat([Normal(0, 3)], n_hp)), - "constraint" => repeat([bounded_below(0.0)], n_hp), - ), - ) -end - -function build_default_prior(name::SS, n_hp::Int, df::CholeskyFactor) where {SS <: AbstractString} - return constrained_gaussian("$(name)_cholesky", 0.0, 5.0, -Inf, Inf, repeats = n_hp) -end - -function build_default_prior(name::SS, n_hp::Int, lrf::LowRankFactor) where {SS <: AbstractString} - r = rank(lrf) - d = Int(n_hp / r - 1) - D = ParameterDistribution( - Dict( - "name" => "$(name)_lowrank_diagonal", - "distribution" => VectorOfParameterized(repeat([Normal(0, 3)], r)), - "constraint" => repeat([bounded_below(0.0)], r), - ), - ) - U = constrained_gaussian("$(name)_lowrank_U", 0.0, 100.0, -Inf, Inf, repeats = Int(d * r)) - return combine_distributions([D, U]) -end - -function build_default_prior(name::SS, n_hp::Int, hlrf::HierarchicalLowRankFactor) where {SS <: AbstractString} - r = rank(hlrf) - d = Int(n_hp / r - (r + 1) / 2) - L = constrained_gaussian("$(name)_lowrank_Kchol", 0.0, 5.0, -Inf, Inf, repeats = Int(r * (r + 1) / 2)) - U = constrained_gaussian("$(name)_lowrank_U", 0.0, 100.0, -Inf, Inf, repeats = Int(d * r)) - return combine_distributions([L, U]) -end - -# combining input and output spaces: -function calculate_n_hyperparameters( - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {SK <: SeparableKernel} - - input_cov_structure = get_input_cov_structure(kernel_structure) - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - n_hp_out = calculate_n_hyperparameters(output_dim, output_cov_structure) - n_scalings = 1 - - # if both in and output dim are "OneDimFactors" we still learn 1 hp - if n_hp_in + n_hp_out == 0 - return 1 + n_scalings - end - - return n_hp_in + n_hp_out + n_scalings -end - -function calculate_n_hyperparameters(input_dim::Int, kernel_structure::KST) where {KST <: KernelStructureType} - output_dim = 1 - return calculate_n_hyperparameters(input_dim, output_dim, kernel_structure) -end - -function calculate_n_hyperparameters( - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - n_hp = calculate_n_hyperparameters(input_dim * output_dim, cov_structure) - n_scalings = 1 - # if both in and output dim are "OneDimFactors" we still learn 1 hp - if n_hp == 0 - return 1 + n_scalings - end - - return n_hp + n_scalings -end - - - - -function build_default_prior(input_dim::Int, output_dim::Int, kernel_structure::SK) where {SK <: SeparableKernel} - input_cov_structure = get_input_cov_structure(kernel_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - input_prior = build_default_prior("input", n_hp_in, input_cov_structure) - n_hp_out = calculate_n_hyperparameters(output_dim, output_cov_structure) - output_prior = build_default_prior("output", n_hp_out, output_cov_structure) - - scaling_kern = constrained_gaussian("sigma", 1, 5, 0, Inf) - - # We only use OneDimFactor values, default to input if both in and output dimension are one dimensional.Otherwise they 
are ignored - if isnothing(input_prior) && isnothing(output_prior) - onedim_prior = constrained_gaussian("onedim", 1.0, 1.0, 0.0, Inf) - return combine_distributions([onedim_prior, scaling_kern]) - elseif isnothing(input_prior) - return combine_distributions([output_prior, scaling_kern]) - elseif isnothing(output_prior) - return combine_distributions([input_prior, scaling_kern]) - else - return combine_distributions([input_prior, output_prior, scaling_kern]) - end -end - -function build_default_prior(input_dim::Int, kernel_structure::KST) where {KST <: KernelStructureType} - output_dim = 1 - return build_default_prior(input_dim, output_dim, kernel_structure) -end - -function build_default_prior(input_dim::Int, output_dim::Int, kernel_structure::NK) where {NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - n_hp = calculate_n_hyperparameters(input_dim * output_dim, cov_structure) - if n_hp == 0 - pd_kern = constrained_gaussian("onedim", 1.0, 1.0, 0.0, Inf) - else - pd_kern = build_default_prior("full", n_hp, cov_structure) - end - - scaling_kern = constrained_gaussian("sigma", 1, 5, 0, Inf) - return combine_distributions([pd_kern, scaling_kern]) -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - input_cov_structure = get_input_cov_structure(kernel_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - U = hyperparameters_from_flat(x[1:n_hp_in], input_cov_structure) - V = hyperparameters_from_flat(x[(n_hp_in + 1):end], output_cov_structure) - - if isnothing(U) && isnothing(V) #i.e. both are from OneDimFactor cov structures - return x[1] * ones(1, 1), V - elseif isnothing(U) - return U, 0.5 * (V + permutedims(V, (2, 1))) - elseif isnothing(V) - return 0.5 * (U + permutedims(U, (2, 1))), V - else - # symmetrize (sometimes there are numerical artifacts) - U = 0.5 * (U + permutedims(U, (2, 1))) - V = 0.5 * (V + permutedims(V, (2, 1))) - return U, V - end -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - output_dim = 1 - U, V = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - - return U -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - C = hyperparameters_from_flat(x, cov_structure) - if isnothing(C) - return x[1] * ones(1, 1) - else - return 0.5 * (C + permutedims(C, (2, 1))) - end - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Calculates key quantities (mean, covariance, and coefficients) of the objective function to be optimized for hyperparameter training. Buffers: mean_store, cov_store, buffer are provided to aid memory management. 
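For reference: the "complexity" returned by calculate_mean_cov_and_coeffs below is a log-determinant of the fitted feature factorization. For a Cholesky factorization A = L L', log det A = 2 * sum(log.(diag(L))); the SVD branch sums log singular values instead. A standalone check:

using LinearAlgebra
A = [4.0 1.0; 1.0 3.0]
L = cholesky(A).L
2 * sum(log(L[i, i]) for i in 1:size(L, 1)) ≈ log(det(A))   # true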
-""" -function calculate_mean_cov_and_coeffs( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - train_idx::VV, - test_idx::VV, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - decomp_type::S, - mean_store::M, - cov_store::A, - buffer::A, - multithread_type::MT, -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - VV <: AbstractVector, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - M <: AbstractMatrix{<:AbstractFloat}, - A <: AbstractArray{<:AbstractFloat, 3}, - MT <: MultithreadType, -} - - # split data into train/test - itrain = get_inputs(io_pairs)[:, train_idx] - otrain = get_outputs(io_pairs)[:, train_idx] - io_train_cost = PairedDataContainer(itrain, otrain) - itest = get_inputs(io_pairs)[:, test_idx] - otest = get_outputs(io_pairs)[:, test_idx] - input_dim = size(itrain, 1) - output_dim = size(otrain, 1) - n_test = size(itest, 2) - - # build and fit the RF - rfm = RFM_from_hyperparameters( - rfi, - rng, - l, - regularization, - n_features, - batch_sizes, - input_dim, - output_dim, - multithread_type, - ) - fitted_features = RF.Methods.fit(rfm, io_train_cost, decomposition_type = decomp_type) - - #we want to calc 1/var(y-mean)^2 + lambda/m * coeffs^2 in the end - thread_opt = isa(multithread_type, TullioThreading) - RF.Methods.predict!( - rfm, - fitted_features, - DataContainer(itest), - mean_store, - cov_store, - buffer, - tullio_threading = thread_opt, - ) - - # sizes (output_dim x n_test), (output_dim x output_dim x n_test) - - ## TODO - the theory states that the following should be set: - scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features) - - #scaled_coeffs = 1e-3 * rand(n_features)#overwrite with noise... - # However the convergence is much improved with setting this to zero: - #scaled_coeffs = 0 - if decomp_type == "cholesky" - chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L - - complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1)) - else - svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S - complexity = sum(log, svd_singval) # note this is log(abs(det)) - end - complexity = sqrt(complexity) - return scaled_coeffs, complexity - -end - - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Calculate the empirical covariance, additionally applying a shrinkage operator (here the Ledoit Wolf 2004 shrinkage operation). 
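The shrinkage estimator whose docstring continues below blends the sample covariance with a scaled identity, γμI + (1-γ)Γ: γ = 0 keeps the Monte-Carlo estimate, γ = 1 reverts to μI with μ the average variance. A hypothetical standalone version of the final blend (inside shrinkage_cov itself, γ is estimated from the samples):

using LinearAlgebra, Statistics
samples = randn(5, 20)                     # hypothetical: 5 outputs, 20 samples
Γ = cov(samples, dims = 2)
μ_shrink = tr(Γ) / 5                       # average variance
γ = 0.3                                    # illustrative; estimated in practice
Γ_shrunk = (1 - γ) * Γ + γ * μ_shrink * I  # blend toward the scaled identity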
Known to have better stability properties than Monte-Carlo for low sample sizes -""" -function shrinkage_cov(sample_mat::AA) where {AA <: AbstractMatrix} - n_out, n_sample = size(sample_mat) - - # de-mean (as we will use the samples directly for calculation of β) - sample_mat_zeromean = sample_mat .- mean(sample_mat, dims = 2) - # Ledoit Wolf shrinkage to I - - # get sample covariance - Γ = cov(sample_mat_zeromean, dims = 2) - # estimate opt shrinkage - μ_shrink = 1 / n_out * tr(Γ) - δ_shrink = norm(Γ - μ_shrink * I)^2 / n_out # (scaled) frob norm of Γ_m - #once de-meaning, we need to correct the sample covariance with an n_sample -> n_sample-1 - β_shrink = sum([norm(c * c' - -Γ)^2 / n_out for c in eachcol(sample_mat_zeromean)]) / (n_sample - 1)^2 - - γ_shrink = min(β_shrink / δ_shrink, 1) # clipping is typically rare - # γμI + (1-γ)Γ - Γ .*= (1 - γ_shrink) - for i in 1:n_out - Γ[i, i] += γ_shrink * μ_shrink - end - - @info "Shrinkage scale: $(γ_shrink), (0 = none, 1 = revert to scaled Identity)\n shrinkage covariance condition number: $(cond(Γ))" - return Γ -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Calculate the empirical covariance, additionally applying the Noise Informed Covariance Estimator (NICE) Vishnay et al. 2024. -""" -function nice_cov(sample_mat::AA, n_samples = 400, δ::FT = 1.0) where {AA <: AbstractMatrix, FT <: Real} - - n_sample_cov = size(sample_mat, 2) - Γ = cov(sample_mat, dims = 2) - - bd_tol = 1e8 * eps() - - v = sqrt.(diag(Γ)) - V = Diagonal(v) #stds - V_inv = inv(V) - corr = clamp.(V_inv * Γ * V_inv, -1 + bd_tol, 1 - bd_tol) # full corr - - # parameter sweep over the exponents - max_exponent = 2 * 5 # must be even - interp_steps = 100 - # use find the variability in the corr coeff matrix entries - std_corrs = approximate_corr_std.(corr, n_sample_cov, n_samples) # Found in EKP.Localizers !! slowest part of code -> could speed up by precomputing an interpolation of [-1,1] - - std_tol = sqrt(sum(std_corrs .^ 2)) - α_min_exceeded = [max_exponent] - for α in 2:2:max_exponent # even exponents give a PSD - corr_psd = corr .^ (α + 1) # abs not needed as α even - # find the first exponent that exceeds the noise tolerance in norm - if norm(corr_psd - corr) > δ * std_tol - α_min_exceeded[1] = α - break - end - end - corr_psd = corr .^ α_min_exceeded[1] - corr_psd_prev = corr .^ (α_min_exceeded[1] - 2) # previous PSD correction - - for α in LinRange(1.0, 0.0, interp_steps) - corr_interp = ((1 - α) * (corr_psd_prev) + α * corr_psd) .* corr - if norm(corr_interp - corr) < δ * std_tol - corr[:, :] = corr_interp #update the correlation matrix block - break - end - end - out = posdef_correct(V * corr * V) # rebuild the cov matrix - @info "NICE-adjusted covariance condition number: $(cond(out))" - return out - -end - - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Estimation of objective function (marginal log-likelihood) variability for optimization of hyperparameters. Calculated empirically by calling many samples of the objective at a "reasonable" value. 
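The damping mechanism inside nice_cov above, in miniature: elementwise powers of the correlation matrix keep the unit diagonal and the signs, but shrink weak (noise-dominated) off-diagonal entries toward zero; the exponent is then chosen against the estimated sampling noise in the correlation entries.

corr = [1.0 0.2; 0.2 1.0]
corr .^ 3   # [1.0 0.008; 0.008 1.0] -- diagonal intact, weak entry damped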
`multithread_type` is used as dispatch, either `ensemble` (thread over the empirical samples) or `tullio` (serial samples, and thread the linear algebra within each sample) -""" -function estimate_mean_and_coeffnorm_covariance( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - train_idx::VV, - test_idx::VV, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - n_samples::Int, - decomp_type::S, - multithread_type::TullioThreading; - repeats::Int = 1, - cov_correction = "shrinkage", -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - VV <: AbstractVector, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - - output_dim = size(get_outputs(io_pairs), 1) - n_test = length(test_idx) - - means = zeros(output_dim, n_samples, n_test) - mean_of_covs = zeros(output_dim, output_dim, n_test) - moc_tmp = similar(mean_of_covs) - mtmp = zeros(output_dim, n_test) - buffer = zeros(n_test, output_dim, n_features) - complexity = zeros(1, n_samples) - coeffl2norm = zeros(1, n_samples) - println("estimate cov with " * string(n_samples) * " iterations...") - - for i in ProgressBar(1:n_samples) - for j in 1:repeats - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rng, - l, - regularization, - n_features, - train_idx, - test_idx, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - # cplxty 1 - - # update vbles needed for cov - means[:, i, :] .+= mtmp ./ repeats - coeffl2norm[1, i] += sqrt(sum(abs2, c)) / repeats - complexity[1, i] += cplxty / repeats - - # update vbles needed for mean - @. mean_of_covs += moc_tmp / (repeats * n_samples) - - end - end - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - - approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, n_samples) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - sample_mat = vcat(blockmeans, coeffl2norm, complexity) - - if cov_correction == "shrinkage" - Γ = shrinkage_cov(sample_mat) - elseif cov_correction == "nice" - Γ = nice_cov(sample_mat) - else - Γ = cov(sample_mat, dims = 2) - end - - if !isposdef(approx_σ2) - println("approx_σ2 not posdef") - approx_σ2 = posdef_correct(approx_σ2) - end - - return Γ, approx_σ2 - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Evaluate an objective function (marginal log-likelihood) over an ensemble of hyperparameter values (used for EKI). 
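The objective vector assembled above has a fixed block layout: each test point owns an output_dim-sized slice, with its covariance block placed on the diagonal of approx_σ2. The indexing in isolation, with illustrative sizes:

output_dim, n_test = 2, 3
mean_of_covs = rand(n_test, output_dim, output_dim)   # ordering after permutedims
approx_σ2 = zeros(n_test * output_dim, n_test * output_dim)
for i in 1:n_test
    id = ((i - 1) * output_dim + 1):(i * output_dim)
    approx_σ2[id, id] = mean_of_covs[i, :, :]
end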
`multithread_type` is used as dispatch, either `ensemble` (thread over the ensemble members) or `tullio` (serial samples, and thread the linear algebra within each sample) -""" -function calculate_ensemble_mean_and_coeffnorm( - rfi::RFI, - rng::RNG, - lvecormat::VorM, - regularization::MorUSorD, - n_features::Int, - train_idx::VV, - test_idx::VV, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - decomp_type::S, - multithread_type::TullioThreading; - repeats::Int = 1, -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - VorM <: AbstractVecOrMat, - VV <: AbstractVector, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - if isa(lvecormat, AbstractVector) - lmat = reshape(lvecormat, 1, :) - else - lmat = lvecormat - end - N_ens = size(lmat, 2) - output_dim = size(get_outputs(io_pairs), 1) - n_test = length(test_idx) - - means = zeros(output_dim, N_ens, n_test) - mean_of_covs = zeros(output_dim, output_dim, n_test) - buffer = zeros(n_test, output_dim, n_features) - complexity = zeros(1, N_ens) - coeffl2norm = zeros(1, N_ens) - moc_tmp = similar(mean_of_covs) - mtmp = zeros(output_dim, n_test) - - println("calculating " * string(N_ens) * " ensemble members...") - - for i in ProgressBar(1:N_ens) - for j in collect(1:repeats) - l = lmat[:, i] - - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rng, - l, - regularization, - n_features, - train_idx, - test_idx, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - means[:, i, :] += mtmp ./ repeats - @. mean_of_covs += moc_tmp / (repeats * N_ens) - coeffl2norm[1, i] += sqrt(sum(c .^ 2)) / repeats - complexity[1, i] += cplxty / repeats - end - end - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - blockcovmat = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, N_ens) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - blockcovmat[id, id] = mean_of_covs[i, :, :] - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - if !isposdef(blockcovmat) - println("blockcovmat not posdef") - blockcovmat = posdef_correct(blockcovmat) - end - - return vcat(blockmeans, coeffl2norm, complexity), blockcovmat - -end - - -function estimate_mean_and_coeffnorm_covariance( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - train_idx::VV, - test_idx::VV, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - n_samples::Int, - decomp_type::S, - multithread_type::EnsembleThreading; - repeats::Int = 1, - cov_correction = "shrinkage", -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - VV <: AbstractVector, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - - output_dim = size(get_outputs(io_pairs), 1) - n_test = length(test_idx) - println("estimate cov with " * string(n_samples) * " iterations...") - - nthreads = Threads.nthreads() - rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 - rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] - - c_list = [zeros(1, n_samples) for i in 1:nthreads] - cp_list = [zeros(1, n_samples) for i in 1:nthreads] - m_list = [zeros(output_dim, n_samples, n_test) for i in 1:nthreads] - moc_list = [zeros(output_dim, output_dim, 
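The EnsembleThreading methods here avoid write races by giving every thread its own RNG and accumulators, summed after the loop. The same pattern in a self-contained toy (it mirrors the threadid-indexed buffers in the source; the reduction itself is hypothetical):

using Random
nthreads = Threads.nthreads()
rng_list = [Random.MersenneTwister(1234 + i) for i in 1:nthreads]
acc_list = [zeros(3) for _ in 1:nthreads]
Threads.@threads for i in 1:100
    tid = Threads.threadid()
    acc_list[tid] .+= randn(rng_list[tid], 3) ./ 100
end
total = sum(acc_list)   # combine per-thread partial sums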
n_test) for i in 1:nthreads] - moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] - mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads] - buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] - - - Threads.@threads for i in ProgressBar(1:n_samples) - tid = Threads.threadid() - rngtmp = rng_list[tid] - mtmp = mtmp_list[tid] - moc_tmp = moc_tmp_list[tid] - buffer = buffer_list[tid] - for j in 1:repeats - - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rngtmp, - l, - regularization, - n_features, - train_idx, - test_idx, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - - - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - # cplxty 1 - - # update vbles needed for cov - # update vbles needed for cov - m_list[tid][:, i, :] += mtmp ./ repeats - c_list[tid][1, i] += sqrt(sum(abs2, c)) / repeats - cp_list[tid][1, i] += cplxty / repeats - - # update vbles needed for mean - @. moc_list[tid] += moc_tmp ./ (repeats * n_samples) - - end - end - - #put back together after threading - mean_of_covs = sum(moc_list) - means = sum(m_list) - coeffl2norm = sum(c_list) - complexity = sum(cp_list) - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - - approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, n_samples) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - - sample_mat = vcat(blockmeans, coeffl2norm, complexity) - - if cov_correction == "shrinkage" - Γ = shrinkage_cov(sample_mat) - elseif cov_correction == "nice" - Γ = nice_cov(sample_mat) - else - Γ = cov(sample_mat, dims = 2) - end - - if !isposdef(approx_σ2) - println("approx_σ2 not posdef") - approx_σ2 = posdef_correct(approx_σ2) - end - - return Γ, approx_σ2 - -end - - - - -function calculate_ensemble_mean_and_coeffnorm( - rfi::RFI, - rng::RNG, - lvecormat::VorM, - regularization::MorUSorD, - n_features::Int, - train_idx::VV, - test_idx::VV, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - decomp_type::S, - multithread_type::EnsembleThreading; - repeats::Int = 1, -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - VorM <: AbstractVecOrMat, - VV <: AbstractVector, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - if isa(lvecormat, AbstractVector) - lmat = reshape(lvecormat, 1, :) - else - lmat = lvecormat - end - N_ens = size(lmat, 2) - output_dim = size(get_outputs(io_pairs), 1) - n_test = length(test_idx) - - - println("calculating " * string(N_ens) * " ensemble members...") - - nthreads = Threads.nthreads() - c_list = [zeros(1, N_ens) for i in 1:nthreads] - cp_list = [zeros(1, N_ens) for i in 1:nthreads] - m_list = [zeros(output_dim, N_ens, n_test) for i in 1:nthreads] - moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] - buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] - moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] - mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads] - rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 - rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] - Threads.@threads for i in ProgressBar(1:N_ens) - tid = 
Threads.threadid() - rngtmp = rng_list[tid] - mtmp = mtmp_list[tid] - moc_tmp = moc_tmp_list[tid] - buffer = buffer_list[tid] - l = lmat[:, i] - for j in collect(1:repeats) - - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rngtmp, - l, - regularization, - n_features, - train_idx, - test_idx, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - m_list[tid][:, i, :] += mtmp ./ repeats - @. moc_list[tid] += moc_tmp ./ (repeats * N_ens) - c_list[tid][1, i] += sqrt(sum(c .^ 2)) / repeats - cp_list[tid][1, i] += cplxty / repeats - - end - end - - # put back together after threading - mean_of_covs = sum(moc_list) - means = sum(m_list) - coeffl2norm = sum(c_list) - complexity = sum(cp_list) - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - blockcovmat = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, N_ens) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - blockcovmat[id, id] = mean_of_covs[i, :, :] - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - if !isposdef(blockcovmat) - println("blockcovmat not posdef") - blockcovmat = posdef_correct(blockcovmat) - end - - return vcat(blockmeans, coeffl2norm, complexity), blockcovmat - -end - -include("ScalarRandomFeature.jl") -include("VectorRandomFeature.jl") diff --git a/src/RandomFeature_nodatasplit.jl b/src/RandomFeature_nodatasplit.jl deleted file mode 100644 index c14e0a64..00000000 --- a/src/RandomFeature_nodatasplit.jl +++ /dev/null @@ -1,1018 +0,0 @@ - -using RandomFeatures -const RF = RandomFeatures -using EnsembleKalmanProcesses -const EKP = EnsembleKalmanProcesses -using EnsembleKalmanProcesses.Localizers -using ..ParameterDistributions -using ..Utilities -using StableRNGs -using Random - -export calculate_n_hyperparameters, hyperparameters_from_flat, build_default_prior, cov_structure_from_string -export CovarianceStructureType, OneDimFactor, DiagonalFactor, CholeskyFactor, LowRankFactor, HierarchicalLowRankFactor -export SeparableKernel, NonseparableKernel -export get_input_cov_structure, get_output_cov_structure, get_cov_structure -export get_eps -export rank -export shrinkage_cov, nice_cov - - -abstract type RandomFeatureInterface <: MachineLearningTool end - -# Different types of threading -abstract type MultithreadType end -struct TullioThreading <: MultithreadType end -struct EnsembleThreading <: MultithreadType end - - -# Different types of covariance representation in the prior - -abstract type CovarianceStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -covariance structure for a one-dimensional space -""" -struct OneDimFactor <: CovarianceStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -builds a diagonal covariance structure -""" -struct DiagonalFactor{FT <: AbstractFloat} <: CovarianceStructureType - "nugget term" - eps::FT -end -DiagonalFactor() = DiagonalFactor(Float64(1.0)) -get_eps(df::DiagonalFactor) = df.eps - -""" -$(DocStringExtensions.TYPEDEF) - -builds a general positive-definite covariance structure -""" -struct CholeskyFactor{FT <: AbstractFloat} <: CovarianceStructureType - "nugget term" - eps::FT -end -CholeskyFactor() = CholeskyFactor(Float64(1.0)) -get_eps(cf::CholeskyFactor) = cf.eps - -""" -$(DocStringExtensions.TYPEDEF) - -builds a covariance structure that deviates from the identity with a low-rank perturbation. 
This perturbation is diagonalized in the low-rank space -""" -struct LowRankFactor{FT <: AbstractFloat} <: CovarianceStructureType - "rank of perturbation" - rank::Int - "nugget term" - eps::FT -end -LowRankFactor(r::Int) = LowRankFactor(r, Float64(1.0)) -get_eps(lrf::LowRankFactor) = lrf.eps - - -""" -$(DocStringExtensions.TYPEDEF) - -builds a covariance structure that deviates from the identity with a more general low-rank perturbation -""" -struct HierarchicalLowRankFactor{FT <: AbstractFloat} <: CovarianceStructureType - "rank of perturbation" - rank::Int - "nugget term" - eps::FT -end - -HierarchicalLowRankFactor(r::Int) = HierarchicalLowRankFactor(r, Float64(1.0)) -get_eps(hlrf::HierarchicalLowRankFactor) = hlrf.eps - -import LinearAlgebra: rank -rank(lrf::LowRankFactor) = lrf.rank -rank(hlrf::HierarchicalLowRankFactor) = hlrf.rank - -# To add a new cov type `T` one must define these 3 methods: -# hyperparameters_from_flat(x::V, d::Int, t::T) where {V <: AbstractVector} -# calculate_n_hyperparameters(d::Int, t::T) -# build_default_prior(name::SS, d::Int, t::T) where {SS <: AbstractString} -# and add a string id to cov_structure_from_string - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -creates some simple covariance structures from strings: ["onedim", "diagonal", "cholesky", "lowrank", hierlowrank"]. See the covariance Structures for more details. -""" -function cov_structure_from_string(s::S, d::Int) where {S <: AbstractString} - if s == "onedim" - return OneDimFactor() - elseif s == "diagonal" - return DiagonalFactor() - elseif s == "cholesky" - return CholeskyFactor() - elseif s == "lowrank" - return LowRankFactor(Int(ceil(sqrt(d)))) - elseif s == "hierlowrank" - return HierarchicalLowRankFactor(Int(ceil(sqrt(d)))) - else - throw( - ArgumentError( - "Recieved unknown `input_cov_structure` keyword $(s), \n please choose from [\"onedim\",\"diagonal\",\"cholesky\", \"lowrank\"] or provide a CovarianceStructureType", - ), - ) - end -end -cov_structure_from_string(cst::CST, args...; kwargs...) where {CST <: CovarianceStructureType} = cst - - -# Different types of kernel -abstract type KernelStructureType end - -""" -$(DocStringExtensions.TYPEDEF) - -Builds a separable kernel, i.e. one that accounts for input and output covariance structure separately -""" -struct SeparableKernel{CST1 <: CovarianceStructureType, CST2 <: CovarianceStructureType} <: KernelStructureType - input_cov_structure::CST1 - output_cov_structure::CST2 -end -get_input_cov_structure(kernel_structure::SeparableKernel) = kernel_structure.input_cov_structure -get_output_cov_structure(kernel_structure::SeparableKernel) = kernel_structure.output_cov_structure - - -""" -$(DocStringExtensions.TYPEDEF) - -Builds a nonseparable kernel, i.e. 
one that accounts for a joint input and output covariance structure -""" -struct NonseparableKernel{CST <: CovarianceStructureType} <: KernelStructureType - cov_structure::CST -end -get_cov_structure(kernel_structure::NonseparableKernel) = kernel_structure.cov_structure - - -# calculate_n_hyperparameters - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -calculates the number of hyperparameters generated by the choice of covariance structure -""" -calculate_n_hyperparameters(d::Int, odf::OneDimFactor) = 0 -calculate_n_hyperparameters(d::Int, df::DiagonalFactor) = d -calculate_n_hyperparameters(d::Int, cf::CholeskyFactor) = Int(d * (d + 1) / 2) -calculate_n_hyperparameters(d::Int, lrf::LowRankFactor) = Int(rank(lrf) + d * rank(lrf)) -calculate_n_hyperparameters(d::Int, hlrf::HierarchicalLowRankFactor) = - Int(rank(hlrf) * (rank(hlrf) + 1) / 2 + d * rank(hlrf)) - -# build from flat - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -reshapes a list of hyperparameters into a covariance matrix based on the selected structure -""" -hyperparameters_from_flat(x::V, odf::OneDimFactor) where {V <: AbstractVector} = nothing - -function hyperparameters_from_flat(x::V, df::DiagonalFactor) where {V <: AbstractVector} - return convert(Matrix, Diagonal(x) + get_eps(df) * I) -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Converts a flat array into a cholesky factor. -""" -function flat_to_chol(x::V) where {V <: AbstractVector} - choldim = Int(floor(sqrt(2 * length(x)))) - cholmat = zeros(choldim, choldim) - for i in 1:choldim - for j in 1:i - cholmat[i, j] = x[sum(0:(i - 1)) + j] - end - end - return cholmat -end - -function hyperparameters_from_flat(x::V, cf::CholeskyFactor) where {V <: AbstractVector} - chol = flat_to_chol(x) - return chol * permutedims(chol, (2, 1)) + get_eps(cf) * I - -end - -function hyperparameters_from_flat(x::V, lrf::LowRankFactor) where {V <: AbstractVector} - - r = rank(lrf) - d = Int(length(x) / r - 1) # l = r + d*r - - D = Diagonal(x[1:r]) # D acts like sing.vals.^2 - U = reshape(x[(r + 1):end], d, r) - qrU = qr(U) # qrU.Q is semi-orthogonal in first r columns, i.e. Q^T.Q = I - return qrU.Q[:, 1:r] * D * qrU.Q[:, 1:r]' + get_eps(lrf) * I - -end - -function hyperparameters_from_flat(x::V, hlrf::HierarchicalLowRankFactor) where {V <: AbstractVector} - r = rank(hlrf) - d = Int(length(x) / r - (r + 1) / 2) # l = r^2 + d*r - - # Ambikasaran, O'Neil, Singh 2016 - L = flat_to_chol(x[1:Int(r * (r + 1) / 2)]) - U = reshape(x[Int(r * (r + 1) / 2 + 1):end], d, r) - K = L * permutedims(L, (2, 1)) + get_eps(hlrf) * I - - qrU = qr(U) - svdT = svd(I + qrU.R * K * qrU.R') # square so T = Q S Q^t - M = svdT.V * Diagonal(sqrt.(svdT.S)) # M M^t = Q sqrt(S) (Q sqrt(S))^t = Q sqrt(S) sqrt(S) Q^t = T - X = M - I - update_factor = I + qrU.Q * X * qrU.Q' - - return update_factor * update_factor' -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -builds a prior distribution for the kernel hyperparameters to initialize optimization. 
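As in the first deleted copy, build_default_prior (next) assembles one named prior per covariance block plus a "sigma" scaling. A sketch for a 3-input/3-output separable low-rank kernel, with names following the deleted code:

nugget = 1e-8
kernel = SeparableKernel(LowRankFactor(2, nugget), LowRankFactor(2, nugget))
prior = build_default_prior(3, 3, kernel)
# combines "input_lowrank_diagonal"/"input_lowrank_U",
# "output_lowrank_diagonal"/"output_lowrank_U", and "sigma"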
-""" -build_default_prior(name::SS, n_hp::Int, odf::OneDimFactor) where {SS <: AbstractString} = nothing - -function build_default_prior(name::SS, n_hp::Int, df::DiagonalFactor) where {SS <: AbstractString} - return ParameterDistribution( - Dict( - "name" => "$(name)_diagonal", - "distribution" => VectorOfParameterized(repeat([Normal(0, 3)], n_hp)), - "constraint" => repeat([bounded_below(0.0)], n_hp), - ), - ) -end - -function build_default_prior(name::SS, n_hp::Int, df::CholeskyFactor) where {SS <: AbstractString} - return constrained_gaussian("$(name)_cholesky", 0.0, 5.0, -Inf, Inf, repeats = n_hp) -end - -function build_default_prior(name::SS, n_hp::Int, lrf::LowRankFactor) where {SS <: AbstractString} - r = rank(lrf) - d = Int(n_hp / r - 1) - D = ParameterDistribution( - Dict( - "name" => "$(name)_lowrank_diagonal", - "distribution" => VectorOfParameterized(repeat([Normal(0, 3)], r)), - "constraint" => repeat([bounded_below(0.0)], r), - ), - ) - U = constrained_gaussian("$(name)_lowrank_U", 0.0, 100.0, -Inf, Inf, repeats = Int(d * r)) - return combine_distributions([D, U]) -end - -function build_default_prior(name::SS, n_hp::Int, hlrf::HierarchicalLowRankFactor) where {SS <: AbstractString} - r = rank(hlrf) - d = Int(n_hp / r - (r + 1) / 2) - L = constrained_gaussian("$(name)_lowrank_Kchol", 0.0, 5.0, -Inf, Inf, repeats = Int(r * (r + 1) / 2)) - U = constrained_gaussian("$(name)_lowrank_U", 0.0, 100.0, -Inf, Inf, repeats = Int(d * r)) - return combine_distributions([L, U]) -end - -# combining input and output spaces: -function calculate_n_hyperparameters( - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {SK <: SeparableKernel} - - input_cov_structure = get_input_cov_structure(kernel_structure) - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - n_hp_out = calculate_n_hyperparameters(output_dim, output_cov_structure) - n_scalings = 1 - - # if both in and output dim are "OneDimFactors" we still learn 1 hp - if n_hp_in + n_hp_out == 0 - return 1 + n_scalings - end - - return n_hp_in + n_hp_out + n_scalings -end - -function calculate_n_hyperparameters(input_dim::Int, kernel_structure::KST) where {KST <: KernelStructureType} - output_dim = 1 - return calculate_n_hyperparameters(input_dim, output_dim, kernel_structure) -end - -function calculate_n_hyperparameters( - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - n_hp = calculate_n_hyperparameters(input_dim * output_dim, cov_structure) - n_scalings = 1 - # if both in and output dim are "OneDimFactors" we still learn 1 hp - if n_hp == 0 - return 1 + n_scalings - end - - return n_hp + n_scalings -end - - - - -function build_default_prior(input_dim::Int, output_dim::Int, kernel_structure::SK) where {SK <: SeparableKernel} - input_cov_structure = get_input_cov_structure(kernel_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - input_prior = build_default_prior("input", n_hp_in, input_cov_structure) - n_hp_out = calculate_n_hyperparameters(output_dim, output_cov_structure) - output_prior = build_default_prior("output", n_hp_out, output_cov_structure) - - scaling_kern = constrained_gaussian("sigma", 1, 5, 0, Inf) - - # We only use OneDimFactor values, default to input if both in and output dimension are one dimensional.Otherwise they 
are ignored - if isnothing(input_prior) && isnothing(output_prior) - onedim_prior = constrained_gaussian("onedim", 1.0, 1.0, 0.0, Inf) - return combine_distributions([onedim_prior, scaling_kern]) - elseif isnothing(input_prior) - return combine_distributions([output_prior, scaling_kern]) - elseif isnothing(output_prior) - return combine_distributions([input_prior, scaling_kern]) - else - return combine_distributions([input_prior, output_prior, scaling_kern]) - end -end - -function build_default_prior(input_dim::Int, kernel_structure::KST) where {KST <: KernelStructureType} - output_dim = 1 - return build_default_prior(input_dim, output_dim, kernel_structure) -end - -function build_default_prior(input_dim::Int, output_dim::Int, kernel_structure::NK) where {NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - n_hp = calculate_n_hyperparameters(input_dim * output_dim, cov_structure) - if n_hp == 0 - pd_kern = constrained_gaussian("onedim", 1.0, 1.0, 0.0, Inf) - else - pd_kern = build_default_prior("full", n_hp, cov_structure) - end - - scaling_kern = constrained_gaussian("sigma", 1, 5, 0, Inf) - return combine_distributions([pd_kern, scaling_kern]) -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - input_cov_structure = get_input_cov_structure(kernel_structure) - output_cov_structure = get_output_cov_structure(kernel_structure) - - n_hp_in = calculate_n_hyperparameters(input_dim, input_cov_structure) - U = hyperparameters_from_flat(x[1:n_hp_in], input_cov_structure) - V = hyperparameters_from_flat(x[(n_hp_in + 1):end], output_cov_structure) - - if isnothing(U) && isnothing(V) #i.e. both are from OneDimFactor cov structures - return x[1] * ones(1, 1), V - elseif isnothing(U) - return U, 0.5 * (V + permutedims(V, (2, 1))) - elseif isnothing(V) - return 0.5 * (U + permutedims(U, (2, 1))), V - else - # symmetrize (sometimes there are numerical artifacts) - U = 0.5 * (U + permutedims(U, (2, 1))) - V = 0.5 * (V + permutedims(V, (2, 1))) - return U, V - end -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - output_dim = 1 - U, V = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - - return U -end - -function hyperparameters_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - cov_structure = get_cov_structure(kernel_structure) - C = hyperparameters_from_flat(x, cov_structure) - if isnothing(C) - return x[1] * ones(1, 1) - else - return 0.5 * (C + permutedims(C, (2, 1))) - end - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Calculates key quantities (mean, covariance, and coefficients) of the objective function to be optimized for hyperparameter training. Buffers: mean_store, cov_store, buffer are provided to aid memory management. 
-""" -function calculate_mean_cov_and_coeffs( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - n_train::Int, - n_test::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - decomp_type::S, - mean_store::M, - cov_store::A, - buffer::A, - multithread_type::MT, -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - M <: AbstractMatrix{<:AbstractFloat}, - A <: AbstractArray{<:AbstractFloat, 3}, - MT <: MultithreadType, -} - - # split data into train/test - itrain = get_inputs(io_pairs) - otrain = get_outputs(io_pairs) - io_train_cost = PairedDataContainer(itrain, otrain) -# itest = get_inputs(io_pairs)[:, (n_train + 1):end] -# otest = get_outputs(io_pairs)[:, (n_train + 1):end] - itest = get_inputs(io_pairs) - otest = get_outputs(io_pairs) - input_dim = size(itrain, 1) - output_dim = size(otrain, 1) - n_test = size(itest, 2) - # build and fit the RF - rfm = RFM_from_hyperparameters( - rfi, - rng, - l, - regularization, - n_features, - batch_sizes, - input_dim, - output_dim, - multithread_type, - ) - fitted_features = RF.Methods.fit(rfm, io_train_cost, decomposition_type = decomp_type) - - #we want to calc 1/var(y-mean)^2 + lambda/m * coeffs^2 in the end - thread_opt = isa(multithread_type, TullioThreading) - RF.Methods.predict!( - rfm, - fitted_features, - DataContainer(itest), - mean_store, - cov_store, - buffer, - tullio_threading = thread_opt, - ) - - # sizes (output_dim x n_test), (output_dim x output_dim x n_test) - - ## TODO - the theory states that the following should be set: - scaled_coeffs = sqrt(1 / (n_features)) * RF.Methods.get_coeffs(fitted_features) - - #scaled_coeffs = 1e-3 * rand(n_features)#overwrite with noise... - # However the convergence is much improved with setting this to zero: - #scaled_coeffs = 0 - if decomp_type == "cholesky" - chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L - - complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1)) - else - svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S - complexity = sum(log, svd_singval) # note this is log(abs(det)) - end - complexity = sqrt(complexity) - return scaled_coeffs, complexity - -end - - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Calculate the empirical covariance, additionally applying a shrinkage operator (here the Ledoit Wolf 2004 shrinkage operation). 
Known to have better stability properties than the plain Monte-Carlo estimate for low sample sizes.
-"""
-function shrinkage_cov(sample_mat::AA) where {AA <: AbstractMatrix}
-    n_out, n_sample = size(sample_mat)
-
-    # de-mean (as we will use the samples directly for calculation of β)
-    sample_mat_zeromean = sample_mat .- mean(sample_mat, dims = 2)
-    # Ledoit Wolf shrinkage to I
-
-    # get sample covariance
-    Γ = cov(sample_mat_zeromean, dims = 2)
-    # estimate opt shrinkage
-    μ_shrink = 1 / n_out * tr(Γ)
-    δ_shrink = norm(Γ - μ_shrink * I)^2 / n_out # (scaled) frob norm of Γ_m
-    # as we de-meaned, the sample covariance is corrected with an n_sample -> n_sample - 1 factor
-    β_shrink = sum([norm(c * c' - Γ)^2 / n_out for c in eachcol(sample_mat_zeromean)]) / (n_sample - 1)^2
-
-    γ_shrink = min(β_shrink / δ_shrink, 1) # clipping is typically rare
-    # γμI + (1-γ)Γ
-    Γ .*= (1 - γ_shrink)
-    for i in 1:n_out
-        Γ[i, i] += γ_shrink * μ_shrink
-    end
-
-    @info "Shrinkage scale: $(γ_shrink), (0 = none, 1 = revert to scaled Identity)\n shrinkage covariance condition number: $(cond(Γ))"
-    return Γ
-end
-
-
-"""
-$(DocStringExtensions.TYPEDSIGNATURES)
-
-Calculate the empirical covariance, additionally applying the Noise Informed Covariance Estimator (NICE), Vishny, Morzfeld et al. 2024.
-"""
-function nice_cov(sample_mat::AA, n_samples = 400, δ::FT = 1.0) where {AA <: AbstractMatrix, FT <: Real}
-
-    n_sample_cov = size(sample_mat, 2)
-    Γ = cov(sample_mat, dims = 2)
-
-    bd_tol = 1e8 * eps()
-
-    v = sqrt.(diag(Γ))
-    V = Diagonal(v) #stds
-    V_inv = inv(V)
-    corr = clamp.(V_inv * Γ * V_inv, -1 + bd_tol, 1 - bd_tol) # full corr
-
-    # parameter sweep over the exponents
-    max_exponent = 2 * 5 # must be even
-    interp_steps = 100
-    # find the variability in the corr coeff matrix entries
-    std_corrs = approximate_corr_std.(corr, n_sample_cov, n_samples) # Found in EKP.Localizers !! slowest part of code -> could speed up by precomputing an interpolation of [-1,1]
-
-    std_tol = sqrt(sum(std_corrs .^ 2))
-    α_min_exceeded = [max_exponent]
-    for α in 2:2:max_exponent # even exponents give a PSD
-        corr_psd = corr .^ (α + 1) # abs not needed as α even
-        # find the first exponent that exceeds the noise tolerance in norm
-        if norm(corr_psd - corr) > δ * std_tol
-            α_min_exceeded[1] = α
-            break
-        end
-    end
-    corr_psd = corr .^ α_min_exceeded[1]
-    corr_psd_prev = corr .^ (α_min_exceeded[1] - 2) # previous PSD correction
-
-    for α in LinRange(1.0, 0.0, interp_steps)
-        corr_interp = ((1 - α) * (corr_psd_prev) + α * corr_psd) .* corr
-        if norm(corr_interp - corr) < δ * std_tol
-            corr[:, :] = corr_interp #update the correlation matrix block
-            break
-        end
-    end
-    out = posdef_correct(V * corr * V) # rebuild the cov matrix
-    @info "NICE-adjusted covariance condition number: $(cond(out))"
-    return out
-
-end
-
-
-
-"""
-$(DocStringExtensions.TYPEDSIGNATURES)
-
-Estimation of objective function (marginal log-likelihood) variability for optimization of hyperparameters. Calculated empirically by evaluating the objective many times at a "reasonable" value.
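Schematically (sizes invented; the matrix stands in for the stacked samples `vcat(blockmeans, coeffl2norm, complexity)` built below):

    objective_samples = randn(12 + 2, 50)    # (n_test * output_dim + 2) rows, one column per sample
    Γ = shrinkage_cov(objective_samples)     # cov_correction = "shrinkage" (the default)
    # Γ = nice_cov(objective_samples)        # cov_correction = "nice"
    # Γ = cov(objective_samples, dims = 2)   # no correction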
`multithread_type` is used as dispatch, either `ensemble` (thread over the empirical samples) or `tullio` (serial samples, and thread the linear algebra within each sample) -""" -function estimate_mean_and_coeffnorm_covariance( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - n_train::Int, - n_test::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - n_samples::Int, - decomp_type::S, - multithread_type::TullioThreading; - repeats::Int = 1, - cov_correction = "shrinkage", -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - - output_dim = size(get_outputs(io_pairs), 1) - - means = zeros(output_dim, n_samples, n_test) - mean_of_covs = zeros(output_dim, output_dim, n_test) - moc_tmp = similar(mean_of_covs) - mtmp = zeros(output_dim, n_test) - buffer = zeros(n_test, output_dim, n_features) - complexity = zeros(1, n_samples) - coeffl2norm = zeros(1, n_samples) - println("estimate cov with " * string(n_samples * repeats) * " iterations...") - - for i in ProgressBar(1:n_samples) - for j in 1:repeats - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rng, - l, - regularization, - n_features, - n_train, - n_test, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - # cplxty 1 - - # update vbles needed for cov - means[:, i, :] .+= mtmp ./ repeats - coeffl2norm[1, i] += sqrt(sum(abs2, c)) / repeats - complexity[1, i] += cplxty / repeats - - # update vbles needed for mean - @. mean_of_covs += moc_tmp / (repeats * n_samples) - - end - end - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - - approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, n_samples) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - sample_mat = vcat(blockmeans, coeffl2norm, complexity) - - if cov_correction == "shrinkage" - Γ = shrinkage_cov(sample_mat) - elseif cov_correction == "nice" - Γ = nice_cov(sample_mat) - else - Γ = cov(sample_mat, dims = 2) - end - - if !isposdef(approx_σ2) - println("approx_σ2 not posdef") - approx_σ2 = posdef_correct(approx_σ2) - end - - return Γ, approx_σ2 - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Evaluate an objective function (marginal log-likelihood) over an ensemble of hyperparameter values (used for EKI). 
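A shape sketch (sizes invented; the remaining arguments are elided, so the call itself is left commented out):

    n_hp, N_ens = 4, 40
    lmat = randn(n_hp, N_ens)  # one hyperparameter vector per column, i.e. per ensemble member
    # g, blockcovmat = calculate_ensemble_mean_and_coeffnorm(rfi, rng, lmat, ...)
    # size(g) == (n_test * output_dim + 2, N_ens): stacked predicted means, a coefficient-norm row, a complexity row
    # size(blockcovmat) == (n_test * output_dim, n_test * output_dim): block-diagonal mean of predictive covariances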
`multithread_type` is used as dispatch, either `ensemble` (thread over the ensemble members) or `tullio` (serial samples, and thread the linear algebra within each sample) -""" -function calculate_ensemble_mean_and_coeffnorm( - rfi::RFI, - rng::RNG, - lvecormat::VorM, - regularization::MorUSorD, - n_features::Int, - n_train::Int, - n_test::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - decomp_type::S, - multithread_type::TullioThreading; - repeats::Int = 1, -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - VorM <: AbstractVecOrMat, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - if isa(lvecormat, AbstractVector) - lmat = reshape(lvecormat, 1, :) - else - lmat = lvecormat - end - N_ens = size(lmat, 2) - output_dim = size(get_outputs(io_pairs), 1) - - means = zeros(output_dim, N_ens, n_test) - mean_of_covs = zeros(output_dim, output_dim, n_test) - buffer = zeros(n_test, output_dim, n_features) - complexity = zeros(1, N_ens) - coeffl2norm = zeros(1, N_ens) - moc_tmp = similar(mean_of_covs) - mtmp = zeros(output_dim, n_test) - - println("calculating " * string(N_ens * repeats) * " ensemble members...") - - for i in ProgressBar(1:N_ens) - for j in collect(1:repeats) - l = lmat[:, i] - - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rng, - l, - regularization, - n_features, - n_train, - n_test, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - means[:, i, :] += mtmp ./ repeats - @. mean_of_covs += moc_tmp / (repeats * N_ens) - coeffl2norm[1, i] += sqrt(sum(c .^ 2)) / repeats - complexity[1, i] += cplxty / repeats - end - end - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - blockcovmat = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, N_ens) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - blockcovmat[id, id] = mean_of_covs[i, :, :] - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - if !isposdef(blockcovmat) - println("blockcovmat not posdef") - blockcovmat = posdef_correct(blockcovmat) - end - - return vcat(blockmeans, coeffl2norm, complexity), blockcovmat - -end - - -function estimate_mean_and_coeffnorm_covariance( - rfi::RFI, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - n_train::Int, - n_test::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - io_pairs::PairedDataContainer, - n_samples::Int, - decomp_type::S, - multithread_type::EnsembleThreading; - repeats::Int = 1, - cov_correction = "shrinkage", -) where { - RFI <: RandomFeatureInterface, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - S <: AbstractString, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, -} - - output_dim = size(get_outputs(io_pairs), 1) - - println("estimate cov with " * string(n_samples * repeats) * " iterations...") - - nthreads = Threads.nthreads() - rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 - rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] - - c_list = [zeros(1, n_samples) for i in 1:nthreads] - cp_list = [zeros(1, n_samples) for i in 1:nthreads] - m_list = [zeros(output_dim, n_samples, n_test) for i in 1:nthreads] - moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] - moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 
1:nthreads]
-    mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads]
-    buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads]
-
-
-    Threads.@threads for i in ProgressBar(1:n_samples)
-        tid = Threads.threadid()
-        rngtmp = rng_list[tid]
-        mtmp = mtmp_list[tid]
-        moc_tmp = moc_tmp_list[tid]
-        buffer = buffer_list[tid]
-        for j in 1:repeats
-
-            c, cplxty = calculate_mean_cov_and_coeffs(
-                rfi,
-                rngtmp,
-                l,
-                regularization,
-                n_features,
-                n_train,
-                n_test,
-                batch_sizes,
-                io_pairs,
-                decomp_type,
-                mtmp,
-                moc_tmp,
-                buffer,
-                multithread_type,
-            )
-
-
-            # m output_dim x n_test
-            # v output_dim x output_dim x n_test
-            # c n_features
-            # cplxty 1
-
-            # update vbles needed for cov
-            m_list[tid][:, i, :] += mtmp ./ repeats
-            c_list[tid][1, i] += sqrt(sum(abs2, c)) / repeats
-            cp_list[tid][1, i] += cplxty / repeats
-
-            # update vbles needed for mean
-            @. moc_list[tid] += moc_tmp ./ (repeats * n_samples)
-
-        end
-    end
-
-    #put back together after threading
-    mean_of_covs = sum(moc_list)
-    means = sum(m_list)
-    coeffl2norm = sum(c_list)
-    complexity = sum(cp_list)
-    means = permutedims(means, (3, 2, 1))
-    mean_of_covs = permutedims(mean_of_covs, (3, 1, 2))
-
-    approx_σ2 = zeros(n_test * output_dim, n_test * output_dim)
-    blockmeans = zeros(n_test * output_dim, n_samples)
-    for i in 1:n_test
-        id = ((i - 1) * output_dim + 1):(i * output_dim)
-        approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2.
-        blockmeans[id, :] = permutedims(means[i, :, :], (2, 1))
-    end
-
-
-    sample_mat = vcat(blockmeans, coeffl2norm, complexity)
-
-    if cov_correction == "shrinkage"
-        Γ = shrinkage_cov(sample_mat)
-    elseif cov_correction == "nice"
-        Γ = nice_cov(sample_mat)
-    else
-        Γ = cov(sample_mat, dims = 2)
-    end
-
-    if !isposdef(approx_σ2)
-        println("approx_σ2 not posdef")
-        approx_σ2 = posdef_correct(approx_σ2)
-    end
-
-    return Γ, approx_σ2
-
-end
-
-
-
-
-function calculate_ensemble_mean_and_coeffnorm(
-    rfi::RFI,
-    rng::RNG,
-    lvecormat::VorM,
-    regularization::MorUSorD,
-    n_features::Int,
-    n_train::Int,
-    n_test::Int,
-    batch_sizes::Union{Dict{S, Int}, Nothing},
-    io_pairs::PairedDataContainer,
-    decomp_type::S,
-    multithread_type::EnsembleThreading;
-    repeats::Int = 1,
-) where {
-    RFI <: RandomFeatureInterface,
-    RNG <: AbstractRNG,
-    VorM <: AbstractVecOrMat,
-    S <: AbstractString,
-    MorUSorD <: Union{Matrix, UniformScaling, Diagonal},
-}
-    if isa(lvecormat, AbstractVector)
-        lmat = reshape(lvecormat, 1, :)
-    else
-        lmat = lvecormat
-    end
-    N_ens = size(lmat, 2)
-    output_dim = size(get_outputs(io_pairs), 1)
-
-
-
-    println("calculating " * string(N_ens * repeats) * " ensemble members...")
-
-    nthreads = Threads.nthreads()
-    c_list = [zeros(1, N_ens) for i in 1:nthreads]
-    cp_list = [zeros(1, N_ens) for i in 1:nthreads]
-    m_list = [zeros(output_dim, N_ens, n_test) for i in 1:nthreads]
-    moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads]
-    buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads]
-    moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads]
-    mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads]
-    rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5
-    rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads]
-    Threads.@threads for i in ProgressBar(1:N_ens)
-        tid = Threads.threadid()
-        rngtmp = rng_list[tid]
-        mtmp = mtmp_list[tid]
-        moc_tmp = moc_tmp_list[tid]
-        buffer = buffer_list[tid]
-        l = lmat[:, i]
- for j in collect(1:repeats) - - c, cplxty = calculate_mean_cov_and_coeffs( - rfi, - rngtmp, - l, - regularization, - n_features, - n_train, - n_test, - batch_sizes, - io_pairs, - decomp_type, - mtmp, - moc_tmp, - buffer, - multithread_type, - ) - - # m output_dim x n_test - # v output_dim x output_dim x n_test - # c n_features - m_list[tid][:, i, :] += mtmp ./ repeats - @. moc_list[tid] += moc_tmp ./ (repeats * N_ens) - c_list[tid][1, i] += sqrt(sum(c .^ 2)) / repeats - cp_list[tid][1, i] += cplxty / repeats - - end - end - - # put back together after threading - mean_of_covs = sum(moc_list) - means = sum(m_list) - coeffl2norm = sum(c_list) - complexity = sum(cp_list) - means = permutedims(means, (3, 2, 1)) - mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) - blockcovmat = zeros(n_test * output_dim, n_test * output_dim) - blockmeans = zeros(n_test * output_dim, N_ens) - for i in 1:n_test - id = ((i - 1) * output_dim + 1):(i * output_dim) - blockcovmat[id, id] = mean_of_covs[i, :, :] - blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) - end - - if !isposdef(blockcovmat) - println("blockcovmat not posdef") - blockcovmat = posdef_correct(blockcovmat) - end - - return vcat(blockmeans, coeffl2norm, complexity), blockcovmat - -end - -include("ScalarRandomFeature.jl") -include("VectorRandomFeature.jl") diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl index 3ed90dda..0ba32c63 100644 --- a/src/ScalarRandomFeature.jl +++ b/src/ScalarRandomFeature.jl @@ -157,7 +157,7 @@ function ScalarRandomFeatureInterface( "prior" => prior, #the hyperparameter_prior "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=1000), # Adaptive timestepping, + "scheduler" => EKP.DataMisfitController(terminate_at = 1000), # Adaptive timestepping, "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation "train_fraction" => 0.8, # 80:20 train - test split @@ -341,33 +341,37 @@ function build_models!( train_idx = [] test_idx = [] - n_train=0 - n_test=0 - if n_cross_val_sets == 0 - push!(train_idx,idx_shuffle) - push!(test_idx,idx_shuffle) + n_train = 0 + n_test = 0 + if n_cross_val_sets == 0 + push!(train_idx, idx_shuffle) + push!(test_idx, idx_shuffle) n_cross_val_sets = 1 # now just pretend there is one partition for looping purposes n_train = n_data - n_test = n_data + n_test = n_data else train_fraction = optimizer_options["train_fraction"] n_train = Int(floor(train_fraction * n_data)) n_test = n_data - n_train - - if n_test*n_cross_val_sets > n_data - throw(ArgumentError("train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). Received $n_cross_val_sets")) + + if n_test * n_cross_val_sets > n_data + throw( + ArgumentError( + "train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). 
Received $n_cross_val_sets", + ), + ) end - - for i = 1:n_cross_val_sets - tmp = idx_shuffle[(i-1)*n_test+1:i*n_test] + + for i in 1:n_cross_val_sets + tmp = idx_shuffle[((i - 1) * n_test + 1):(i * n_test)] push!(test_idx, tmp) push!(train_idx, setdiff(collect(1:n_data), tmp)) end end - - - + + + #regularization = I = 1.0 in scalar case regularization = I @@ -377,12 +381,9 @@ function build_models!( n_iteration = optimizer_options["n_iteration"] diagnostics = zeros(n_iteration, n_rfms) for i in 1:n_rfms - - io_pairs_opt = PairedDataContainer( - input_values, - reshape(output_values[i,:],1,size(output_values, 2)), - ) - + + io_pairs_opt = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) + multithread = optimizer_options["multithread"] if multithread == "ensemble" multithread_type = EnsembleThreading() @@ -415,7 +416,7 @@ function build_models!( n_cov_samples_min = n_test + 2 n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) observation_vec = [] - for cv_idx = 1:n_cross_val_sets + for cv_idx in 1:n_cross_val_sets internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( srfi, rng, @@ -439,13 +440,9 @@ function build_models!( end data = vcat(get_outputs(io_pairs_opt)[test_idx[cv_idx]], 0.0, 0.0) - push!(observation_vec, EKP.Observation( - Dict( - "names" => "$(cv_idx)", - "samples" => data[:], - "covariances" => Γ, - ), - ), + push!( + observation_vec, + EKP.Observation(Dict("names" => "$(cv_idx)", "samples" => data[:], "covariances" => Γ)), ) end observation = combine_observations(observation_vec) @@ -475,9 +472,9 @@ function build_models!( #get parameters: lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) - g_ens = zeros(n_cross_val_sets*(n_test+2),n_ensemble) - for cv_idx = 1:n_cross_val_sets - + g_ens = zeros(n_cross_val_sets * (n_test + 2), n_ensemble) + for cv_idx in 1:n_cross_val_sets + g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( srfi, rng, @@ -491,9 +488,9 @@ function build_models!( decomp_type, multithread_type, ) - g_ens[(cv_idx-1)*(n_test+2) + 1: cv_idx*(n_test+2), :] = g_ens_tmp + g_ens[((cv_idx - 1) * (n_test + 2) + 1):(cv_idx * (n_test + 2)), :] = g_ens_tmp end - + inflation = optimizer_options["inflation"] if inflation > 0 terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation diff --git a/src/ScalarRandomFeature_crossval.jl b/src/ScalarRandomFeature_crossval.jl deleted file mode 100644 index fe770289..00000000 --- a/src/ScalarRandomFeature_crossval.jl +++ /dev/null @@ -1,584 +0,0 @@ -export ScalarRandomFeatureInterface -# getters already exported in VRFI - -""" -$(DocStringExtensions.TYPEDEF) - -Structure holding the Scalar Random Feature models. - -# Fields -$(DocStringExtensions.TYPEDFIELDS) - -""" -struct ScalarRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} <: - RandomFeatureInterface - "vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" - rfms::Vector{RF.Methods.RandomFeatureMethod} - "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" - fitted_features::Vector{RF.Methods.Fit} - "batch sizes" - batch_sizes::Union{Dict{S, Int}, Nothing} - "n_features" - n_features::Union{Int, Nothing} - "input dimension" - input_dim::Int - "choice of random number generator" - rng::RNG - "Kernel structure type (e.g. 
Separable or Nonseparable)" - kernel_structure::KST - "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" - feature_decomposition::S - "dictionary of options for hyperparameter optimizer" - optimizer_options::Dict{S} - "diagnostics from optimizer" - optimizer::Vector -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the rfms field -""" -get_rfms(srfi::ScalarRandomFeatureInterface) = srfi.rfms - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the fitted_features field -""" -get_fitted_features(srfi::ScalarRandomFeatureInterface) = srfi.fitted_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets batch_sizes the field -""" -get_batch_sizes(srfi::ScalarRandomFeatureInterface) = srfi.batch_sizes - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the n_features field -""" -get_n_features(srfi::ScalarRandomFeatureInterface) = srfi.n_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the input_dim field -""" -get_input_dim(srfi::ScalarRandomFeatureInterface) = srfi.input_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the rng field -""" -EKP.get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the kernel_structure field -""" -get_kernel_structure(srfi::ScalarRandomFeatureInterface) = srfi.kernel_structure - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the feature_decomposition field -""" -get_feature_decomposition(srfi::ScalarRandomFeatureInterface) = srfi.feature_decomposition - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer_options field -""" -get_optimizer_options(srfi::ScalarRandomFeatureInterface) = srfi.optimizer_options - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer field -""" -get_optimizer(srfi::ScalarRandomFeatureInterface) = srfi.optimizer - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and single- (or decorrelated-)output emulators. - - `n_features` - the number of random features - - `input_dim` - the dimension of the input space - - `kernel_structure` - - a prescribed form of kernel structure - - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) - - `rng = Random.GLOBAL_RNG` - random number generator - - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available - - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `ScalarRandomFeatureInterface` constructor): - - "prior": the prior for the hyperparameter optimization - - "prior_in_scale": use this to tune the input prior scale - - "n_ensemble": number of ensemble members - - "n_iteration": number of eki iterations - - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 0.0) - - "scheduler": Learning rate Scheduler (a.k.a. EKP timestepper) Default: DataMisfitController - - "tikhonov": tikhonov regularization parameter if >0 - - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split - - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - - "multithread": how to multithread. 
"ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - - "accelerator": use EKP accelerators (default is no acceleration) - - "verbose" => false, verbose optimizer statements - - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit Wolfe 03), also "nice" for (Vishny, Morzfeld et al. 2024) -""" -function ScalarRandomFeatureInterface( - n_features::Int, - input_dim::Int; - kernel_structure::Union{KST, Nothing} = nothing, - batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, - rng::RNG = Random.GLOBAL_RNG, - feature_decomposition::S = "cholesky", - optimizer_options::Union{Dict{S}, Nothing} = nothing, -) where {S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} - # Initialize vector for GP models - rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) - fitted_features = Vector{RF.Methods.Fit}(undef, 0) - - if isnothing(kernel_structure) - kernel_structure = SeparableKernel(cov_structure_from_string("lowrank", input_dim), OneDimFactor()) - end - KSType = typeof(kernel_structure) - - prior = build_default_prior(input_dim, kernel_structure) - - # default optimizer settings - optimizer_opts = Dict( - "prior" => prior, #the hyperparameter_prior - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=1000), # Adaptive timestepping, - "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme - "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction" => 0.8, # 80:20 train - test split - "n_features_opt" => n_features, # number of features for the optimization - "multithread" => "ensemble", # instead of "tullio" - "verbose" => false, # verbose optimizer statements - "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum - "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles - "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance - "n_cross_val_sets" => 1, - ) - - if !isnothing(optimizer_options) - for key in keys(optimizer_options) - optimizer_opts[key] = optimizer_options[key] - end - end - - opt_tmp = Dict() - for key in keys(optimizer_opts) - if key != "prior" - opt_tmp[key] = optimizer_opts[key] - end - end - @info("hyperparameter optimization with EKI configured with $opt_tmp") - - return ScalarRandomFeatureInterface{S, RNG, KSType}( - rfms, - fitted_features, - batch_sizes, - n_features, - input_dim, - rng, - kernel_structure, - feature_decomposition, - optimizer_opts, - [], - ) -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - - M = zeros(input_dim) #scalar output - U = hyperparameters_from_flat(x, input_dim, kernel_structure) - - if !isposdef(U) - println("U not posdef - correcting") - U = posdef_correct(U) - end - - dist = MvNormal(M, U) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim), - "name" => "xi", - ), - ) - - return pd -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - throw( - ArgumentError( - "Scalar Kernels must be of type `Separable( *** , OneDimFactor())`, 
received $(kernel_structure)", - ), - ) -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a MatrixVariateNormal(M,U,V) distribution (from `Distributions.jl`) with mean M=0, and input covariance U built using a `CovarianceStructureType`. -""" -function RFM_from_hyperparameters( - srfi::ScalarRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUSorD, # just a 1x1 matrix though - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - multithread_type::MT; -) where { - RNG <: AbstractRNG, - ForVM <: Union{Real, AbstractVecOrMat}, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - S <: AbstractString, - MT <: MultithreadType, -} - - xi_hp = l[1:(end - 1)] - sigma_hp = l[end] - kernel_structure = get_kernel_structure(srfi) - pd = hyperparameter_distribution_from_flat(xi_hp, input_dim, kernel_structure) - - feature_sampler = RF.Samplers.FeatureSampler(pd, rng = rng) - # Learn hyperparameters for different feature types - feature_parameters = Dict("sigma" => sigma_hp) - sff = RF.Features.ScalarFourierFeature(n_features, feature_sampler, feature_parameters = feature_parameters) - thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio - if isnothing(batch_sizes) - return RF.Methods.RandomFeatureMethod(sff, regularization = regularization, tullio_threading = thread_opt) - else - return RF.Methods.RandomFeatureMethod( - sff, - regularization = regularization, - batch_sizes = batch_sizes, - tullio_threading = thread_opt, - ) - end -end - -#removes vector-only input arguments -RFM_from_hyperparameters( - srfi::ScalarRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUS, # just a 1x1 matrix though - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - output_dim::Int, - multithread_type::MT; -) where { - RNG <: AbstractRNG, - ForVM <: Union{Real, AbstractVecOrMat}, - MorUS <: Union{Matrix, UniformScaling}, - S <: AbstractString, - MT <: MultithreadType, -} = RFM_from_hyperparameters(srfi, rng, l, regularization, n_features, batch_sizes, input_dim, multithread_type) - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a Multivariate Normal distribution (from `Distributions.jl`) with mean M=0, and input covariance U built with the `CovarianceStructureType`. -""" -function build_models!( - srfi::ScalarRandomFeatureInterface, - input_output_pairs::PairedDataContainer{FT}, -) where {FT <: AbstractFloat} - # get inputs and outputs - input_values = get_inputs(input_output_pairs) - output_values = get_outputs(input_output_pairs) - n_rfms, n_data = size(output_values) - noise_sd = 1.0 - - input_dim = size(input_values, 1) - - kernel_structure = get_kernel_structure(srfi) - n_hp = calculate_n_hyperparameters(input_dim, kernel_structure) - - - rfms = get_rfms(srfi) - if length(rfms) > 0 - @warn "ScalarRandomFeatureInterface already built. skipping..." - return - end - fitted_features = get_fitted_features(srfi) - n_features = get_n_features(srfi) - batch_sizes = get_batch_sizes(srfi) - rng = get_rng(srfi) - decomp_type = get_feature_decomposition(srfi) - optimizer_options = get_optimizer_options(srfi) - optimizer = get_optimizer(srfi) # empty vector - - # Optimize features with EKP for each output dim - # [1.] 
Split data into test/train 80/20 - train_fraction = optimizer_options["train_fraction"] - n_train = Int(floor(train_fraction * n_data)) - n_test = n_data - n_train - n_features_opt = optimizer_options["n_features_opt"] - - idx_shuffle = randperm(rng, n_data) - n_cross_val_sets = optimizer_options["n_cross_val_sets"] - if n_test*n_cross_val_sets > n_data - throw(ArgumentError("train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). Received $n_cross_val_sets")) - end - train_idx = [] - test_idx = [] - for i = 1:n_cross_val_sets - tmp = idx_shuffle[(i-1)*n_test+1:i*n_test] - push!(test_idx, tmp) - push!(train_idx, setdiff(collect(1:n_data), tmp)) - end - - #regularization = I = 1.0 in scalar case - regularization = I - - @info ( - "hyperparameter learning for $n_rfms models using $n_train training points, $n_test validation points and $n_features_opt features" - ) - n_iteration = optimizer_options["n_iteration"] - diagnostics = zeros(n_iteration, n_rfms) - for i in 1:n_rfms - - io_pairs_opt = PairedDataContainer( - input_values, - reshape(output_values[i,:],1,size(output_values, 2)), - ) - - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - multithread_type = EnsembleThreading() - elseif multithread == "tullio" - multithread_type = TullioThreading() - else - throw( - ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl), or \"ensemble\" (threading is done over the ensemble)", - ), - ) - end - # [2.] Estimate covariance at mean value - prior = optimizer_options["prior"] - - # where prior space has changed we need to rebuild the priors - if ndims(prior) > n_hp - - # comes from having a truncated output_dimension - # TODO not really a truncation here, resetting to default - @info "Original input space of dimension $(get_input_dim(srfi)) has been truncated to $(input_dim). \n Rebuilding prior... number of hyperparameters reduced from $(ndims(prior)) to $(n_hp)." - prior = build_default_prior(input_dim, kernel_structure) - - end - - μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) - - cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] - cov_correction = optimizer_options["cov_correction"] - n_cov_samples_min = n_test + 2 - n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) - observation_vec = [] - for cv_idx = 1:n_cross_val_sets - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - srfi, - rng, - μ_hp, - regularization, - n_features_opt, - train_idx[cv_idx], - test_idx[cv_idx], - batch_sizes, - io_pairs_opt, - n_cov_samples, - decomp_type, - multithread_type, - cov_correction = cov_correction, - ) - Γ = internal_Γ - Γ[1:n_test, 1:n_test] += regularization # + approx_σ2 - Γ[(n_test + 1):end, (n_test + 1):end] += I - if !isposdef(Γ) - Γ = posdef_correct(Γ) - end - data = vcat(get_outputs(io_pairs_opt)[test_idx[cv_idx]], 0.0, 0.0) - - push!(observation_vec, EKP.Observation( - Dict( - "names" => "$(cv_idx)", - "samples" => data[:], - "covariances" => Γ, - ), - ), - ) - end - observation = combine_observations(observation_vec) - # [3.] 
set up EKP optimization - n_ensemble = optimizer_options["n_ensemble"] - n_iteration = optimizer_options["n_iteration"] - opt_verbose_flag = optimizer_options["verbose"] - scheduler = optimizer_options["scheduler"] - accelerator = optimizer_options["accelerator"] - localization = optimizer_options["localization"] - - initial_params = construct_initial_ensemble(rng, prior, n_ensemble) - ekiobj = EKP.EnsembleKalmanProcess( - initial_params, - observation, - Inversion(), - scheduler = scheduler, - rng = rng, - accelerator = accelerator, - verbose = opt_verbose_flag, - localization_method = localization, - ) - err = zeros(n_iteration) - - # [4.] optimize with EKP - for i in 1:n_iteration - - #get parameters: - lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) - g_ens = zeros(n_cross_val_sets*(n_test+2),n_ensemble) - for cv_idx = 1:n_cross_val_sets - - g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( - srfi, - rng, - lvec, - regularization, - n_features_opt, - train_idx[cv_idx], - test_idx[cv_idx], - batch_sizes, - io_pairs_opt, - decomp_type, - multithread_type, - ) - g_ens[(cv_idx-1)*(n_test+2) + 1: cv_idx*(n_test+2), :] = g_ens_tmp - end - - inflation = optimizer_options["inflation"] - if inflation > 0 - terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation - else - terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation - end - if !isnothing(terminated) - break # if the timestep was terminated due to timestepping condition - end - - err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - end - diagnostics[:, i] = copy(err) - - # [5.] extract optimal hyperparameters - hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] - - if opt_verbose_flag - names = get_name(prior) - hp_optimal_batch = [hp_optimal[b] for b in batch(prior)] - hp_optimal_range = - [(minimum(hp_optimal_batch[i]), maximum(hp_optimal_batch[i])) for i in 1:length(hp_optimal_batch)] #the min and max of the hparams - prior_conf_interval = [mean(prior) .- 3 * sqrt.(var(prior)), mean(prior) .+ 3 * sqrt.(var(prior))] - pci_constrained = [transform_unconstrained_to_constrained(prior, prior_conf_interval[i]) for i in 1:2] - pcic = [(pci_constrained[1][i], pci_constrained[2][i]) for i in 1:length(pci_constrained[1])] - pcic_batched = [pcic[b][1] for b in batch(prior)] - @info("EKI Optimization result:") - println( - display( - [ - "name" "number of hyperparameters" "optimized value range" "99% prior mass" - names length.(hp_optimal_batch) hp_optimal_range pcic_batched - ], - ), - ) - end - - io_pairs_i = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) - # Now, fit new RF model with the optimized hyperparameters - - rfm_i = RFM_from_hyperparameters( - srfi, - rng, - hp_optimal, - regularization, - n_features, - batch_sizes, - input_dim, - multithread_type, - ) - fitted_features_i = RF.Methods.fit(rfm_i, io_pairs_i, decomposition_type = decomp_type) #fit features - - push!(rfms, rfm_i) - push!(fitted_features, fitted_features_i) - - end - push!(optimizer, diagnostics) - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Empty method, as optimization takes place within the build_models stage -""" -function optimize_hyperparameters!(srfi::ScalarRandomFeatureInterface, args...; kwargs...) - @info("Random Features already trained. 
continuing...") -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. -""" -function predict( - srfi::ScalarRandomFeatureInterface, - new_inputs::MM; - multithread = "ensemble", -) where {MM <: AbstractMatrix} - M = length(get_rfms(srfi)) - N_samples = size(new_inputs, 2) - # Predicts columns of inputs: input_dim × N_samples - μ = zeros(M, N_samples) - σ2 = zeros(M, N_samples) - if multithread == "ensemble" - tullio_threading = false - elseif multithread == "tullio" - tullio_threading = true - end - - for i in 1:M - μ[i, :], σ2[i, :] = RF.Methods.predict( - get_rfms(srfi)[i], - get_fitted_features(srfi)[i], - DataContainer(new_inputs), - tullio_threading = tullio_threading, - ) - end - - # add the noise contribution from the regularization - σ2[:, :] = σ2[:, :] .+ 1.0 - - return μ, σ2 -end diff --git a/src/ScalarRandomFeature_nodatasplit.jl b/src/ScalarRandomFeature_nodatasplit.jl deleted file mode 100644 index 1158def1..00000000 --- a/src/ScalarRandomFeature_nodatasplit.jl +++ /dev/null @@ -1,561 +0,0 @@ -export ScalarRandomFeatureInterface -# getters already exported in VRFI - -""" -$(DocStringExtensions.TYPEDEF) - -Structure holding the Scalar Random Feature models. - -# Fields -$(DocStringExtensions.TYPEDFIELDS) - -""" -struct ScalarRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} <: - RandomFeatureInterface - "vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" - rfms::Vector{RF.Methods.RandomFeatureMethod} - "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" - fitted_features::Vector{RF.Methods.Fit} - "batch sizes" - batch_sizes::Union{Dict{S, Int}, Nothing} - "n_features" - n_features::Union{Int, Nothing} - "input dimension" - input_dim::Int - "choice of random number generator" - rng::RNG - "Kernel structure type (e.g. 
Separable or Nonseparable)" - kernel_structure::KST - "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" - feature_decomposition::S - "dictionary of options for hyperparameter optimizer" - optimizer_options::Dict{S} - "diagnostics from optimizer" - optimizer::Vector -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the rfms field -""" -get_rfms(srfi::ScalarRandomFeatureInterface) = srfi.rfms - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the fitted_features field -""" -get_fitted_features(srfi::ScalarRandomFeatureInterface) = srfi.fitted_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets batch_sizes the field -""" -get_batch_sizes(srfi::ScalarRandomFeatureInterface) = srfi.batch_sizes - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the n_features field -""" -get_n_features(srfi::ScalarRandomFeatureInterface) = srfi.n_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the input_dim field -""" -get_input_dim(srfi::ScalarRandomFeatureInterface) = srfi.input_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the rng field -""" -EKP.get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the kernel_structure field -""" -get_kernel_structure(srfi::ScalarRandomFeatureInterface) = srfi.kernel_structure - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the feature_decomposition field -""" -get_feature_decomposition(srfi::ScalarRandomFeatureInterface) = srfi.feature_decomposition - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer_options field -""" -get_optimizer_options(srfi::ScalarRandomFeatureInterface) = srfi.optimizer_options - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer field -""" -get_optimizer(srfi::ScalarRandomFeatureInterface) = srfi.optimizer - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and single- (or decorrelated-)output emulators. - - `n_features` - the number of random features - - `input_dim` - the dimension of the input space - - `kernel_structure` - - a prescribed form of kernel structure - - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) - - `rng = Random.GLOBAL_RNG` - random number generator - - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available - - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `ScalarRandomFeatureInterface` constructor): - - "prior": the prior for the hyperparameter optimization - - "prior_in_scale": use this to tune the input prior scale - - "n_ensemble": number of ensemble members - - "n_iteration": number of eki iterations - - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 0.0) - - "scheduler": Learning rate Scheduler (a.k.a. EKP timestepper) Default: DataMisfitController - - "tikhonov": tikhonov regularization parameter if >0 - - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split - - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - - "multithread": how to multithread. 
"ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - - "accelerator": use EKP accelerators (default is no acceleration) - - "verbose" => false, verbose optimizer statements - - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit Wolfe 03), also "nice" for (Vishny, Morzfeld et al. 2024) -""" -function ScalarRandomFeatureInterface( - n_features::Int, - input_dim::Int; - kernel_structure::Union{KST, Nothing} = nothing, - batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, - rng::RNG = Random.GLOBAL_RNG, - feature_decomposition::S = "cholesky", - optimizer_options::Union{Dict{S}, Nothing} = nothing, -) where {S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} - # Initialize vector for GP models - rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) - fitted_features = Vector{RF.Methods.Fit}(undef, 0) - - if isnothing(kernel_structure) - kernel_structure = SeparableKernel(cov_structure_from_string("lowrank", input_dim), OneDimFactor()) - end - KSType = typeof(kernel_structure) - - prior = build_default_prior(input_dim, kernel_structure) - - # default optimizer settings - optimizer_opts = Dict( - "prior" => prior, #the hyperparameter_prior - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=100), # Adaptive timestepping, - "cov_sample_multiplier" => 2.0, # multiplier for samples to estimate covariance in optimization scheme - "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction" => 0.8, # 80:20 train - test split - "n_features_opt" => n_features, # number of features for the optimization - "multithread" => "ensemble", # instead of "tullio" - "verbose" => false, # verbose optimizer statements - "accelerator" => EKP.DefaultAccelerator(), # acceleration with momentum - "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles - "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance - ) - - if !isnothing(optimizer_options) - for key in keys(optimizer_options) - optimizer_opts[key] = optimizer_options[key] - end - end - - opt_tmp = Dict() - for key in keys(optimizer_opts) - if key != "prior" - opt_tmp[key] = optimizer_opts[key] - end - end - @info("hyperparameter optimization with EKI configured with $opt_tmp") - - return ScalarRandomFeatureInterface{S, RNG, KSType}( - rfms, - fitted_features, - batch_sizes, - n_features, - input_dim, - rng, - kernel_structure, - feature_decomposition, - optimizer_opts, - [], - ) -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - - M = zeros(input_dim) #scalar output - U = hyperparameters_from_flat(x, input_dim, kernel_structure) - - if !isposdef(U) - println("U not posdef - correcting") - U = posdef_correct(U) - end - - dist = MvNormal(M, U) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim), - "name" => "xi", - ), - ) - - return pd -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - throw( - ArgumentError( - "Scalar Kernels must be of type `Separable( *** , OneDimFactor())`, received $(kernel_structure)", - 
), - ) -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a MatrixVariateNormal(M,U,V) distribution (from `Distributions.jl`) with mean M=0, and input covariance U built using a `CovarianceStructureType`. -""" -function RFM_from_hyperparameters( - srfi::ScalarRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUSorD, # just a 1x1 matrix though - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - multithread_type::MT; -) where { - RNG <: AbstractRNG, - ForVM <: Union{Real, AbstractVecOrMat}, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - S <: AbstractString, - MT <: MultithreadType, -} - - xi_hp = l[1:(end - 1)] - sigma_hp = l[end] - kernel_structure = get_kernel_structure(srfi) - pd = hyperparameter_distribution_from_flat(xi_hp, input_dim, kernel_structure) - - feature_sampler = RF.Samplers.FeatureSampler(pd, rng = rng) - # Learn hyperparameters for different feature types - feature_parameters = Dict("sigma" => sigma_hp) - sff = RF.Features.ScalarFourierFeature(n_features, feature_sampler, feature_parameters = feature_parameters) - thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio - if isnothing(batch_sizes) - return RF.Methods.RandomFeatureMethod(sff, regularization = regularization, tullio_threading = thread_opt) - else - return RF.Methods.RandomFeatureMethod( - sff, - regularization = regularization, - batch_sizes = batch_sizes, - tullio_threading = thread_opt, - ) - end -end - -#removes vector-only input arguments -RFM_from_hyperparameters( - srfi::ScalarRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUS, # just a 1x1 matrix though - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - output_dim::Int, - multithread_type::MT; -) where { - RNG <: AbstractRNG, - ForVM <: Union{Real, AbstractVecOrMat}, - MorUS <: Union{Matrix, UniformScaling}, - S <: AbstractString, - MT <: MultithreadType, -} = RFM_from_hyperparameters(srfi, rng, l, regularization, n_features, batch_sizes, input_dim, multithread_type) - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a Multivariate Normal distribution (from `Distributions.jl`) with mean M=0, and input covariance U built with the `CovarianceStructureType`. -""" -function build_models!( - srfi::ScalarRandomFeatureInterface, - input_output_pairs::PairedDataContainer{FT}, -) where {FT <: AbstractFloat} - # get inputs and outputs - input_values = get_inputs(input_output_pairs) - output_values = get_outputs(input_output_pairs) - n_rfms, n_data = size(output_values) - noise_sd = 1.0 - - input_dim = size(input_values, 1) - - kernel_structure = get_kernel_structure(srfi) - n_hp = calculate_n_hyperparameters(input_dim, kernel_structure) - - - rfms = get_rfms(srfi) - if length(rfms) > 0 - @warn "ScalarRandomFeatureInterface already built. skipping..." - return - end - fitted_features = get_fitted_features(srfi) - n_features = get_n_features(srfi) - batch_sizes = get_batch_sizes(srfi) - rng = get_rng(srfi) - decomp_type = get_feature_decomposition(srfi) - optimizer_options = get_optimizer_options(srfi) - optimizer = get_optimizer(srfi) # empty vector - - # Optimize features with EKP for each output dim - # [1.] 
Split data into test/train 80/20 - train_fraction = optimizer_options["train_fraction"] - n_train=n_data - n_test = n_data -# n_train = Int(floor(train_fraction * n_data)) -# n_test = n_data - n_train - n_features_opt = optimizer_options["n_features_opt"] - - #idx_shuffle = randperm(rng, n_data) - - #regularization = I = 1.0 in scalar case - regularization = I - - - @info ( - "hyperparameter learning for $n_rfms models using $n_train training points, $n_test validation points and $n_features_opt features" - ) - n_iteration = optimizer_options["n_iteration"] - diagnostics = zeros(n_iteration, n_rfms) - for i in 1:n_rfms - io_pairs_opt = PairedDataContainer( - input_values, - reshape(output_values[i,:],1,size(output_values, 2)), - ) - #io_pairs_opt = PairedDataContainer( - # input_values[:, idx_shuffle], - # reshape(output_values[i, idx_shuffle], 1, size(output_values, 2)), - #) - - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - multithread_type = EnsembleThreading() - elseif multithread == "tullio" - multithread_type = TullioThreading() - else - throw( - ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl), or \"ensemble\" (threading is done over the ensemble)", - ), - ) - end - # [2.] Estimate covariance at mean value - prior = optimizer_options["prior"] - - # where prior space has changed we need to rebuild the priors - if ndims(prior) > n_hp - - # comes from having a truncated output_dimension - # TODO not really a truncation here, resetting to default - @info "Original input space of dimension $(get_input_dim(srfi)) has been truncated to $(input_dim). \n Rebuilding prior... number of hyperparameters reduced from $(ndims(prior)) to $(n_hp)." - prior = build_default_prior(input_dim, kernel_structure) - - end - - μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) - - cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] - cov_correction = optimizer_options["cov_correction"] - n_cov_samples_min = n_test + 2 - n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) - - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - srfi, - rng, - μ_hp, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - n_cov_samples, - decomp_type, - multithread_type, - cov_correction = cov_correction, - ) - Γ = internal_Γ - Γ[1:n_test, 1:n_test] += regularization # + approx_σ2 - Γ[(n_test + 1):end, (n_test + 1):end] += I - - # [3.] set up EKP optimization - n_ensemble = optimizer_options["n_ensemble"] - n_iteration = optimizer_options["n_iteration"] - opt_verbose_flag = optimizer_options["verbose"] - scheduler = optimizer_options["scheduler"] - accelerator = optimizer_options["accelerator"] - localization = optimizer_options["localization"] - - initial_params = construct_initial_ensemble(rng, prior, n_ensemble) -# data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, 0.0) - data = vcat(get_outputs(io_pairs_opt)[:], 0.0, 0.0) - ekiobj = EKP.EnsembleKalmanProcess( - initial_params, - data, - Γ, - Inversion(), - scheduler = scheduler, - rng = rng, - accelerator = accelerator, - verbose = opt_verbose_flag, - localization_method = localization, - ) - err = zeros(n_iteration) - - # [4.] 
optimize with EKP - for i in 1:n_iteration - - #get parameters: - lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) - g_ens, _ = calculate_ensemble_mean_and_coeffnorm( - srfi, - rng, - lvec, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - decomp_type, - multithread_type, - ) - - inflation = optimizer_options["inflation"] - if inflation > 0 - terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation - else - terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation - end - if !isnothing(terminated) - break # if the timestep was terminated due to timestepping condition - end - - err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - end - diagnostics[:, i] = copy(err) - - # [5.] extract optimal hyperparameters - hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] - - if opt_verbose_flag - names = get_name(prior) - hp_optimal_batch = [hp_optimal[b] for b in batch(prior)] - hp_optimal_range = - [(minimum(hp_optimal_batch[i]), maximum(hp_optimal_batch[i])) for i in 1:length(hp_optimal_batch)] #the min and max of the hparams - prior_conf_interval = [mean(prior) .- 3 * sqrt.(var(prior)), mean(prior) .+ 3 * sqrt.(var(prior))] - pci_constrained = [transform_unconstrained_to_constrained(prior, prior_conf_interval[i]) for i in 1:2] - pcic = [(pci_constrained[1][i], pci_constrained[2][i]) for i in 1:length(pci_constrained[1])] - pcic_batched = [pcic[b][1] for b in batch(prior)] - @info("EKI Optimization result:") - println( - display( - [ - "name" "number of hyperparameters" "optimized value range" "99% prior mass" - names length.(hp_optimal_batch) hp_optimal_range pcic_batched - ], - ), - ) - end - - io_pairs_i = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) - # Now, fit new RF model with the optimized hyperparameters - - rfm_i = RFM_from_hyperparameters( - srfi, - rng, - hp_optimal, - regularization, - n_features, - batch_sizes, - input_dim, - multithread_type, - ) - fitted_features_i = RF.Methods.fit(rfm_i, io_pairs_i, decomposition_type = decomp_type) #fit features - - push!(rfms, rfm_i) - push!(fitted_features, fitted_features_i) - - end - push!(optimizer, diagnostics) - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Empty method, as optimization takes place within the build_models stage -""" -function optimize_hyperparameters!(srfi::ScalarRandomFeatureInterface, args...; kwargs...) - @info("Random Features already trained. continuing...") -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. 
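A usage sketch (assuming `srfi` is an already-built `ScalarRandomFeatureInterface`; sizes invented):

    input_dim, n_points = 6, 10
    new_inputs = randn(input_dim, n_points)  # prediction points stored as columns
    # μ, σ2 = predict(srfi, new_inputs)
    # size(μ) == size(σ2) == (n_models, n_points), one row per scalar model
    # σ2 already contains the unit regularization noise (the method adds σ2 .+ 1.0)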
-""" -function predict( - srfi::ScalarRandomFeatureInterface, - new_inputs::MM; - multithread = "ensemble", -) where {MM <: AbstractMatrix} - M = length(get_rfms(srfi)) - N_samples = size(new_inputs, 2) - # Predicts columns of inputs: input_dim × N_samples - μ = zeros(M, N_samples) - σ2 = zeros(M, N_samples) - if multithread == "ensemble" - tullio_threading = false - elseif multithread == "tullio" - tullio_threading = true - end - - for i in 1:M - μ[i, :], σ2[i, :] = RF.Methods.predict( - get_rfms(srfi)[i], - get_fitted_features(srfi)[i], - DataContainer(new_inputs), - tullio_threading = tullio_threading, - ) - end - - # add the noise contribution from the regularization - σ2[:, :] = σ2[:, :] .+ 1.0 - - return μ, σ2 -end diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl index bda02734..d675391d 100644 --- a/src/VectorRandomFeature.jl +++ b/src/VectorRandomFeature.jl @@ -192,7 +192,7 @@ function VectorRandomFeatureInterface( "prior" => prior, #the hyperparameter_prior (note scalings have already been applied) "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=1000), # Adaptive timestepping + "scheduler" => EKP.DataMisfitController(terminate_at = 1000), # Adaptive timestepping "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme "tikhonov" => 0, # tikhonov regularization parameter if >0 "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation @@ -409,27 +409,31 @@ function build_models!( n_features_opt = optimizer_options["n_features_opt"] idx_shuffle = randperm(rng, n_data) n_cross_val_sets = Int(optimizer_options["n_cross_val_sets"]) - + train_idx = [] test_idx = [] - if n_cross_val_sets == 0 - push!(train_idx,idx_shuffle) - push!(test_idx,idx_shuffle) + if n_cross_val_sets == 0 + push!(train_idx, idx_shuffle) + push!(test_idx, idx_shuffle) n_cross_val_sets = 1 # now just pretend there is one partition for looping purposes n_train = n_data - n_test = n_data + n_test = n_data else train_fraction = optimizer_options["train_fraction"] n_train = Int(floor(train_fraction * n_data)) # 20% split n_test = n_data - n_train - if n_test*n_cross_val_sets > n_data - throw(ArgumentError("train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). Received $n_cross_val_sets")) + if n_test * n_cross_val_sets > n_data + throw( + ArgumentError( + "train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). Received $n_cross_val_sets", + ), + ) end - - for i = 1:n_cross_val_sets - tmp = idx_shuffle[(i-1)*n_test+1:i*n_test] + + for i in 1:n_cross_val_sets + tmp = idx_shuffle[((i - 1) * n_test + 1):(i * n_test)] push!(test_idx, tmp) push!(train_idx, setdiff(collect(1:n_data), tmp)) end @@ -453,7 +457,7 @@ function build_models!( regularization = regularization_matrix end - end + end # [2.] 
     # [2.] Estimate covariance at mean value
     μ_hp = transform_unconstrained_to_constrained(prior, mean(prior))
     cov_sample_multiplier = optimizer_options["cov_sample_multiplier"]
@@ -470,7 +474,7 @@ function build_models!(
     n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0)))
     observation_vec = []
     tikhonov_opt_val = optimizer_options["tikhonov"]
-    for cv_idx = 1:n_cross_val_sets
+    for cv_idx in 1:n_cross_val_sets
         internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance(
             vrfi,
             rng,
@@ -486,7 +490,7 @@ function build_models!(
             multithread_type,
             cov_correction = cov_correction,
         )
-
+
         if tikhonov_opt_val == 0
             # Build the covariance
             Γ = internal_Γ
@@ -494,7 +498,7 @@ function build_models!(
                 isa(regularization, UniformScaling) ? regularization : kron(I(n_test), regularization) # + approx_σ2
             Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I
             data = vcat(reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1), 0.0, 0.0) #flatten data
-
+
         elseif tikhonov_opt_val > 0
             # augment the state to add tikhonov
             outsize = size(internal_Γ, 1)
@@ -502,37 +506,30 @@ function build_models!(
             Γ[1:outsize, 1:outsize] = internal_Γ
             Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += kron(I(n_test), regularization) # block diag regularization
             Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I
-
+
             Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior)
-
+
             data = vcat(
-                reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1),
-                0.0,
-                0.0,
-                zeros(size(Γ, 1) - outsize, 1),
+                reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1),
+                0.0,
+                0.0,
+                zeros(size(Γ, 1) - outsize, 1),
             ) #flatten data with additional zeros
         else
             throw(
-                ArgumentError(
-                    "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val",
-                ),
+                ArgumentError(
+                    "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val",
+                ),
             )
         end
         if !isposdef(Γ)
             Γ = posdef_correct(Γ)
         end
-        push!(observation_vec, EKP.Observation(
-            Dict(
-                "names" => "$(cv_idx)",
-                "samples" => data[:],
-                "covariances" => Γ,
-            ),
-        ),
-        )
-
+        push!(observation_vec, EKP.Observation(Dict("names" => "$(cv_idx)", "samples" => data[:], "covariances" => Γ)))
+
     end
     observation = combine_observations(observation_vec)
-
+
     # [3.]
set up EKP optimization n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, n_iteration = optimizer_options["n_iteration"] @@ -562,11 +559,11 @@ function build_models!( lvec = get_ϕ_final(prior, ekiobj) if tikhonov_opt_val > 0 - g_ens = zeros(n_cross_val_sets*(output_dim*n_test+input_dim+2),n_ensemble) + g_ens = zeros(n_cross_val_sets * (output_dim * n_test + input_dim + 2), n_ensemble) else - g_ens = zeros(n_cross_val_sets*(output_dim*n_test+2),n_ensemble) + g_ens = zeros(n_cross_val_sets * (output_dim * n_test + 2), n_ensemble) end - for cv_idx = 1:n_cross_val_sets + for cv_idx in 1:n_cross_val_sets g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( vrfi, rng, @@ -588,11 +585,11 @@ function build_models!( else umat = uvecormat end - + g_ens_tmp = vcat(g_ens_tmp, umat) end - g_ens[(cv_idx-1)*(output_dim*n_test+2) + 1: cv_idx*(output_dim*n_test+2), :] = g_ens_tmp + g_ens[((cv_idx - 1) * (output_dim * n_test + 2) + 1):(cv_idx * (output_dim * n_test + 2)), :] = g_ens_tmp end @@ -605,7 +602,7 @@ function build_models!( if !isnothing(terminated) break # if the timestep was terminated due to timestepping condition end - + err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) end diff --git a/src/VectorRandomFeature_crossval.jl b/src/VectorRandomFeature_crossval.jl deleted file mode 100644 index 6455e259..00000000 --- a/src/VectorRandomFeature_crossval.jl +++ /dev/null @@ -1,696 +0,0 @@ -using ProgressBars - -export VectorRandomFeatureInterface -export get_rfms, - get_fitted_features, - get_batch_sizes, - get_n_features, - get_input_dim, - get_output_dim, - get_rng, - get_kernel_structure, - get_optimizer_options, - get_optimizer - -""" -$(DocStringExtensions.TYPEDEF) - -Structure holding the Vector Random Feature models. - -# Fields -$(DocStringExtensions.TYPEDFIELDS) - -""" -struct VectorRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} <: - RandomFeatureInterface - "A vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" - rfms::Vector{RF.Methods.RandomFeatureMethod} - "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" - fitted_features::Vector{RF.Methods.Fit} - "batch sizes" - batch_sizes::Union{Dict{S, Int}, Nothing} - "number of features" - n_features::Union{Int, Nothing} - "input dimension" - input_dim::Int - "output_dimension" - output_dim::Int - "rng" - rng::RNG - "regularization" - regularization::Vector{Union{Matrix, UniformScaling, Diagonal}} - "Kernel structure type (e.g. 
Separable or Nonseparable)" - kernel_structure::KST - "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" - feature_decomposition::S - "dictionary of options for hyperparameter optimizer" - optimizer_options::Dict - "diagnostics from optimizer" - optimizer::Vector -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the rfms field -""" -get_rfms(vrfi::VectorRandomFeatureInterface) = vrfi.rfms - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the fitted_features field -""" -get_fitted_features(vrfi::VectorRandomFeatureInterface) = vrfi.fitted_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the batch_sizes field -""" -get_batch_sizes(vrfi::VectorRandomFeatureInterface) = vrfi.batch_sizes - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the n_features field -""" -get_n_features(vrfi::VectorRandomFeatureInterface) = vrfi.n_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the input_dim field -""" -get_input_dim(vrfi::VectorRandomFeatureInterface) = vrfi.input_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the output_dim field -""" -get_output_dim(vrfi::VectorRandomFeatureInterface) = vrfi.output_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the rng field -""" -EKP.get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the regularization field -""" -get_regularization(vrfi::VectorRandomFeatureInterface) = vrfi.regularization - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the kernel_structure field -""" -get_kernel_structure(vrfi::VectorRandomFeatureInterface) = vrfi.kernel_structure - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the feature_decomposition field -""" -get_feature_decomposition(vrfi::VectorRandomFeatureInterface) = vrfi.feature_decomposition - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the optimizer_options field -""" -get_optimizer_options(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer_options - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer field -""" -get_optimizer(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and multi-output emulators. - - `n_features` - the number of random features - - `input_dim` - the dimension of the input space - - `output_dim` - the dimension of the output space - - `kernel_structure` - - a prescribed form of kernel structure - - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) - - `rng = Random.GLOBAL_RNG` - random number generator - - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available - - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `VectorRandomFeatureInterface` constructor): - - "prior": the prior for the hyperparameter optimization - - "prior_in_scale"/"prior_out_scale": use these to tune the input/output prior scale. - - "n_ensemble": number of ensemble members - - "n_iteration": number of eki iterations - - "scheduler": Learning rate Scheduler (a.k.a. 
EKP timestepper) Default: DataMisfitController - - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 0.0) - - "tikhonov": tikhonov regularization parameter if > 0 - - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split - - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - - "accelerator": use EKP accelerators (default is no acceleration) - - "verbose" => false, verbose optimizer statements to check convergence, priors and optimal parameters. - - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit Wolfe 03), also "nice" for (Vishny, Morzfeld et al. 2024) - - -""" -function VectorRandomFeatureInterface( - n_features::Int, - input_dim::Int, - output_dim::Int; - kernel_structure::Union{KST, Nothing} = nothing, - batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, - rng::RNG = Random.GLOBAL_RNG, - feature_decomposition::S = "cholesky", - optimizer_options::Union{Dict{S}, Nothing} = nothing, -) where {S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} - - # Initialize vector for RF model - rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) - fitted_features = Vector{RF.Methods.Fit}(undef, 0) - regularization = Vector{Union{Matrix, UniformScaling, Nothing}}(undef, 0) - - if isnothing(kernel_structure) - kernel_structure = SeparableKernel( - cov_structure_from_string("lowrank", input_dim), - cov_structure_from_string("lowrank", output_dim), - ) - end - KSType = typeof(kernel_structure) - prior = build_default_prior(input_dim, output_dim, kernel_structure) - - #Optimization Defaults - optimizer_opts = Dict( - "prior" => prior, #the hyperparameter_prior (note scalings have already been applied) - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=1000), # Adaptive timestepping - "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme - "tikhonov" => 0, # tikhonov regularization parameter if >0 - "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction" => 0.8, # 80:20 train - test split - "n_features_opt" => n_features, # number of features for the optimization - "multithread" => "ensemble", # instead of "tullio" - "verbose" => false, # verbose optimizer statements - "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles - "accelerator" => EKP.NesterovAccelerator(), # acceleration with momentum - "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance - "n_cross_val_sets" => 1, - ) - - if !isnothing(optimizer_options) - for key in keys(optimizer_options) - optimizer_opts[key] = optimizer_options[key] - end - end - - opt_tmp = Dict() - for key in keys(optimizer_opts) - if key != "prior" - opt_tmp[key] = optimizer_opts[key] - end - end - @info("hyperparameter optimization with EKI configured with $opt_tmp") - - return VectorRandomFeatureInterface{S, RNG, KSType}( - rfms, - fitted_features, - batch_sizes, - n_features, - input_dim, - output_dim, - rng, - regularization, - kernel_structure, - 
feature_decomposition, - optimizer_opts, - [], - ) -end - - - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - M = zeros(input_dim, output_dim) # n x p mean - - U, V = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - - if !isposdef(U) - println("U not posdef - correcting") - U = posdef_correct(U) - end - if !isposdef(V) - println("V not posdef - correcting") - V = posdef_correct(V) - end - - dist = MatrixNormal(M, U, V) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim * output_dim), - "name" => "xi", - ), - ) - return pd -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - - C = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - if !isposdef(C) - println("C not posdef - correcting") - C = posdef_correct(C) - end - dist = MvNormal(zeros(input_dim * output_dim), C) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim * output_dim), - "name" => "xi", - ), - ) - return pd -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a Matrixvariate Normal distribution with mean M=0, and input(output) covariance U(V) built with a `CovarianceStructureType`. -""" -function RFM_from_hyperparameters( - vrfi::VectorRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - output_dim::Int, - multithread_type::MT; -) where { - S <: AbstractString, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - MT <: MultithreadType, -} - - xi_hp = l[1:(end - 1)] - sigma_hp = l[end] - - kernel_structure = get_kernel_structure(vrfi) - pd = hyperparameter_distribution_from_flat(xi_hp, input_dim, output_dim, kernel_structure) - - feature_sampler = RF.Samplers.FeatureSampler(pd, output_dim, rng = rng) - - feature_parameters = Dict("sigma" => sigma_hp) - vff = RF.Features.VectorFourierFeature( - n_features, - output_dim, - feature_sampler, - feature_parameters = feature_parameters, - ) - - thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio - if isnothing(batch_sizes) - return RF.Methods.RandomFeatureMethod(vff, regularization = regularization, tullio_threading = thread_opt) - else - return RF.Methods.RandomFeatureMethod( - vff, - regularization = regularization, - batch_sizes = batch_sizes, - tullio_threading = thread_opt, - ) - end -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Build Vector Random Feature model for the input-output pairs subject to regularization, and optimizes the hyperparameters with EKP. 
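-
-A typical call might look like the following sketch (here `pairs` is a `PairedDataContainer`
-and `Γ` an observational noise covariance, both assumed from the surrounding application):
-
-```julia
-vrfi = VectorRandomFeatureInterface(500, input_dim, output_dim)
-build_models!(vrfi, pairs; regularization_matrix = Γ)
-# hyperparameters are optimized internally; predict(vrfi, ...) can be called directly afterwards
-```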
-""" -function build_models!( - vrfi::VectorRandomFeatureInterface, - input_output_pairs::PairedDataContainer{FT}; - regularization_matrix::Union{M, Nothing} = nothing, -) where {FT <: AbstractFloat, M <: AbstractMatrix} - - # get inputs and outputs - input_values = get_inputs(input_output_pairs) - output_values = get_outputs(input_output_pairs) - n_rfms, n_data = size(output_values) - - input_dim = size(input_values, 1) - output_dim = size(output_values, 1) - - kernel_structure = get_kernel_structure(vrfi) - - n_hp = calculate_n_hyperparameters(input_dim, output_dim, kernel_structure) - - rfms = get_rfms(vrfi) - if length(rfms) > 0 - @warn "VectorRandomFeatureInterface already built. skipping..." - return - end - - fitted_features = get_fitted_features(vrfi) - n_features = get_n_features(vrfi) - batch_sizes = get_batch_sizes(vrfi) - decomp_type = get_feature_decomposition(vrfi) - optimizer_options = get_optimizer_options(vrfi) - optimizer = get_optimizer(vrfi) - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - multithread_type = EnsembleThreading() - elseif multithread == "tullio" - multithread_type = TullioThreading() - else - throw( - ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", - ), - ) - end - prior = optimizer_options["prior"] - rng = get_rng(vrfi) - - # where prior space has changed we need to rebuild the priors [TODO just build them here in the first place?] - if ndims(prior) > n_hp - - # comes from having a truncated output_dimension - # TODO not really a truncation here, resetting to default - @info "Original input-output space of dimension ($(get_input_dim(vrfi)), $(get_output_dim(vrfi))) has been truncated to ($(input_dim), $(output_dim)). \n Rebuilding prior... number of hyperparameters reduced from $(ndims(prior)) to $(n_hp)." - prior = build_default_prior(input_dim, output_dim, kernel_structure) - end - - # Optimize feature cholesky factors with EKP - # [1.] Split data into test/train (e.g. 80/20) - train_fraction = optimizer_options["train_fraction"] - n_train = Int(floor(train_fraction * n_data)) # 20% split - n_test = n_data - n_train - n_features_opt = optimizer_options["n_features_opt"] - - - @info ( - "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" - ) - - - # regularization_matrix = nothing when we use scaled SVD to decorrelate the space, - # in this setting, noise = I - if regularization_matrix === nothing - regularization = I - - else - # think of the regularization_matrix as the observational noise covariance, or a related quantity - if !isposdef(regularization_matrix) - regularization = posdef_correct(regularization_matrix) - println("RF regularization matrix is not positive definite, correcting") - - else - regularization = regularization_matrix - end - - end - - idx_shuffle = randperm(rng, n_data) - n_cross_val_sets = optimizer_options["n_cross_val_sets"] - if n_test*n_cross_val_sets > n_data - throw(ArgumentError("train/test split produces cross validation test sets of size $(n_test), out of $(n_data). \"n_cross_val_sets\" optimizer_options keyword < $(Int(floor(n_data/n_test))). Received $n_cross_val_sets")) - end - - train_idx = [] - test_idx = [] - for i = 1:n_cross_val_sets - tmp = idx_shuffle[(i-1)*n_test+1:i*n_test] - push!(test_idx, tmp) - push!(train_idx, setdiff(collect(1:n_data), tmp)) - end - - # [2.] 
Estimate covariance at mean value - μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) - cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] - cov_correction = optimizer_options["cov_correction"] - if nameof(typeof(kernel_structure)) == :SeparableKernel - if nameof(typeof(get_output_cov_structure(kernel_structure))) == :DiagonalFactor - n_cov_samples_min = n_test + 2 # diagonal case - else - n_cov_samples_min = (n_test * output_dim + 2) - end - else - n_cov_samples_min = (n_test * output_dim + 2) - end - n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) - observation_vec = [] - tikhonov_opt_val = optimizer_options["tikhonov"] - for cv_idx = 1:n_cross_val_sets - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - vrfi, - rng, - μ_hp, # take mean values - regularization, - n_features_opt, - train_idx[cv_idx], - test_idx[cv_idx], - batch_sizes, - input_output_pairs, - n_cov_samples, - decomp_type, - multithread_type, - cov_correction = cov_correction, - ) - - if tikhonov_opt_val == 0 - # Build the covariance - Γ = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += - isa(regularization, UniformScaling) ? regularization : kron(I(n_test), regularization) # + approx_σ2 - Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I - data = vcat(reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1), 0.0, 0.0) #flatten data - - elseif tikhonov_opt_val > 0 - # augment the state to add tikhonov - outsize = size(internal_Γ, 1) - Γ = zeros(outsize + n_hp, outsize + n_hp) - Γ[1:outsize, 1:outsize] = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += kron(I(n_test), regularization) # block diag regularization - Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I - - Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) - - data = vcat( - reshape(get_outputs(input_output_pairs)[:, test_idx[cv_idx]], :, 1), - 0.0, - 0.0, - zeros(size(Γ, 1) - outsize, 1), - ) #flatten data with additional zeros - else - throw( - ArgumentError( - "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", - ), - ) - end - if !isposdef(Γ) - Γ = posdef_correct(Γ) - end - push!(observation_vec, EKP.Observation( - Dict( - "names" => "$(cv_idx)", - "samples" => data[:], - "covariances" => Γ, - ), - ), - ) - - end - observation = combine_observations(observation_vec) - - # [3.] set up EKP optimization - n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, - n_iteration = optimizer_options["n_iteration"] - opt_verbose_flag = optimizer_options["verbose"] - scheduler = optimizer_options["scheduler"] - localization = optimizer_options["localization"] - accelerator = optimizer_options["accelerator"] - - - initial_params = construct_initial_ensemble(rng, prior, n_ensemble) - - ekiobj = EKP.EnsembleKalmanProcess( - initial_params, - observation, - Inversion(), - scheduler = scheduler, - rng = rng, - verbose = opt_verbose_flag, - localization_method = localization, - accelerator = accelerator, - ) - err = zeros(n_iteration) - - # [4.] 
optimize with EKP - for i in 1:n_iteration - #get parameters: - lvec = get_ϕ_final(prior, ekiobj) - - if tikhonov_opt_val > 0 - g_ens = zeros(n_cross_val_sets*(output_dim*n_test+input_dim+2),n_ensemble) - else - g_ens = zeros(n_cross_val_sets*(output_dim*n_test+2),n_ensemble) - end - for cv_idx = 1:n_cross_val_sets - g_ens_tmp, _ = calculate_ensemble_mean_and_coeffnorm( - vrfi, - rng, - lvec, - regularization, - n_features_opt, - train_idx[cv_idx], - test_idx[cv_idx], - batch_sizes, - input_output_pairs, - decomp_type, - multithread_type, - ) - if tikhonov_opt_val > 0 - # augment with the computational parameters (u not ϕ) - uvecormat = get_u_final(ekiobj) - if isa(uvecormat, AbstractVector) - umat = reshape(uvecormat, 1, :) - else - umat = uvecormat - end - - g_ens_tmp = vcat(g_ens_tmp, umat) - end - - g_ens[(cv_idx-1)*(output_dim*n_test+2) + 1: cv_idx*(output_dim*n_test+2), :] = g_ens_tmp - - end - - inflation = optimizer_options["inflation"] - if inflation > 0 - terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation - else - terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation - end - if !isnothing(terminated) - break # if the timestep was terminated due to timestepping condition - end - - err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - - end - push!(optimizer, err) - - # [5.] extract optimal hyperparameters - hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] - - # Now, fit new RF model with the optimized hyperparameters - if opt_verbose_flag - names = get_name(prior) - hp_optimal_batch = [hp_optimal[b] for b in batch(prior)] - hp_optimal_range = - [(minimum(hp_optimal_batch[i]), maximum(hp_optimal_batch[i])) for i in 1:length(hp_optimal_batch)] #the min and max of the hparams - prior_conf_interval = [mean(prior) .- 3 * sqrt.(var(prior)), mean(prior) .+ 3 * sqrt.(var(prior))] - pci_constrained = [transform_unconstrained_to_constrained(prior, prior_conf_interval[i]) for i in 1:2] - pcic = [(pci_constrained[1][i], pci_constrained[2][i]) for i in 1:length(pci_constrained[1])] - pcic_batched = [pcic[b][1] for b in batch(prior)] - @info("EKI Optimization result:") - - println( - display( - [ - "name" "number of hyperparameters" "optimized value range" "99% prior mass" - names length.(hp_optimal_batch) hp_optimal_range pcic_batched - ], - ), - ) - end - - rfm_tmp = RFM_from_hyperparameters( - vrfi, - rng, - hp_optimal, - regularization, - n_features, - batch_sizes, - input_dim, - output_dim, - multithread_type, - ) - fitted_features_tmp = fit(rfm_tmp, input_output_pairs, decomposition_type = decomp_type) - - - push!(rfms, rfm_tmp) - push!(fitted_features, fitted_features_tmp) - push!(get_regularization(vrfi), regularization) - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Empty method, as optimization takes place within the build_models stage -""" -function optimize_hyperparameters!(vrfi::VectorRandomFeatureInterface, args...; kwargs...) - @info("Random Features already trained. continuing...") -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. 
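-
-For example (a sketch; assumes a built `vrfi` with output dimension `p` and `N` query points):
-
-```julia
-μ, σ2 = predict(vrfi, new_inputs)                     # μ is p × N, σ2 is p × p × N
-marginal_sd = [sqrt(σ2[j, j, k]) for j in 1:p, k in 1:N]
-```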
-""" -function predict(vrfi::VectorRandomFeatureInterface, new_inputs::M) where {M <: AbstractMatrix} - input_dim = get_input_dim(vrfi) - output_dim = get_output_dim(vrfi) - rfm = get_rfms(vrfi)[1] - ff = get_fitted_features(vrfi)[1] - optimizer_options = get_optimizer_options(vrfi) - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - tullio_threading = false - elseif multithread == "tullio" - tullio_threading = true - end - - N_samples = size(new_inputs, 2) - # Predicts columns of inputs: input_dim × N_samples - μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = "tullio") - # μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = tullio_threading) - # sizes (output_dim x n_test), (output_dim x output_dim x n_test) - # add the noise contribution from the regularization - # note this is because we are predicting the data here, not the latent function. - lambda = get_regularization(vrfi)[1] - for i in 1:N_samples - σ2[:, :, i] = 0.5 * (σ2[:, :, i] + permutedims(σ2[:, :, i], (2, 1))) + lambda - - if !isposdef(σ2[:, :, i]) - σ2[:, :, i] = posdef_correct(σ2[:, :, i]) - end - end - return μ, σ2 -end diff --git a/src/VectorRandomFeature_nodatasplit.jl b/src/VectorRandomFeature_nodatasplit.jl deleted file mode 100644 index eca77b9a..00000000 --- a/src/VectorRandomFeature_nodatasplit.jl +++ /dev/null @@ -1,671 +0,0 @@ -using ProgressBars - -export VectorRandomFeatureInterface -export get_rfms, - get_fitted_features, - get_batch_sizes, - get_n_features, - get_input_dim, - get_output_dim, - get_rng, - get_kernel_structure, - get_optimizer_options, - get_optimizer - -""" -$(DocStringExtensions.TYPEDEF) - -Structure holding the Vector Random Feature models. - -# Fields -$(DocStringExtensions.TYPEDFIELDS) - -""" -struct VectorRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} <: - RandomFeatureInterface - "A vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" - rfms::Vector{RF.Methods.RandomFeatureMethod} - "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" - fitted_features::Vector{RF.Methods.Fit} - "batch sizes" - batch_sizes::Union{Dict{S, Int}, Nothing} - "number of features" - n_features::Union{Int, Nothing} - "input dimension" - input_dim::Int - "output_dimension" - output_dim::Int - "rng" - rng::RNG - "regularization" - regularization::Vector{Union{Matrix, UniformScaling, Diagonal}} - "Kernel structure type (e.g. 
Separable or Nonseparable)" - kernel_structure::KST - "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" - feature_decomposition::S - "dictionary of options for hyperparameter optimizer" - optimizer_options::Dict - "diagnostics from optimizer" - optimizer::Vector -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the rfms field -""" -get_rfms(vrfi::VectorRandomFeatureInterface) = vrfi.rfms - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the fitted_features field -""" -get_fitted_features(vrfi::VectorRandomFeatureInterface) = vrfi.fitted_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the batch_sizes field -""" -get_batch_sizes(vrfi::VectorRandomFeatureInterface) = vrfi.batch_sizes - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the n_features field -""" -get_n_features(vrfi::VectorRandomFeatureInterface) = vrfi.n_features - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the input_dim field -""" -get_input_dim(vrfi::VectorRandomFeatureInterface) = vrfi.input_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the output_dim field -""" -get_output_dim(vrfi::VectorRandomFeatureInterface) = vrfi.output_dim - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the rng field -""" -EKP.get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the regularization field -""" -get_regularization(vrfi::VectorRandomFeatureInterface) = vrfi.regularization - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the kernel_structure field -""" -get_kernel_structure(vrfi::VectorRandomFeatureInterface) = vrfi.kernel_structure - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the feature_decomposition field -""" -get_feature_decomposition(vrfi::VectorRandomFeatureInterface) = vrfi.feature_decomposition - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Gets the optimizer_options field -""" -get_optimizer_options(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer_options - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -gets the optimizer field -""" -get_optimizer(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and multi-output emulators. - - `n_features` - the number of random features - - `input_dim` - the dimension of the input space - - `output_dim` - the dimension of the output space - - `kernel_structure` - - a prescribed form of kernel structure - - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) - - `rng = Random.GLOBAL_RNG` - random number generator - - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available - - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `VectorRandomFeatureInterface` constructor): - - "prior": the prior for the hyperparameter optimization - - "prior_in_scale"/"prior_out_scale": use these to tune the input/output prior scale. - - "n_ensemble": number of ensemble members - - "n_iteration": number of eki iterations - - "scheduler": Learning rate Scheduler (a.k.a. 
EKP timestepper) Default: DataMisfitController - - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 0.0) - - "tikhonov": tikhonov regularization parameter if > 0 - - "inflation": additive inflation ∈ [0,1] with 0 being no inflation - - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split - - "n_features_opt": fix the number of features for optimization (default `n_features`, as used for prediction) - - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra - - "accelerator": use EKP accelerators (default is no acceleration) - - "verbose" => false, verbose optimizer statements to check convergence, priors and optimal parameters. - - "cov_correction" => "shrinkage", type of conditioning to improve estimated covariance (Ledoit Wolfe 03), also "nice" for (Vishny, Morzfeld et al. 2024) - - -""" -function VectorRandomFeatureInterface( - n_features::Int, - input_dim::Int, - output_dim::Int; - kernel_structure::Union{KST, Nothing} = nothing, - batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, - rng::RNG = Random.GLOBAL_RNG, - feature_decomposition::S = "cholesky", - optimizer_options::Union{Dict{S}, Nothing} = nothing, -) where {S <: AbstractString, RNG <: AbstractRNG, KST <: KernelStructureType} - - # Initialize vector for RF model - rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) - fitted_features = Vector{RF.Methods.Fit}(undef, 0) - regularization = Vector{Union{Matrix, UniformScaling, Nothing}}(undef, 0) - - if isnothing(kernel_structure) - kernel_structure = SeparableKernel( - cov_structure_from_string("lowrank", input_dim), - cov_structure_from_string("lowrank", output_dim), - ) - end - KSType = typeof(kernel_structure) - prior = build_default_prior(input_dim, output_dim, kernel_structure) - - #Optimization Defaults - optimizer_opts = Dict( - "prior" => prior, #the hyperparameter_prior (note scalings have already been applied) - "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble - "n_iteration" => 5, # number of eki iterations - "scheduler" => EKP.DataMisfitController(terminate_at=100), # Adaptive timestepping - "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme - "tikhonov" => 0, # tikhonov regularization parameter if >0 - "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation - "train_fraction" => 0.8, # 80:20 train - test split - "n_features_opt" => n_features, # number of features for the optimization - "multithread" => "ensemble", # instead of "tullio" - "verbose" => false, # verbose optimizer statements - "localization" => EKP.Localizers.NoLocalization(), # localization / sample error correction for small ensembles - "accelerator" => EKP.NesterovAccelerator(), # acceleration with momentum - "cov_correction" => "shrinkage", # type of conditioning to improve estimated covariance - ) - - if !isnothing(optimizer_options) - for key in keys(optimizer_options) - optimizer_opts[key] = optimizer_options[key] - end - end - - opt_tmp = Dict() - for key in keys(optimizer_opts) - if key != "prior" - opt_tmp[key] = optimizer_opts[key] - end - end - @info("hyperparameter optimization with EKI configured with $opt_tmp") - - return VectorRandomFeatureInterface{S, RNG, KSType}( - rfms, - fitted_features, - batch_sizes, - n_features, - input_dim, - output_dim, - rng, - regularization, - kernel_structure, - feature_decomposition, - optimizer_opts, 
- [], - ) -end - - - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::SK, -) where {VV <: AbstractVector, SK <: SeparableKernel} - M = zeros(input_dim, output_dim) # n x p mean - - U, V = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - - if !isposdef(U) - println("U not posdef - correcting") - U = posdef_correct(U) - end - if !isposdef(V) - println("V not posdef - correcting") - V = posdef_correct(V) - end - - dist = MatrixNormal(M, U, V) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim * output_dim), - "name" => "xi", - ), - ) - return pd -end - -function hyperparameter_distribution_from_flat( - x::VV, - input_dim::Int, - output_dim::Int, - kernel_structure::NK, -) where {VV <: AbstractVector, NK <: NonseparableKernel} - - C = hyperparameters_from_flat(x, input_dim, output_dim, kernel_structure) - if !isposdef(C) - println("C not posdef - correcting") - C = posdef_correct(C) - end - dist = MvNormal(zeros(input_dim * output_dim), C) - pd = ParameterDistribution( - Dict( - "distribution" => Parameterized(dist), - "constraint" => repeat([no_constraint()], input_dim * output_dim), - "name" => "xi", - ), - ) - return pd -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Builds the random feature method from hyperparameters. We use cosine activation functions and a Matrixvariate Normal distribution with mean M=0, and input(output) covariance U(V) built with a `CovarianceStructureType`. -""" -function RFM_from_hyperparameters( - vrfi::VectorRandomFeatureInterface, - rng::RNG, - l::ForVM, - regularization::MorUSorD, - n_features::Int, - batch_sizes::Union{Dict{S, Int}, Nothing}, - input_dim::Int, - output_dim::Int, - multithread_type::MT; -) where { - S <: AbstractString, - RNG <: AbstractRNG, - ForVM <: Union{AbstractFloat, AbstractVecOrMat}, - MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, - MT <: MultithreadType, -} - - xi_hp = l[1:(end - 1)] - sigma_hp = l[end] - - kernel_structure = get_kernel_structure(vrfi) - pd = hyperparameter_distribution_from_flat(xi_hp, input_dim, output_dim, kernel_structure) - - feature_sampler = RF.Samplers.FeatureSampler(pd, output_dim, rng = rng) - - feature_parameters = Dict("sigma" => sigma_hp) - vff = RF.Features.VectorFourierFeature( - n_features, - output_dim, - feature_sampler, - feature_parameters = feature_parameters, - ) - - thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio - if isnothing(batch_sizes) - return RF.Methods.RandomFeatureMethod(vff, regularization = regularization, tullio_threading = thread_opt) - else - return RF.Methods.RandomFeatureMethod( - vff, - regularization = regularization, - batch_sizes = batch_sizes, - tullio_threading = thread_opt, - ) - end -end - - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Build Vector Random Feature model for the input-output pairs subject to regularization, and optimizes the hyperparameters with EKP. 
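-
-In this variant the full dataset is reused for both fitting and validation (no data split).
-The EKI target it assembles is, schematically (names as in the function body below):
-
-```julia
-data = vcat(get_outputs(io_pairs_opt)[:], 0.0, 0.0) # flattened outputs, plus two zeros
-# the zeros are the targets for the two coefficient-norm diagnostics appended to the forward map
-```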
-""" -function build_models!( - vrfi::VectorRandomFeatureInterface, - input_output_pairs::PairedDataContainer{FT}; - regularization_matrix::Union{M, Nothing} = nothing, -) where {FT <: AbstractFloat, M <: AbstractMatrix} - - # get inputs and outputs - input_values = get_inputs(input_output_pairs) - output_values = get_outputs(input_output_pairs) - n_rfms, n_data = size(output_values) - - input_dim = size(input_values, 1) - output_dim = size(output_values, 1) - - kernel_structure = get_kernel_structure(vrfi) - - n_hp = calculate_n_hyperparameters(input_dim, output_dim, kernel_structure) - - rfms = get_rfms(vrfi) - if length(rfms) > 0 - @warn "VectorRandomFeatureInterface already built. skipping..." - return - end - - fitted_features = get_fitted_features(vrfi) - n_features = get_n_features(vrfi) - batch_sizes = get_batch_sizes(vrfi) - decomp_type = get_feature_decomposition(vrfi) - optimizer_options = get_optimizer_options(vrfi) - optimizer = get_optimizer(vrfi) - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - multithread_type = EnsembleThreading() - elseif multithread == "tullio" - multithread_type = TullioThreading() - else - throw( - ArgumentError( - "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", - ), - ) - end - prior = optimizer_options["prior"] - rng = get_rng(vrfi) - - # where prior space has changed we need to rebuild the priors [TODO just build them here in the first place?] - if ndims(prior) > n_hp - - # comes from having a truncated output_dimension - # TODO not really a truncation here, resetting to default - @info "Original input-output space of dimension ($(get_input_dim(vrfi)), $(get_output_dim(vrfi))) has been truncated to ($(input_dim), $(output_dim)). \n Rebuilding prior... number of hyperparameters reduced from $(ndims(prior)) to $(n_hp)." - prior = build_default_prior(input_dim, output_dim, kernel_structure) - end - - # Optimize feature cholesky factors with EKP - # [1.] Split data into test/train (e.g. 80/20) - train_fraction = optimizer_options["train_fraction"] - n_train = n_data - n_test = n_data -# n_train = Int(floor(train_fraction * n_data)) # 20% split -# n_test = n_data - n_train - n_features_opt = optimizer_options["n_features_opt"] - - @info ( - "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" - ) - - - # regularization_matrix = nothing when we use scaled SVD to decorrelate the space, - # in this setting, noise = I - if regularization_matrix === nothing - regularization = I - - else - # think of the regularization_matrix as the observational noise covariance, or a related quantity - if !isposdef(regularization_matrix) - regularization = posdef_correct(regularization_matrix) - println("RF regularization matrix is not positive definite, correcting") - - else - regularization = regularization_matrix - end - - end - - #idx_shuffle = randperm(rng, n_data) - io_pairs_opt = PairedDataContainer(input_values,output_values) - - #io_pairs_opt = PairedDataContainer( - # input_values[:, idx_shuffle], - # reshape(output_values[:, idx_shuffle], :, size(output_values, 2)), - #) - - # [2.] 
Estimate covariance at mean value - μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) - cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] - cov_correction = optimizer_options["cov_correction"] - - if nameof(typeof(kernel_structure)) == :SeparableKernel - if nameof(typeof(get_output_cov_structure(kernel_structure))) == :DiagonalFactor - n_cov_samples_min = n_test + 2 # diagonal case - else - n_cov_samples_min = (n_test * output_dim + 2) - end - else - n_cov_samples_min = (n_test * output_dim + 2) - end - n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 0.0))) - - internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( - vrfi, - rng, - μ_hp, # take mean values - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - n_cov_samples, - decomp_type, - multithread_type, - cov_correction = cov_correction, - ) - - tikhonov_opt_val = optimizer_options["tikhonov"] - if tikhonov_opt_val == 0 - # Build the covariance - Γ = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += - isa(regularization, UniformScaling) ? regularization : kron(I(n_test), regularization) # + approx_σ2 - Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I -# data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, 0.0) #flatten data - data = vcat(get_outputs(io_pairs_opt)[:], 0.0, 0.0) #flatten data - - elseif tikhonov_opt_val > 0 - # augment the state to add tikhonov - outsize = size(internal_Γ, 1) - Γ = zeros(outsize + n_hp, outsize + n_hp) - Γ[1:outsize, 1:outsize] = internal_Γ - Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += kron(I(n_test), regularization) # block diag regularization - Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I - - Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) - - data = vcat( -# reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), - get_outputs(io_pairs_opt)[:], - 0.0, - 0.0, - zeros(size(Γ, 1) - outsize, 1), - ) #flatten data with additional zeros - else - throw( - ArgumentError( - "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", - ), - ) - end - - # [3.] set up EKP optimization - n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, - n_iteration = optimizer_options["n_iteration"] - opt_verbose_flag = optimizer_options["verbose"] - scheduler = optimizer_options["scheduler"] - localization = optimizer_options["localization"] - accelerator = optimizer_options["accelerator"] - - if !isposdef(Γ) - Γ = posdef_correct(Γ) - end - - initial_params = construct_initial_ensemble(rng, prior, n_ensemble) - - ekiobj = EKP.EnsembleKalmanProcess( - initial_params, - data[:], - Γ, - Inversion(), - scheduler = scheduler, - rng = rng, - verbose = opt_verbose_flag, - localization_method = localization, - accelerator = accelerator, - ) - err = zeros(n_iteration) - - # [4.] 
optimize with EKP - for i in 1:n_iteration - #get parameters: - lvec = get_ϕ_final(prior, ekiobj) - - g_ens, _ = calculate_ensemble_mean_and_coeffnorm( - vrfi, - rng, - lvec, - regularization, - n_features_opt, - n_train, - n_test, - batch_sizes, - io_pairs_opt, - decomp_type, - multithread_type, - ) - if tikhonov_opt_val > 0 - # augment with the computational parameters (u not ϕ) - uvecormat = get_u_final(ekiobj) - if isa(uvecormat, AbstractVector) - umat = reshape(uvecormat, 1, :) - else - umat = uvecormat - end - - g_ens = vcat(g_ens, umat) - end - inflation = optimizer_options["inflation"] - if inflation > 0 - terminated = EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, s = inflation) # small regularizing inflation - else - terminated = EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation - end - if !isnothing(terminated) - break # if the timestep was terminated due to timestepping condition - end - - err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) - - end - push!(optimizer, err) - - # [5.] extract optimal hyperparameters - hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] - - # Now, fit new RF model with the optimized hyperparameters - if opt_verbose_flag - names = get_name(prior) - hp_optimal_batch = [hp_optimal[b] for b in batch(prior)] - hp_optimal_range = - [(minimum(hp_optimal_batch[i]), maximum(hp_optimal_batch[i])) for i in 1:length(hp_optimal_batch)] #the min and max of the hparams - prior_conf_interval = [mean(prior) .- 3 * sqrt.(var(prior)), mean(prior) .+ 3 * sqrt.(var(prior))] - pci_constrained = [transform_unconstrained_to_constrained(prior, prior_conf_interval[i]) for i in 1:2] - pcic = [(pci_constrained[1][i], pci_constrained[2][i]) for i in 1:length(pci_constrained[1])] - pcic_batched = [pcic[b][1] for b in batch(prior)] - @info("EKI Optimization result:") - - println( - display( - [ - "name" "number of hyperparameters" "optimized value range" "99% prior mass" - names length.(hp_optimal_batch) hp_optimal_range pcic_batched - ], - ), - ) - end - - rfm_tmp = RFM_from_hyperparameters( - vrfi, - rng, - hp_optimal, - regularization, - n_features, - batch_sizes, - input_dim, - output_dim, - multithread_type, - ) - fitted_features_tmp = fit(rfm_tmp, input_output_pairs, decomposition_type = decomp_type) - - - push!(rfms, rfm_tmp) - push!(fitted_features, fitted_features_tmp) - push!(get_regularization(vrfi), regularization) - -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Empty method, as optimization takes place within the build_models stage -""" -function optimize_hyperparameters!(vrfi::VectorRandomFeatureInterface, args...; kwargs...) - @info("Random Features already trained. continuing...") -end - -""" -$(DocStringExtensions.TYPEDSIGNATURES) - -Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. 
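-
-Each covariance slice is symmetrized and, where necessary, projected back to positive
-definite before being returned, so downstream code may assume (a sketch):
-
-```julia
-μ, σ2 = predict(vrfi, new_inputs)
-@assert all(isposdef(σ2[:, :, k]) for k in 1:size(new_inputs, 2))
-```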
-""" -function predict(vrfi::VectorRandomFeatureInterface, new_inputs::M) where {M <: AbstractMatrix} - input_dim = get_input_dim(vrfi) - output_dim = get_output_dim(vrfi) - rfm = get_rfms(vrfi)[1] - ff = get_fitted_features(vrfi)[1] - optimizer_options = get_optimizer_options(vrfi) - multithread = optimizer_options["multithread"] - if multithread == "ensemble" - tullio_threading = false - elseif multithread == "tullio" - tullio_threading = true - end - - N_samples = size(new_inputs, 2) - # Predicts columns of inputs: input_dim × N_samples - μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = "tullio") - # μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = tullio_threading) - # sizes (output_dim x n_test), (output_dim x output_dim x n_test) - # add the noise contribution from the regularization - # note this is because we are predicting the data here, not the latent function. - lambda = get_regularization(vrfi)[1] - for i in 1:N_samples - σ2[:, :, i] = 0.5 * (σ2[:, :, i] + permutedims(σ2[:, :, i], (2, 1))) + lambda - - if !isposdef(σ2[:, :, i]) - σ2[:, :, i] = posdef_correct(σ2[:, :, i]) - end - end - return μ, σ2 -end