Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Melanie Bieli committed Jun 18, 2020
1 parent bb93635 commit ba9b037
Show file tree
Hide file tree
Showing 14 changed files with 1,152 additions and 78 deletions.
859 changes: 829 additions & 30 deletions Manifest.toml

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@ authors = ["Charles Kawczynski <[email protected]>"]
version = "0.1.0"

[deps]
Cloudy = "9e3b23bb-e7cc-4b94-886c-65de2234ba87"
DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
GaussianProcesses = "891a1506-143c-57d2-908e-e1f8e92e6de9"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6 changes: 3 additions & 3 deletions src/CalibrateEmulateSample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ include("EKS.jl")

# No internal deps, heavy external deps
include("EKI.jl")
# include("GModel.jl")
# include("GPEmulator.jl")
include("GModel.jl")
include("GPEmulator.jl")

# Internal deps, light external deps
include("Utilities.jl")

# Internal deps, light external deps
# include("MCMC.jl")
include("MCMC.jl")
# include("Histograms.jl")
# include("GPR.jl")

Expand Down
21 changes: 15 additions & 6 deletions src/GPEmulator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ models.
# Fields
$(DocStringExtensions.FIELDS)
"""
struct GPObj{FT<:AbstractFloat, GPM}
"training inputs/parameters; N_samples x N_parameters"
inputs::Array{FT, 2}
inputs::Array{FT}
"training data/targets; N_samples x N_data"
data::Array{FT, 2}
data::Array{FT}
"mean of input; 1 x N_parameters"
input_mean::Array{FT, 2}
input_mean::Array{FT}
"square root of the inverse of the input covariance matrix; N_parameters x N_parameters"
sqrt_inv_input_cov::Array{FT, 2}
sqrt_inv_input_cov::Union{Nothing, Array{FT, 2}}
"the Gaussian Process (GP) Regression model(s) that are fitted to the given input-data pairs"
models::Vector
"whether to fit GP models on normalized inputs ((inputs - input_mean) * sqrt_inv_input_cov)"
Expand Down Expand Up @@ -86,8 +87,12 @@ function GPObj(inputs, data, package::GPJL; GPkernel::Union{K, KPy, Nothing}=not
data = convert(Array{FT}, data)

input_mean = reshape(mean(inputs, dims=1), 1, :)
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
sqrt_inv_input_cov = nothing
if normalized
if ndims(inputs) == 1
error("Cov not defined for 1d input; can't normalize")
end
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
inputs = (inputs .- input_mean) * sqrt_inv_input_cov
end

Expand Down Expand Up @@ -144,8 +149,12 @@ function GPObj(inputs, data, package::SKLJL; GPkernel::Union{K, KPy, Nothing}=no
data = convert(Array{FT}, data)

input_mean = reshape(mean(inputs, dims=1), 1, :)
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
sqrt_inv_input_cov = nothing
if normalized
if ndims(inputs) == 1
error("Cov not defined for 1d input; can't normalize")
end
sqrt_inv_input_cov = convert(Array{FT}, sqrt(inv(cov(inputs, dims=1))))
inputs = (inputs .- input_mean) * sqrt_inv_input_cov
end

Expand Down
14 changes: 1 addition & 13 deletions src/MCMC.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ function MCMCObj(obs_sample::Vector{FT},
obs_covinv = inv(obs_cov)
accept = [0]
if algtype != "rwm"
println("only random walk metropolis 'rwm' is implemented so far")
sys.exit()
error("only random walk metropolis 'rwm' is implemented so far")
end
MCMCObj{FT,IT}(obs_sample, obs_cov, obs_covinv, priors, [step], burnin,
param, posterior, log_posterior, iter, accept, algtype)
Expand Down Expand Up @@ -258,9 +257,6 @@ function sample_posterior!(mcmc::MCMCObj{FT,IT},
gpobj::GPObj{FT},
max_iter::IT) where {FT,IT<:Int}

println("iteration 0; current parameters ", mcmc.param')
flush(stdout)

for mcmcit in 1:max_iter
param = convert(Array{FT, 2}, mcmc.param')
# test predictions param' is 1xN_params
Expand All @@ -270,14 +266,6 @@ function sample_posterior!(mcmc::MCMCObj{FT,IT},

mcmc_sample!(mcmc, vec(gp_pred), vec(gp_predvar))

if mcmcit % 1000 == 0
acc_ratio = accept_ratio(mcmc)
# TODO: Add callbacks, remove print statements
println("iteration ", mcmcit ," of ", max_iter,
"; acceptance rate = ", acc_ratio,
", current parameters ", param)
flush(stdout)
end
end
end

Expand Down
31 changes: 20 additions & 11 deletions src/Observations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ struct Obs{FT<:AbstractFloat}
"covariance of the observational noise (assumed to be normally
distributed); N_data x N_data. If not supplied, cov is set to a
diagonal matrix whose non-zero elements are the sample variances"
cov::Array{FT, 2}
cov::Union{Nothing, Array{FT, 2}}
"sample mean"
mean::Vector{FT}
"names of the data"
Expand All @@ -31,29 +31,38 @@ end
function Obs(samples::Vector{Vector{FT}},
data_names::Vector{String}) where {FT<:AbstractFloat}
N_samples = length(samples)

# convert to N_samples x N_data to determine sample covariance
if N_samples == 1
# only one sample - this requires a bit more data massaging
temp1 = convert(Array, reshape(hcat(samples...)', N_samples, :))
temp = dropdims(temp1, dims=1)
samplemean = mean(temp, dims=2)
samplemean = vec(mean(temp, dims=2))
cov = nothing
else
temp = convert(Array, reshape(hcat(samples...)', N_samples, :))
samplemean = mean(temp, dims=1)
samplemean = vec(mean(temp, dims=1))
cov = Statistics.cov(temp)
end
cov = cov(temp, dims=1)
samplemean = mean(temp, dims=2)
Obs(samples, cov, samplemean, data_names)
end

function Obs(samples::Array{FT, 2},
function Obs(samples::Array{FT},
data_names::Vector{String}) where {FT<:AbstractFloat}
# samples is of size N_samples x N_data
N_samples = size(samples, 1)
cov = cov(samples, dims=1)
samplemean = vec(mean(samples, dims=1))
# convert matrix of samples to a vector of vectors
samples = [samples[i, :] for i in 1:N_samples]
if ndims(samples) == 1
# Only one sample, so there is no covariance to be computed and the
# sample mean equals the sample itself
cov = nothing
samplemean = vec(samples)
samples = vec([vec(samples)])
else
cov = Statistics.cov(samples)
# convert matrix of samples to a vector of vectors
N_samples = size(samples, 1)
samplemean = vec(mean(samples, dims=1))
samples = [samples[i, :] for i in 1:N_samples]
end
Obs(samples, cov, samplemean, data_names)
end

Expand Down
2 changes: 1 addition & 1 deletion src/Utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using ..Observations
using ..EKI

export extract_GP_tp
export extract_obs_data
export get_obs_sample
export orig2zscore
export zscore2orig
export RPAD, name, warn
Expand Down
5 changes: 2 additions & 3 deletions test/Cloudy/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ using LinearAlgebra
using StatsPlots
using GaussianProcesses
using Plots
using StatsPlots

# Import Calibrate-Emulate-Sample modules
using CalibrateEmulateSample.EKI
Expand Down Expand Up @@ -203,7 +204,7 @@ u0 = vec(mean(u_tp, dims=1))

# reset parameters
burnin = 1000
max_iter = 500000
max_iter = 100000

mcmc = MCMC.MCMCObj(yt_sample, truth.cov, priors,
new_step, u0, max_iter, mcmc_alg, burnin)
Expand All @@ -222,8 +223,6 @@ println(det(inv(post_cov)))
println(" ")

# Plot the posteriors together with the priors and the true parameter values
using StatsPlots;

true_values = [log(N0_true) log(θ_true) log(k_true)]
n_params = length(true_values)

Expand Down
64 changes: 64 additions & 0 deletions test/EKI/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Unit tests for the EKI (Ensemble Kalman Inversion) module: calibrate a
# 2-parameter linear forward model against noisy synthetic observations and
# check that the ensemble collapses toward the true parameters.
using Distributions
using LinearAlgebra
using Random
using Test

using CalibrateEmulateSample.EKI

@testset "EKI" begin

# Seed for pseudo-random number generator (keeps the test deterministic)
rng_seed = 41
Random.seed!(rng_seed)

### Define priors on the parameters u
umin = 0.0
umax = 20.0
priors = [Uniform(umin, umax), # prior on u1
          Uniform(umin, umax)] # prior on u2

### Define forward model G (linear, so EKI converges in a few iterations)
function G(u)
    3.0 .* u
end

### Generate (artificial) truth samples
npar = 2 # number of parameters
ut = rand(Uniform(umin, umax), npar)
param_names = ["u1", "u2"]
yt = G(ut)
n_samples = 100
truth_samples = zeros(n_samples, length(yt))
noise_level = 0.9
Γy = noise_level^2 * convert(Array, Diagonal(yt))
μ = zeros(length(yt))
for i in 1:n_samples
    truth_samples[i, :] = yt + noise_level^2 * rand(MvNormal(μ, Γy))
end

### Calibrate: Ensemble Kalman Inversion
N_ens = 50 # number of ensemble members
N_iter = 5 # number of EKI iterations
initial_params = EKI.construct_initial_ensemble(N_ens, priors;
                                                rng_seed=rng_seed)
ekiobj = EKI.EKIObj(initial_params, param_names,
                    vec(mean(truth_samples, dims=1)), Γy)

@test size(initial_params) == (N_ens, npar)
@test size(ekiobj.u[end]) == (N_ens, npar)
@test ekiobj.unames == ["u1", "u2"]
# NOTE(review): restored the `≈` that was lost in transcription —
# `@test a b atol=ε` is not valid Julia; the approximate-comparison
# test form is `@test a ≈ b atol=ε`.
@test ekiobj.g_t ≈ [37.29, 15.49] atol=1e-1
@test ekiobj.cov ≈ [30.29 0.0; 0.0 12.29] atol=1e-1
@test ekiobj.N_ens == N_ens

# EKI iterations
for i in 1:N_iter
    params_i = ekiobj.u[end]
    g_ens = G(params_i)
    EKI.update_ensemble!(ekiobj, g_ens)
end

# EKI results: Test if ensemble has collapsed toward the true parameter values
eki_final_result = vec(mean(ekiobj.u[end], dims=1))
@test norm(ut - eki_final_result) < 0.5
end
46 changes: 46 additions & 0 deletions test/GPEmulator/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Unit tests for the GPEmulator module: fit a Gaussian process to noisy
# samples of sin(x) and check the fitted object's fields and predictions.

# Import modules
using Random
using GaussianProcesses
using Test

using CalibrateEmulateSample.GPEmulator

@testset "GPEmulator" begin

# Seed for pseudo-random number generator (keeps the test deterministic)
rng_seed = 41
Random.seed!(rng_seed)

# Training data
n = 10                        # number of training points
x = 2.0 * π * rand(n)         # predictors/features
y = sin.(x) + 0.05 * randn(n) # predictands/targets

gppackage = GPJL()
pred_type = YType()

# Construct kernel:
# Squared exponential kernel (note that hyperparameters are on log scale)
kern = SE(0.0, 0.0)
# log standard deviation of observational noise
logObsNoise = -1.0
white = Noise(logObsNoise)
GPkernel = kern + white

# Fit Gaussian Process Regression model
gpobj = GPObj(x, y, gppackage; GPkernel=GPkernel, normalized=false,
              prediction_type=pred_type)
new_inputs = [0.0, π/2, π, 3*π/2, 2*π]
μ, σ² = GPEmulator.predict(gpobj, new_inputs)

# 1d inputs have no covariance to normalize by, so `normalized=true`
# must raise an error
@test_throws Exception GPObj(x, y, gppackage; GPkernel=GPkernel,
                             normalized=true, prediction_type=pred_type)
@test gpobj.inputs == x
@test gpobj.data == y
# NOTE(review): restored the `≈` lost in transcription on the three
# approximate comparisons below — `@test a b atol=ε` is not valid Julia;
# the approximate-comparison form is `@test a ≈ b atol=ε`.
@test gpobj.input_mean[1] ≈ 3.524 atol=1e-2
# identity comparison (`===`) is the idiomatic check against `nothing`
@test gpobj.sqrt_inv_input_cov === nothing
@test gpobj.prediction_type == pred_type
@test μ ≈ [0.0, 1.0, 0.0, -1.0, 0.0] atol=1.0
@test σ² ≈ [0.859, 0.591, 0.686, 0.645, 0.647] atol=1e-2

end
Loading

0 comments on commit ba9b037

Please sign in to comment.