diff --git a/Project.toml b/Project.toml index a4c8527c..93366856 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ authors = ["jeremiedb "] name = "EvoTrees" uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -version = "0.12.0" +version = "0.12.1" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" diff --git a/experiments/benchmarks_v2-MLE.jl b/experiments/benchmarks_v2-MLE.jl new file mode 100644 index 00000000..647b8300 --- /dev/null +++ b/experiments/benchmarks_v2-MLE.jl @@ -0,0 +1,79 @@ +using Revise +using Statistics +using StatsBase: sample +using XGBoost +using EvoTrees +using BenchmarkTools +using CUDA + +nrounds = 200 +nobs = Int(1e6) +num_feat = Int(100) +nthread = Base.Threads.nthreads() + +# EvoTrees params +params_evo = EvoTreeMLE( + T=Float64, + loss=:gaussian, + nrounds=nrounds, + lambda=0.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=100.0, + rowsample=0.5, + colsample=0.5, + nbins=64, +) + +@info "testing with: $nobs observations | $num_feat features." +x_train = rand(nobs, num_feat) +y_train = rand(size(x_train, 1)) + +@info "evotrees train CPU:" +params_evo.device = "cpu" +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:gaussian, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:gaussian); +@info "evotrees predict CPU:" +@time pred_evo = EvoTrees.predict(m_evo, x_train); +@btime EvoTrees.predict($m_evo, $x_train); + +CUDA.allowscalar(true) +@info "evotrees train GPU:" +params_evo.device = "gpu" +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:gaussian, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:gaussian); +@info "evotrees predict GPU:" +@time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); +@btime EvoTrees.predict($m_evo_gpu, $x_train); + + +################################ +# Logistic +################################ +params_evo = EvoTreeMLE( + T=Float64, + loss=:logistic, + nrounds=nrounds, + lambda=0.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=100.0, + rowsample=0.5, + colsample=0.5, + nbins=64, +) + +@info "testing with: $nobs observations | $num_feat features." 
+x_train = rand(nobs, num_feat) +y_train = rand(size(x_train, 1)) + +@info "evotrees train CPU:" +params_evo.device = "cpu" +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:logistic, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:logistic); +@info "evotrees predict CPU:" +@time pred_evo = EvoTrees.predict(m_evo, x_train); +@btime EvoTrees.predict($m_evo, $x_train); \ No newline at end of file diff --git a/experiments/benchmarks_v2.jl b/experiments/benchmarks_v2.jl index 42c0e683..14f66020 100644 --- a/experiments/benchmarks_v2.jl +++ b/experiments/benchmarks_v2.jl @@ -10,7 +10,7 @@ nrounds = 200 nthread = Base.Threads.nthreads() @info nthread -loss = "logistic" +loss = "linear" if loss == "linear" loss_xgb = "reg:squarederror" metric_xgb = "mae" @@ -40,7 +40,6 @@ metrics = [metric_xgb] params_evo = EvoTreeRegressor( T=Float32, loss=loss_evo, - metric=metric_evo, nrounds=nrounds, alpha=0.5, lambda=0.0, @@ -68,8 +67,9 @@ y_train = rand(size(x_train, 1)) @info "evotrees train CPU:" params_evo.device = "cpu" -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train); @info "evotrees predict CPU:" @time pred_evo = EvoTrees.predict(m_evo, x_train); @btime EvoTrees.predict($m_evo, $x_train); @@ -78,12 +78,8 @@ CUDA.allowscalar(true) @info "evotrees train GPU:" params_evo.device = "gpu" @time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); @info "evotrees predict GPU:" @time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); -@btime EvoTrees.predict($m_evo_gpu, $x_train); - -# w_train = ones(length(y_train)) -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train); -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train, w_train); \ No newline at end of file +@btime EvoTrees.predict($m_evo_gpu, $x_train); \ No newline at end of file diff --git a/experiments/logistic_tests.jl b/experiments/logistic_tests.jl new file mode 100644 index 00000000..d4f507de --- /dev/null +++ b/experiments/logistic_tests.jl @@ -0,0 +1,142 @@ +using Statistics +using StatsBase: sample, sample! 
+using EvoTrees +using BenchmarkTools +using CUDA + +# prepare a dataset +features = rand(Int(1.25e4), 100) +# features = rand(100, 10) +X = features +Y = rand(size(X, 1)) +𝑖 = collect(1:size(X, 1)) + +# train-eval split +𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +train_size = 0.8 +𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] + +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] + + +########################### +# Tree CPU +########################### +params_c = EvoTrees.EvoTreeMLE(T=Float32, + loss=:logistic, + nrounds=200, + lambda=0.1, gamma=0.0, eta=0.1, + max_depth=5, min_weight=1.0, + rowsample=0.5, colsample=0.5, nbins=16); + +model_c, cache_c = EvoTrees.init_evotree(params_c, x_train, y_train); +EvoTrees.grow_evotree!(model_c, cache_c) +p = model_c(x_train) +sort(p[:,1]) +sort(p[:,2]) + +# initialize from cache +params_c = model_c.params +x_size = size(cache_c.X_bin) + +# select random rows and cols +sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); +sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); +# @btime sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); +# @btime sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); + +𝑖 = cache_c.nodes[1].𝑖 +𝑗 = cache_c.𝑗 + +# build a new tree +# 897.800 ΞΌs (6 allocations: 736 bytes) +get_loss_type(m::EvoTreeGaussian{L,T,S}) where {L,T,S} = L +get_loss_type(m::EvoTrees.EvoTreeLogistic{L,T,S}) where {L,T,S} = L + +L = get_loss_type(params_c) +@time EvoTrees.update_grads!(L, cache_c.δ𝑀, cache_c.pred, cache_c.Y; alpha=params_c.alpha) +cache_c.δ𝑀 + +sort(cache_c.δ𝑀[1, :]) +sort(cache_c.δ𝑀[2, :]) +sort(cache_c.δ𝑀[3, :]) +sort(cache_c.δ𝑀[4, :]) + +p = collect(-3:0.5:3) +y = collect(-3:0.5:3) + +function get_grads(p, y) + grad = zeros(length(p), length(y)) + for i in eachindex(p) + for j in eachindex(y) + # alternate from 1 + # grad[i, j] = -(exp(-2s) * (u - y) * (u - y + exp(s) * sinh(exp(-s) * (u - y)))) / (1 + cosh(exp(-s) * (u - y))) + grad[i, j] = (exp(-2 * p[i]) * (0.0 - y[j]) * (0.0 - y[j] + exp(p[i]) * sinh(exp(-p[i]) * (0.0 - y[j])))) / (1 + cosh(exp(-p[i]) * (0.0 - y[j]))) + end + end + return grad +end + +grads = get_grads(p, y) +heatmap(grads) +# @btime EvoTrees.update_grads!($params_c.loss, $cache_c.δ𝑀, $cache_c.pred_cpu, $cache_c.Y_cpu, $params_c.Ξ±) +# βˆ‘ = vec(sum(cache_c.Ξ΄[𝑖,:], dims=1)) +# gain = EvoTrees.get_gain(params_c.loss, βˆ‘, params_c.Ξ») +# assign a root and grow tree +# train_nodes[1] = EvoTrees.TrainNode(UInt32(0), UInt32(1), βˆ‘, gain) + +# 62.530 ms (7229 allocations: 17.43 MiB) +# 1.25e5: 9.187 ms (7358 allocations: 2.46 MiB) +tree = EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))) +@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.X_bin, cache_c.K) +@btime EvoTrees.grow_tree!($EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))), $cache_c.nodes, $params_c, $cache_c.δ𝑀, $cache_c.edges, $cache_c.𝑗, $cache_c.left, $cache_c.left, $cache_c.right, $cache_c.X_bin, $cache_c.K) + +@time EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); +@btime EvoTrees.grow_tree!(EvoTrees.Tree($params_c.max_depth, $model_c.K, 
$params_c.Ξ»), $params_c, $cache_c.Ξ΄, $cache_c.hist, $cache_c.histL, $cache_c.histR, $cache_c.gains, $cache_c.edges, $𝑖, $𝑗, $𝑛, $cache_c.X_bin); +@code_warntype EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); + +# push!(model_c.trees, tree) +# 1.883 ms (83 allocations: 13.77 KiB) +@btime EvoTrees.predict!(model_c.params.loss, cache_c.pred_cpu, tree, cache_c.X, model_c.K) + +δ𝑀, K, edges, X_bin, nodes, out, left, right = cache_c.δ𝑀, cache_c.K, cache_c.edges, cache_c.X_bin, cache_c.nodes, cache_c.out, cache_c.left, cache_c.right; + +# 9.613 ms (81 allocations: 13.55 KiB) +# 1.25e5: 899.200 ΞΌs (81 allocations: 8.22 KiB) +@time EvoTrees.update_hist!(params_c.loss, nodes[1].h, δ𝑀, X_bin, 𝑖, 𝑗, K) +@btime EvoTrees.update_hist!($params_c.loss, $nodes[1].h, $δ𝑀, $X_bin, $𝑖, $𝑗, $K) +@btime EvoTrees.update_hist!($nodes[1].h, $δ𝑀, $X_bin, $nodes[1].𝑖, $𝑗) +@code_warntype EvoTrees.update_hist!(hist, Ξ΄, X_bin, 𝑖, 𝑗, 𝑛) + +j = 1 +# 8.399 ΞΌs (80 allocations: 13.42 KiB) +n = 1 +nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, 𝑖], dims=2)) +EvoTrees.update_gains!(params_c.loss, nodes[n], 𝑗, params_c, K) +nodes[1].gains +# findmax(nodes[1].gains) #1.25e5: 36.500 ΞΌs (81 allocations: 8.22 KiB) +@btime EvoTrees.update_gains!($params_c.loss, $nodes[n], $𝑗, $params_c, $K) +@code_warntype EvoTrees.update_gains!(params_c.loss, nodes[n], 𝑗, params_c, K) + +#1.25e5: 14.100 ΞΌs (1 allocation: 32 bytes) +best = findmax(nodes[n].gains) +@btime best = findmax(nodes[n].gains) +@btime best = findmax(view(nodes[n].gains, :, 𝑗)) + +tree.cond_bin[n] = best[2][1] +tree.feat[n] = best[2][2] + +Int.(tree.cond_bin[n]) +# tree.cond_bin[n] = 32 + +# 204.900 ΞΌs (1 allocation: 96 bytes) +offset = 0 +@time EvoTrees.split_set!(left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) +@btime EvoTrees.split_set!($left, $right, $𝑖, $X_bin, $tree.feat[n], $tree.cond_bin[n], $offset) +@code_warntype EvoTrees.split_set!(left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n]) + +# 1.25e5: 227.200 ΞΌs (22 allocations: 1.44 KiB) +@time EvoTrees.split_set_threads!(out, left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) +@btime EvoTrees.split_set_threads!($out, $left, $right, $𝑖, $X_bin, $tree.feat[n], $tree.cond_bin[n], $offset, Int(2e15)) diff --git a/experiments/random.jl b/experiments/random.jl index 4aad5bfd..63e93c49 100644 --- a/experiments/random.jl +++ b/experiments/random.jl @@ -33,13 +33,13 @@ params1 = EvoTreeRegressor(T=Float32, # asus laptopt: for 1.25e6 no eval: 9.650007 seconds (893.53 k allocations: 2.391 GiB, 5.52% gc time) @time model = fit_evotree(params1; x_train, y_train); -@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=10); -@btime model = fit_evotree($params1; $x_train, $y_train); +@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=100); +@btime model = fit_evotree(params1; x_train, y_train); @time pred_train = predict(model, x_train); @btime pred_train = predict(model, x_train); -gain = importance(model, 1:100) +gain = importance(model) -@time model, cache = EvoTrees.init_evotree(params1, x_train, y_train); +@time model, cache = EvoTrees.init_evotree(params1; x_train, y_train); @time EvoTrees.grow_evotree!(model, cache); ############################# @@ -84,11 +84,17 @@ params1 = EvoTreeRegressor(T=Float32, device="gpu") # Asus laptop: 10.015568 seconds (13.80 M 
allocations: 1.844 GiB, 4.00% gc time) -@time model = EvoTrees.fit_evotree(params1, X_train, Y_train); -@btime model = EvoTrees.fit_evotree(params1, X_train, Y_train); -@time model, cache = EvoTrees.init_evotree_gpu(params1, X_train, Y_train); +@time model = EvoTrees.fit_evotree(params1; x_train, y_train); +@btime model = EvoTrees.fit_evotree(params1; x_train, y_train); +@time model, cache = EvoTrees.init_evotree_gpu(params1; x_train, y_train); @time EvoTrees.grow_evotree!(model, cache); +using MLJBase +mach1 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=true) +mach2 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=false) +mach3 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=false) +fit!(mach1) + # X_train_32 = Float32.(X_train) @time pred_train = EvoTrees.predict(model, X_train); @btime pred_train = EvoTrees.predict(model, X_train); @@ -112,14 +118,14 @@ params1 = EvoTreeRegressor(T=Float32, # GPU - Gaussian ################################ params1 = EvoTreeGaussian(T=Float32, - loss=:gaussian, metric=:gaussian, + loss=:gaussian, nrounds=100, lambda=1.0, gamma=0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=32, device="gpu") # Asus laptop: 14.304369 seconds (24.81 M allocations: 2.011 GiB, 1.90% gc time) -@time model = EvoTrees.fit_evotree(params1, X_train, Y_train); +@time model = EvoTrees.fit_evotree(params1; x_train, y_train); # Auss laptop: 1.888472 seconds (8.40 k allocations: 1.613 GiB, 14.86% gc time) @time model, cache = EvoTrees.init_evotree(params1, X_train, Y_train); diff --git a/experiments/readme_plots_cpu.jl b/experiments/readme_plots_cpu.jl index de2a4864..b660a375 100644 --- a/experiments/readme_plots_cpu.jl +++ b/experiments/readme_plots_cpu.jl @@ -51,7 +51,7 @@ sqrt(mean((pred_train_linear .- y_train) .^ 2)) # linear weighted params1 = EvoTreeRegressor(T=Float64, - loss=:linear, metric=:mse, + loss=:linear, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, @@ -61,7 +61,7 @@ params1 = EvoTreeRegressor(T=Float64, # W_train = ones(eltype(Y_train), size(Y_train)) .* 5 w_train = rand(eltype(y_train), size(y_train)) .+ 0 -@time model = fit_evotree(params1; x_train, y_train, w_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, w_train, x_eval, y_eval, print_every_n=25, metric=:mse); # 67.159 ms (77252 allocations: 28.06 MiB) # @time model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 999); # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval); @@ -77,13 +77,13 @@ sqrt(mean((pred_train_linear_w .- y_train) .^ 2)) # logistic / cross-entropy params1 = EvoTreeRegressor( - loss=:logistic, metric=:logloss, + loss=:logistic, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:logloss); # 218.040 ms (123372 allocations: 34.71 MiB) # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval) @time pred_train_logistic = predict(model, x_train); @@ -97,7 +97,7 @@ params1 = EvoTreeRegressor( lambda=0.0, gamma=0.0, 
eta=0.05, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:mae); @time pred_train_L1 = predict(model, x_train) @time pred_eval_L1 = predict(model, x_eval) sqrt(mean((pred_train_L1 .- y_train) .^ 2)) @@ -112,34 +112,34 @@ savefig("figures/regression_sinus.png") # Poisson params1 = EvoTreeCount( - loss=:poisson, metric=:poisson, + loss=:poisson, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:poisson); @time pred_train_poisson = predict(model, x_train); sqrt(mean((pred_train_poisson .- y_train) .^ 2)) # Gamma params1 = EvoTreeRegressor( - loss=:gamma, metric=:gamma, + loss=:gamma, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:gamma); @time pred_train_gamma = predict(model, x_train); sqrt(mean((pred_train_gamma .- y_train) .^ 2)) # Tweedie params1 = EvoTreeRegressor( - loss=:tweedie, metric=:tweedie, + loss=:tweedie, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:tweedie); @time pred_train_tweedie = predict(model, x_train); sqrt(mean((pred_train_tweedie .- y_train) .^ 2)) @@ -180,7 +180,7 @@ sum(pred_train_q20 .< y_train) / length(y_train) # q80 params1 = EvoTreeRegressor( - loss=:quantile, alpha=0.8, metric=:none, + loss=:quantile, alpha=0.8, nrounds=200, nbins=64, lambda=1.0, gamma=0.0, eta=0.05, max_depth=6, min_weight=1.0, @@ -200,14 +200,14 @@ savefig("figures/quantiles_sinus.png") ############################### ## gaussian ############################### -params1 = EvoTreeGaussian( - loss=:gaussian, metric=:gaussian, +params1 = EvoTreeMLE( + loss=:gaussian, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, rowsample=1.0, colsample=1.0, rng=123) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10, metric=:gaussian); # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); @time pred_train = EvoTrees.predict(model, x_train); # @btime pred_train = EvoTrees.predict(model, X_train); @@ -225,4 +225,35 @@ plot!(x_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, plot!(x_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma") plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="darkgreen", linewidth=1.5, label="q20") plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="darkgreen", linewidth=1.5, label="q80") -savefig("figures/gaussian_sinus.png") \ No newline at end of file +savefig("figures/gaussian-sinus.png") + + +############################### +## Logistic +############################### +params1 = 
EvoTrees.EvoTreeMLE( + loss = :logistic, + nrounds=200, nbins=64, + lambda=1.0, gamma=0.1, eta=0.05, + max_depth=6, min_weight=1.0, + rowsample=1.0, colsample=1.0, rng=123) + +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10, metric=:logistic); +# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); +@time pred_train = EvoTrees.predict(model, x_train); +# @btime pred_train = EvoTrees.predict(model, X_train); + +pred_logistic = [Distributions.Logistic(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)] +pred_q80 = quantile.(pred_logistic, 0.8) +pred_q20 = quantile.(pred_logistic, 0.2) + +mean(y_train .< pred_q80) +mean(y_train .< pred_q20) + +x_perm = sortperm(x_train[:, 1]) +plot(x_train[:, 1], y_train, ms=0.5, mcolor="darkgray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") +plot!(x_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu") +plot!(x_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="s") +plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="darkgreen", linewidth=1.5, label="q20") +plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="darkgreen", linewidth=1.5, label="q80") +savefig("figures/logistic-sinus.png") \ No newline at end of file diff --git a/experiments/readme_plots_gpu.jl b/experiments/readme_plots_gpu.jl index 20ba04ff..e4298c3c 100644 --- a/experiments/readme_plots_gpu.jl +++ b/experiments/readme_plots_gpu.jl @@ -37,6 +37,7 @@ params1 = EvoTreeRegressor(T=Float32, device="gpu") @time model = fit_evotree(params1; x_train, y_train); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, metric=:mse, print_every_n=25); model_cpu = convert(EvoTrees.GBTree, model); pred_train_linear_gpu = predict(model, x_train) pred_train_linear_cpu = predict(model_cpu, x_train) @@ -115,7 +116,6 @@ savefig("figures/regression_sinus_gpu.png") ############################### EvoTrees.CUDA.allowscalar(false) params1 = EvoTreeGaussian(T=Float32, - loss=:gaussian, metric=:gaussian, nrounds=200, nbins=64, lambda=1.0, gamma=0.1, eta=0.05, max_depth=6, min_weight=5, @@ -123,7 +123,7 @@ params1 = EvoTreeGaussian(T=Float32, device="gpu") @time model = fit_evotree(params1; x_train, y_train); -# @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:gaussian); # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); @time pred_train_gaussian = EvoTrees.predict(model, x_train) @@ -140,4 +140,4 @@ plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linew plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma") plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20") plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80") -savefig("figures/gaussian_sinus_gpu.png") \ No newline at end of file +savefig("figures/gaussian-sinus-gpu.png") \ No newline at end of file diff --git a/experiments/speed_cpu_gpu.jl b/experiments/speed_cpu_gpu.jl index 91f2c657..3bbf032b 100644 --- a/experiments/speed_cpu_gpu.jl +++ b/experiments/speed_cpu_gpu.jl @@ -1,5 +1,5 @@ using Statistics -using StatsBase:sample, sample! +using StatsBase: sample, sample! 
using EvoTrees using BenchmarkTools using CUDA @@ -15,22 +15,29 @@ Y = rand(size(X, 1)) 𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] -𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] -X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :] -Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] ########################### # Tree CPU ########################### params_c = EvoTreeRegressor(T=Float32, - loss=:linear, metric=:none, + loss=:linear, nrounds=100, - Ξ»=1.0, Ξ³=0.1, Ξ·=0.1, + lambda=0.1, gamma=0.0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=64); +# params_c = EvoTrees.EvoTreeLogistic(T=Float32, +# loss=:linear, +# nrounds=100, +# lambda=1.0, gamma=0.0, eta=0.1, +# max_depth=6, min_weight=1.0, +# rowsample=0.5, colsample=0.5, nbins=64); + # params_c = EvoTreeGaussian(T=Float32, # loss=:gaussian, metric=:none, # nrounds=100, @@ -38,11 +45,11 @@ params_c = EvoTreeRegressor(T=Float32, # max_depth=6, min_weight=1.0, # rowsample=0.5, colsample=0.5, nbins=64); -model_c, cache_c = EvoTrees.init_evotree(params_c, X_train, Y_train); +model_c, cache_c = EvoTrees.init_evotree(params_c; x_train, y_train); # initialize from cache params_c = model_c.params -X_size = size(cache_c.X_bin) +X_size = size(cache_c.x_bin) # select random rows and cols sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); @@ -53,9 +60,11 @@ sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); 𝑖 = cache_c.nodes[1].𝑖 𝑗 = cache_c.𝑗 +L = EvoTrees.Linear +T = Float32 # build a new tree # 897.800 ΞΌs (6 allocations: 736 bytes) -@time EvoTrees.update_grads!(params_c.loss, cache_c.δ𝑀, cache_c.pred, cache_c.Y, params_c.Ξ±) +@time EvoTrees.update_grads!(L, cache_c.δ𝑀, cache_c.pred, cache_c.y; alpha = params_c.alpha) # @btime EvoTrees.update_grads!($params_c.loss, $cache_c.δ𝑀, $cache_c.pred_cpu, $cache_c.Y_cpu, $params_c.Ξ±) # βˆ‘ = vec(sum(cache_c.Ξ΄[𝑖,:], dims=1)) # gain = EvoTrees.get_gain(params_c.loss, βˆ‘, params_c.Ξ») @@ -64,8 +73,10 @@ sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); # 62.530 ms (7229 allocations: 17.43 MiB) # 1.25e5: 9.187 ms (7358 allocations: 2.46 MiB) -tree = EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))) -@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.X_bin, cache_c.K) +tree = EvoTrees.Tree{L,T}(params_c.max_depth, model_c.K, zero(typeof(params_c.lambda))) +@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.x_bin, cache_c.K, cache_c.monotone_constraints) +@code_warntype EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.x_bin, cache_c.K, cache_c.monotone_constraints) + @btime EvoTrees.grow_tree!($EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))), $cache_c.nodes, $params_c, $cache_c.δ𝑀, $cache_c.edges, $cache_c.𝑗, $cache_c.left, $cache_c.left, $cache_c.right, $cache_c.X_bin, $cache_c.K) @time EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, 
cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); diff --git a/figures/gaussian_sinus.png b/figures/gaussian-sinus.png similarity index 100% rename from figures/gaussian_sinus.png rename to figures/gaussian-sinus.png diff --git a/figures/logistic-sinus.png b/figures/logistic-sinus.png new file mode 100644 index 00000000..b1f511dc Binary files /dev/null and b/figures/logistic-sinus.png differ diff --git a/src/EvoTrees.jl b/src/EvoTrees.jl index 9eaa9eea..ab59633a 100644 --- a/src/EvoTrees.jl +++ b/src/EvoTrees.jl @@ -1,7 +1,7 @@ module EvoTrees export init_evotree, grow_evotree!, grow_tree, fit_evotree, predict -export EvoTreeRegressor, EvoTreeCount, EvoTreeClassifier, EvoTreeGaussian, +export EvoTreeRegressor, EvoTreeCount, EvoTreeClassifier, EvoTreeMLE, EvoTreeGaussian, importance, Random using Base.Threads: @threads diff --git a/src/MLJ.jl b/src/MLJ.jl index 7599c2c5..72875d3d 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -1,121 +1,174 @@ function MMI.fit(model::EvoTypes, verbosity::Int, A, y) - if model.device == "gpu" - fitresult, cache = init_evotree_gpu(model, A.matrix, y) - else - fitresult, cache = init_evotree(model, A.matrix, y) - end - grow_evotree!(fitresult, cache) - report = (features=A.names,) - return fitresult, cache, report + if model.device == "gpu" + fitresult, cache = init_evotree_gpu(model; x_train = A.matrix, y_train = y) + else + fitresult, cache = init_evotree(model; x_train = A.matrix, y_train = y) + end + grow_evotree!(fitresult, cache) + report = (features = A.names,) + return fitresult, cache, report end function okay_to_continue(new, old) - new.nrounds - old.nrounds >= 0 && - new.lambda == old.lambda && - new.gamma == old.gamma && - new.max_depth == old.max_depth && - new.min_weight == old.min_weight && - new.rowsample == old.rowsample && - new.colsample == old.colsample && - new.nbins == old.nbins && - new.alpha == old.alpha && - new.device == old.device + new.nrounds - old.nrounds >= 0 && + new.lambda == old.lambda && + new.gamma == old.gamma && + new.max_depth == old.max_depth && + new.min_weight == old.min_weight && + new.rowsample == old.rowsample && + new.colsample == old.colsample && + new.nbins == old.nbins && + new.alpha == old.alpha && + new.device == old.device end # Generate names to be used by feature_importances in the report -MMI.reformat(::EvoTypes, X, y) = ((matrix=MMI.matrix(X), names=[name for name ∈ schema(X).names]), y) -MMI.reformat(::EvoTypes, X) = ((matrix=MMI.matrix(X), names=[name for name ∈ schema(X).names]),) -MMI.reformat(::EvoTypes, X::AbstractMatrix, y) = ((matrix=X, names=["feat_$i" for i = 1:size(X, 2)]), y) -MMI.reformat(::EvoTypes, X::AbstractMatrix) = ((matrix=X, names=["feat_$i" for i = 1:size(X, 2)]),) -MMI.selectrows(::EvoTypes, I, A, y) = ((matrix=view(A.matrix, I, :), names=A.names), view(y, I)) -MMI.selectrows(::EvoTypes, I, A) = ((matrix=view(A.matrix, I, :), names=A.names),) +MMI.reformat(::EvoTypes, X, y) = + ((matrix = MMI.matrix(X), names = [name for name ∈ schema(X).names]), y) +MMI.reformat(::EvoTypes, X) = + ((matrix = MMI.matrix(X), names = [name for name ∈ schema(X).names]),) +MMI.reformat(::EvoTypes, X::AbstractMatrix, y) = + ((matrix = X, names = ["feat_$i" for i = 1:size(X, 2)]), y) +MMI.reformat(::EvoTypes, X::AbstractMatrix) = + ((matrix = X, names = ["feat_$i" for i = 1:size(X, 2)]),) +MMI.selectrows(::EvoTypes, I, A, y) = + ((matrix = view(A.matrix, I, :), names = A.names), view(y, I)) +MMI.selectrows(::EvoTypes, I, A) = ((matrix = view(A.matrix, I, :), names = A.names),) # For EarlyStopping.jl support 
MMI.iteration_parameter(::Type{<:EvoTypes}) = :nrounds function MMI.update(model::EvoTypes, verbosity::Integer, fitresult, cache, A, y) - if okay_to_continue(model, cache.params) - grow_evotree!(fitresult, cache) - else - fitresult, cache = init_evotree(model, A.matrix, y) - grow_evotree!(fitresult, cache) - end - report = (features=A.names,) - return fitresult, cache, report + if okay_to_continue(model, cache.params) + grow_evotree!(fitresult, cache) + else + if model.device == "gpu" + fitresult, cache = init_evotree_gpu(model; x_train = A.matrix, y_train = y) + else + fitresult, cache = init_evotree(model; x_train = A.matrix, y_train = y) + end + grow_evotree!(fitresult, cache) + end + report = (features = A.names,) + return fitresult, cache, report end function predict(::EvoTreeRegressor, fitresult, A) - pred = vec(predict(fitresult, A.matrix)) - return pred + pred = vec(predict(fitresult, A.matrix)) + return pred end function predict(::EvoTreeClassifier, fitresult, A) - pred = predict(fitresult, A.matrix) - return MMI.UnivariateFinite(fitresult.info[:levels], pred, pool=missing) + pred = predict(fitresult, A.matrix) + return MMI.UnivariateFinite(fitresult.info[:levels], pred, pool = missing) end function predict(::EvoTreeCount, fitresult, A) - Ξ»s = vec(predict(fitresult, A.matrix)) - return [Distributions.Poisson(Ξ») for Ξ» ∈ Ξ»s] + Ξ»s = vec(predict(fitresult, A.matrix)) + return [Distributions.Poisson(Ξ») for Ξ» ∈ Ξ»s] end function predict(::EvoTreeGaussian, fitresult, A) - pred = predict(fitresult, A.matrix) - return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] + pred = predict(fitresult, A.matrix) + return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] +end + +function predict(::EvoTreeMLE{L,T,S}, fitresult, A) where {L<:GaussianDist,T,S} + pred = predict(fitresult, A.matrix) + return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] end +function predict(::EvoTreeMLE{L,T,S}, fitresult, A) where {L<:LogisticDist,T,S} + pred = predict(fitresult, A.matrix) + return [Distributions.Logistic(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] +end # Feature Importances MMI.reports_feature_importances(::Type{<:EvoTypes}) = true function MMI.feature_importances(m::EvoTypes, fitresult, report) - fi_pairs = importance(fitresult) - return fi_pairs + fi_pairs = importance(fitresult) + return fi_pairs end # Metadata -const EvoTreeRegressor_desc = "Regression models with various underlying methods: least square, quantile, logistic, gamma, tweedie." +const EvoTreeRegressor_desc = "Regression models with various underlying methods: least square, quantile, logistic, gamma (deviance), tweedie (deviance)." const EvoTreeClassifier_desc = "Multi-classification with softmax and cross-entropy loss." -const EvoTreeCount_desc = "Poisson regression fitting Ξ» with max likelihood." -const EvoTreeGaussian_desc = "Gaussian maximum likelihood of ΞΌ and Οƒ." 
- -MMI.metadata_pkg.((EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount, EvoTreeGaussian), - name="EvoTrees", - uuid="f6006082-12f8-11e9-0c9c-0d5d367ab1e5", - url="https://github.com/Evovest/EvoTrees.jl", - julia=true, - license="Apache", - is_wrapper=false) - -MMI.metadata_model(EvoTreeRegressor, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Continuous}, - weights=false, - path="EvoTrees.EvoTreeRegressor", - descr=EvoTreeRegressor_desc) - -MMI.metadata_model(EvoTreeClassifier, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Finite}, - weights=false, - path="EvoTrees.EvoTreeClassifier", - descr=EvoTreeClassifier_desc) - -MMI.metadata_model(EvoTreeCount, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Count}, - weights=false, - path="EvoTrees.EvoTreeCount", - descr=EvoTreeCount_desc) - -MMI.metadata_model(EvoTreeGaussian, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Continuous}, - weights=false, - path="EvoTrees.EvoTreeGaussian", - descr=EvoTreeGaussian_desc) +const EvoTreeCount_desc = "Poisson regression fitting Ξ» with deviance minimization." +const EvoTreeGaussian_desc = "Deprecated - Use EvoTreeMLE with `loss=:normal` instead. Gaussian maximum likelihood of ΞΌ and Οƒ." +const EvoTreeMLE_desc = "Maximum likelihood methods supporting Normal/Gaussian and Logistic distributions." + +MMI.metadata_pkg.( + (EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount, EvoTreeGaussian), + name = "EvoTrees", + uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5", + url = "https://github.com/Evovest/EvoTrees.jl", + julia = true, + license = "Apache", + is_wrapper = false, +) + +MMI.metadata_model( + EvoTreeRegressor, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeRegressor", + descr = EvoTreeRegressor_desc, +) + +MMI.metadata_model( + EvoTreeClassifier, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Finite}, + weights = false, + path = "EvoTrees.EvoTreeClassifier", + descr = EvoTreeClassifier_desc, +) + +MMI.metadata_model( + EvoTreeCount, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Count}, + weights = false, + path = "EvoTrees.EvoTreeCount", + descr = EvoTreeCount_desc, +) + +MMI.metadata_model( + EvoTreeGaussian, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeGaussian", + descr = EvoTreeGaussian_desc, +) + +MMI.metadata_model( + EvoTreeMLE, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeMLE", + descr = EvoTreeMLE_desc, +) """ EvoTreeRegressor(;kwargs...) 
@@ -173,6 +226,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ Interface From MLJ, the type can be imported using: @@ -280,6 +339,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -395,6 +460,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -475,7 +546,7 @@ EvoTreeCount EvoTreeGaussian(;kwargs...) A model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. -EvoTreeGaussian is used to perform Gaussain probabilistic regression, fitting ΞΌ and Οƒ parameters to maximize likelihood. +EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting ΞΌ and Οƒ parameters to maximize likelihood. # Hyper-parameters @@ -516,6 +587,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -594,6 +671,140 @@ preds = predict_median(mach, X) """ EvoTreeGaussian + + +""" + EvoTreeMLE(;kwargs...) + +A model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. +EvoTreeMLE performs maximum likelihood estimation. The assumed distribution is specified through the `loss` kwarg. Both Normal/Gaussian and Logistic distributions are supported. + +# Hyper-parameters + +- `loss=:gaussian`: Loss to be minimized during training. One of: + - `:normal` / `:gaussian` + - `:logistic` +- `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. +- `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model. +- `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. +- `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. + A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. + Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range. +- `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. +- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`. +- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`. +- `nbins=32`: Number of bins into which each feature is quantized.
Buckets are defined based on quantiles, hence resulting in equal weight bins. +- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). + !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically. +- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`). +- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`, `:logistic`. +- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`. + +# Internal API + +Do `params = EvoTreeMLE()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...). + +## Training model + +A model is built using [`fit_evotree`](@ref): + +```julia +fit_evotree(params, X_train, Y_train, W_train=nothing; kwargs...). +``` + +## Inference + +Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimension refers to `ΞΌ` & `Οƒ` for Normal/Gaussian and `ΞΌ` & `s` for Logistic. + +```julia +EvoTrees.predict(model, X) +``` + +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + +# MLJ + +From MLJ, the type can be imported using: + +```julia +EvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees +``` + +Do `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`. + +## Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have one of the following element scitypes: `Continuous`, + `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:Continuous`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + +## Operations + +- `predict(mach, Xnew)`: returns a vector of `Normal` or `Logistic` distributions (according to the specified `loss`) given features `Xnew` having the same scitype as `X` above. +Predictions are probabilistic. + +Specific metrics can also be predicted using: + + - `predict_mean(mach, Xnew)` + - `predict_mode(mach, Xnew)` + - `predict_median(mach, Xnew)` + +## Fitted parameters + +The fields of `fitted_params(mach)` are: + + - `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm. + +## Report + +The fields of `report(mach)` are: + - `:features`: The names of the features encountered in training. + +# Examples + +``` +# Internal API +using EvoTrees +params = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100) +nobs, nfeats = 1_000, 5 +X, y = randn(nobs, nfeats), rand(nobs) +model = fit_evotree(params, X, y) +preds = EvoTrees.predict(model, X) +``` + +``` +# MLJ Interface +using MLJ +EvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees +model = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100) +X, y = @load_boston +mach = machine(model, X, y) |> fit!
+preds = predict(mach, X) +preds = predict_mean(mach, X) +preds = predict_mode(mach, X) +preds = predict_median(mach, X) +``` +""" +EvoTreeMLE + # function MLJ.clean!(model::EvoTreeRegressor) # warning = "" # if model.nrounds < 1 diff --git a/src/eval.jl b/src/eval.jl index 87872292..9db5f99d 100644 --- a/src/eval.jl +++ b/src/eval.jl @@ -86,6 +86,15 @@ function eval_metric(::Val{:gaussian}, p::AbstractMatrix{T}, y::AbstractVector{T return eval end +function eval_metric(::Val{:logistic}, p::AbstractMatrix{T}, y::AbstractVector{T}, w::AbstractVector{T}, alpha=0.0) where {T<:AbstractFloat} + eval = zero(T) + @inbounds for i in eachindex(y) + eval += -w[i] * (log(1 / 4 * sech(exp(-p[2, i]) * (y[i] - p[1, i]))^2) - p[2, i]) + end + eval /= sum(w) + return eval +end + function eval_metric(::Val{:quantile}, p::AbstractMatrix{T}, y::AbstractVector{T}, w::AbstractVector{T}, alpha=0.0) where {T<:AbstractFloat} eval = zero(T) for i in eachindex(y) diff --git a/src/find_split.jl b/src/find_split.jl index 62302566..7e233a2c 100644 --- a/src/find_split.jl +++ b/src/find_split.jl @@ -30,7 +30,20 @@ end Multi-threaded split_set! Take a view into left and right placeholders. Right ids are assigned at the end of the length of the current node set. """ -function split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin, offset, chunk_size, lefts, rights) +function split_set_chunk!( + left, + right, + 𝑖, + bid, + nblocks, + X_bin, + feat, + cond_bin, + offset, + chunk_size, + lefts, + rights, +) left_count = 0 right_count = 0 @@ -53,7 +66,19 @@ function split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin return nothing end -function split_views_kernel!(out::Vector{S}, left::Vector{S}, right::Vector{S}, bid, offset, chunk_size, lefts, rights, sum_lefts, cumsum_lefts, cumsum_rights) where {S} +function split_views_kernel!( + out::Vector{S}, + left::Vector{S}, + right::Vector{S}, + bid, + offset, + chunk_size, + lefts, + rights, + sum_lefts, + cumsum_lefts, + cumsum_rights, +) where {S} iter = 1 i_max = lefts[bid] bid == 1 ? 
cumsum_left = 0 : cumsum_left = cumsum_lefts[bid-1] @@ -72,7 +97,16 @@ function split_views_kernel!(out::Vector{S}, left::Vector{S}, right::Vector{S}, return nothing end -function split_set_threads!(out, left, right, 𝑖, X_bin::Matrix{S}, feat, cond_bin, offset) where {S} +function split_set_threads!( + out, + left, + right, + 𝑖, + X_bin::Matrix{S}, + feat, + cond_bin, + offset, +) where {S} # iter = Iterators.partition(𝑖, chunk_size) nblocks = ceil(Int, min(length(𝑖) / 1024, Threads.nthreads())) @@ -82,17 +116,45 @@ function split_set_threads!(out, left, right, 𝑖, X_bin::Matrix{S}, feat, cond rights = zeros(Int, nblocks) @threads for bid = 1:nblocks - split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin, offset, chunk_size, lefts, rights) + split_set_chunk!( + left, + right, + 𝑖, + bid, + nblocks, + X_bin, + feat, + cond_bin, + offset, + chunk_size, + lefts, + rights, + ) end sum_lefts = sum(lefts) cumsum_lefts = cumsum(lefts) cumsum_rights = cumsum(rights) @threads for bid = 1:nblocks - split_views_kernel!(out, left, right, bid, offset, chunk_size, lefts, rights, sum_lefts, cumsum_lefts, cumsum_rights) + split_views_kernel!( + out, + left, + right, + bid, + offset, + chunk_size, + lefts, + rights, + sum_lefts, + cumsum_lefts, + cumsum_rights, + ) end - return (view(out, offset+1:offset+sum_lefts), view(out, offset+sum_lefts+1:offset+length(𝑖))) + return ( + view(out, offset+1:offset+sum_lefts), + view(out, offset+sum_lefts+1:offset+length(𝑖)), + ) end @@ -106,8 +168,9 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L<:GradientRegression,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L<:GradientRegression,T,S} @threads for j in 𝑗 @inbounds @simd for i in 𝑖 hid = 3 * X_bin[i, j] - 2 @@ -121,7 +184,7 @@ end """ update_hist! - GaussianRegression + MLE2P """ function update_hist!( ::Type{L}, @@ -129,8 +192,9 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L<:GaussianRegression,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L<:MLE2P,T,S} @threads for j in 𝑗 @inbounds @simd for i in 𝑖 hid = 5 * X_bin[i, j] - 4 @@ -154,12 +218,13 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L,T,S} @threads for j in 𝑗 @inbounds for i in 𝑖 hid = (2 * K + 1) * (X_bin[i, j] - 1) - for k in 1:(2*K+1) + for k = 1:(2*K+1) hist[j][hid+k] += δ𝑀[k, i] end end @@ -180,24 +245,35 @@ Generic fallback function update_gains!( node::TrainNode, 𝑗::Vector, - params::EvoTypes{L,T,S}, K, monotone_constraints) where {L,T,S} + params::EvoTypes{L,T,S}, + K, + monotone_constraints, +) where {L,T,S} KK = 2 * K + 1 @inbounds @threads for j in 𝑗 - @inbounds for k in 1:KK + @inbounds for k = 1:KK node.hL[j][k] = node.h[j][k] node.hR[j][k] = node.βˆ‘[k] - node.h[j][k] end - @inbounds for bin in 2:params.nbins + @inbounds for bin = 2:params.nbins @inbounds for k = 1:KK binid = KK * (bin - 1) node.hL[j][binid+k] = node.hL[j][binid-KK+k] + node.h[j][binid+k] node.hR[j][binid+k] = node.hR[j][binid-KK+k] - node.h[j][binid+k] end end - hist_gains_cpu!(L, view(node.gains, :, j), node.hL[j], node.hR[j], params, K, monotone_constraints[j]) + hist_gains_cpu!( + L, + view(node.gains, :, j), + node.hL[j], + node.hR[j], + params, + K, + monotone_constraints[j], + ) end return nothing end @@ -207,20 +283,33 @@ end hist_gains_cpu! 
GradientRegression """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:GradientRegression,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:GradientRegression,T} + @inbounds for bin = 1:params.nbins i = 3 * bin - 2 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins gains[bin] = hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) / 2 elseif hL[i+2] > params.min_weight && hR[i+2] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+2], params, K) - predR = pred_scalar_cpu!(hR[i:i+2], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+2), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+2), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) - gains[bin] = (hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) + - hR[i]^2 / (hR[i+1] + params.lambda * hR[i+2])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) + + hR[i]^2 / (hR[i+1] + params.lambda * hR[i+2]) + ) / 2 end end end @@ -231,8 +320,16 @@ end hist_gains_cpu! QuantileRegression/L1Regression """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:Union{QuantileRegression,L1Regression},T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:Union{QuantileRegression,L1Regression},T} + @inbounds for bin = 1:params.nbins i = 3 * bin - 2 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins @@ -246,25 +343,43 @@ end """ hist_gains_cpu! 
- GaussianRegression + MLE2P """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:GaussianRegression,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:MLE2P,T} + @inbounds for bin = 1:params.nbins i = 5 * bin - 4 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins - gains[bin] = (hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + - hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + + hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + ) / 2 elseif hL[i+4] > params.min_weight && hR[i+4] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+4], params, K) - predR = pred_scalar_cpu!(hR[i:i+4], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+4), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+4), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) - gains[bin] = (hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + - hR[i]^2 / (hR[i+2] + params.lambda * hR[i+4])) / 2 + - (hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + - hR[i+1]^2 / (hR[i+3] + params.lambda * hR[i+4])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + + hR[i]^2 / (hR[i+2] + params.lambda * hR[i+4]) + ) / 2 + + ( + hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + + hR[i+1]^2 / (hR[i+3] + params.lambda * hR[i+4]) + ) / 2 end end end @@ -275,8 +390,16 @@ end hist_gains_cpu! 
Generic """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L,T} + @inbounds for bin = 1:params.nbins i = (2 * K + 1) * (bin - 1) # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins @@ -290,11 +413,17 @@ function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR: elseif hL[i+2*K+1] > params.min_weight && hR[i+2*K+1] > params.min_weight for k = 1:K if k == 1 - gains[bin] = (hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + - hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1])) / 2 + gains[bin] = + ( + hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + + hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1]) + ) / 2 else - gains[bin] += (hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + - hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1])) / 2 + gains[bin] += + ( + hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + + hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1]) + ) / 2 end end end diff --git a/src/fit.jl b/src/fit.jl index 762ef05f..abbe9ee2 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -3,98 +3,119 @@ Initialise EvoTree """ -function init_evotree(params::EvoTypes{L,T,S}, X::AbstractMatrix, Y::AbstractVector, W=nothing, offset=nothing; fnames=nothing) where {L,T,S} +function init_evotree( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + fnames = nothing, +) where {L,T,S} K = 1 levels = nothing - X = convert(Matrix{T}, X) - + x = convert(Matrix{T}, x_train) + offset = !isnothing(offset_train) ? 
T.(offset_train) : nothing if L == Logistic - Y = T.(Y) - ΞΌ = [logit(mean(Y))] + y = T.(y_train) + ΞΌ = [logit(mean(y))] !isnothing(offset) && (offset .= logit.(offset)) - elseif L ∈ [Poisson, Gamma, Tweedie] - Y = T.(Y) - ΞΌ = fill(log(mean(Y)), 1) + elseif L in [Poisson, Gamma, Tweedie] + y = T.(y_train) + ΞΌ = fill(log(mean(y)), 1) !isnothing(offset) && (offset .= log.(offset)) elseif L == Softmax - if eltype(Y) <: CategoricalValue - levels = CategoricalArrays.levels(Y) + if eltype(y_train) <: CategoricalValue + levels = CategoricalArrays.levels(y_train) K = length(levels) ΞΌ = zeros(T, K) - Y = UInt32.(CategoricalArrays.levelcode.(Y)) + y = UInt32.(CategoricalArrays.levelcode.(y_train)) else - levels = sort(unique(Y)) - yc = CategoricalVector(Y, levels=levels) + levels = sort(unique(y_train)) + yc = CategoricalVector(y_train, levels = levels) K = length(levels) ΞΌ = zeros(T, K) - Y = UInt32.(CategoricalArrays.levelcode.(yc)) + y = UInt32.(CategoricalArrays.levelcode.(yc)) end !isnothing(offset) && (offset .= log.(offset)) - elseif L == Gaussian + elseif L == GaussianDist + K = 2 + y = T.(y_train) + ΞΌ = [mean(y), log(std(y))] + !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) + elseif L == LogisticDist K = 2 - Y = T.(Y) - ΞΌ = [mean(Y), log(std(Y))] + y = T.(y_train) + ΞΌ = [mean(y), log(std(y) * sqrt(3) / Ο€)] !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) else - Y = T.(Y) - ΞΌ = [mean(Y)] + y = T.(y_train) + ΞΌ = [mean(y)] end + ΞΌ = T.(ΞΌ) # force a neutral bias/initial tree when offset is specified !isnothing(offset) && (ΞΌ .= 0) # initialize preds - X_size = size(X) - pred = zeros(T, K, X_size[1]) - @inbounds for i = 1:X_size[1] + x_size = size(x) + pred = zeros(T, K, x_size[1]) + @inbounds for i = 1:x_size[1] pred[:, i] .= ΞΌ end !isnothing(offset) && (pred .+= offset') # init GBTree bias = [Tree{L,T}(ΞΌ)] - fnames = isnothing(fnames) ? ["feat_$i" for i in axes(X, 2)] : string.(fnames) - @assert length(fnames) == size(X, 2) + fnames = isnothing(fnames) ? ["feat_$i" for i in axes(x, 2)] : string.(fnames) + @assert length(fnames) == size(x, 2) info = Dict(:fnames => fnames, :levels => levels) evotree = GBTree{L,T,S}(bias, params, Metric(), K, info) # initialize gradients and weights - δ𝑀 = zeros(T, 2 * K + 1, X_size[1]) - W = isnothing(W) ? ones(T, size(Y)) : Vector{T}(W) - @assert (length(Y) == length(W) && minimum(W) > 0) - δ𝑀[end, :] .= W + δ𝑀 = zeros(T, 2 * K + 1, x_size[1]) + w = isnothing(w_train) ? 
ones(T, size(y)) : Vector{T}(w_train) + @assert (length(y) == length(w) && minimum(w) > 0) + δ𝑀[end, :] .= w # binarize data into quantiles - edges = get_edges(X, params.nbins) - X_bin = binarize(X, edges) + edges = get_edges(x, params.nbins) + x_bin = binarize(x, edges) - 𝑖_ = UInt32.(collect(1:X_size[1])) - 𝑗_ = UInt32.(collect(1:X_size[2])) - 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * X_size[2])) + 𝑖_ = UInt32.(collect(1:x_size[1])) + 𝑗_ = UInt32.(collect(1:x_size[2])) + 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * x_size[2])) # initialize histograms - nodes = [TrainNode(X_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] - nodes[1].𝑖 = zeros(eltype(𝑖_), ceil(Int, params.rowsample * X_size[1])) + nodes = [TrainNode(x_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] + nodes[1].𝑖 = zeros(eltype(𝑖_), ceil(Int, params.rowsample * x_size[1])) out = zeros(UInt32, length(nodes[1].𝑖)) left = zeros(UInt32, length(nodes[1].𝑖)) right = zeros(UInt32, length(nodes[1].𝑖)) # assign monotone contraints in constraints vector - monotone_constraints = zeros(Int32, X_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + monotone_constraints = zeros(Int32, x_size[2]) + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end - cache = (params=deepcopy(params), - X=X, Y=Y, K=K, - nodes=nodes, - pred=pred, - 𝑖_=𝑖_, 𝑗_=𝑗_, 𝑗=𝑗, - out=out, left=left, right=right, - δ𝑀=δ𝑀, - edges=edges, - X_bin=X_bin, - monotone_constraints=monotone_constraints) + cache = ( + params = deepcopy(params), + x = x, + y = y, + K = K, + nodes = nodes, + pred = pred, + 𝑖_ = 𝑖_, + 𝑗_ = 𝑗_, + 𝑗 = 𝑗, + out = out, + left = left, + right = right, + δ𝑀 = δ𝑀, + edges = edges, + x_bin = x_bin, + monotone_constraints = monotone_constraints, + ) cache.params.nrounds = 0 @@ -111,16 +132,29 @@ function grow_evotree!(evotree::GBTree{L,T,S}, cache) where {L,T,S} # loop over nrounds for i = 1:Ξ΄nrounds # select random rows and cols - sample!(params.rng, cache.𝑖_, cache.nodes[1].𝑖, replace=false, ordered=true) - sample!(params.rng, cache.𝑗_, cache.𝑗, replace=false, ordered=true) + sample!(params.rng, cache.𝑖_, cache.nodes[1].𝑖, replace = false, ordered = true) + sample!(params.rng, cache.𝑗_, cache.𝑗, replace = false, ordered = true) # build a new tree - update_grads!(L, cache.δ𝑀, cache.pred, cache.Y; alpha=params.alpha) + update_grads!(L, cache.δ𝑀, cache.pred, cache.y; alpha = params.alpha) # assign a root and grow tree tree = Tree{L,T}(params.max_depth, evotree.K, zero(T)) - grow_tree!(tree, cache.nodes, params, cache.δ𝑀, cache.edges, cache.𝑗, cache.out, cache.left, cache.right, cache.X_bin, cache.K, cache.monotone_constraints) + grow_tree!( + tree, + cache.nodes, + params, + cache.δ𝑀, + cache.edges, + cache.𝑗, + cache.out, + cache.left, + cache.right, + cache.x_bin, + cache.K, + cache.monotone_constraints, + ) push!(evotree.trees, tree) - predict!(cache.pred, tree, cache.X, cache.K) + predict!(cache.pred, tree, cache.x, cache.K) end # end of nrounds cache.params.nrounds = params.nrounds @@ -134,8 +168,14 @@ function grow_tree!( params::EvoTypes{L,T,S}, δ𝑀::Matrix{T}, edges, - 𝑗, out, left, right, - X_bin::AbstractMatrix, K, monotone_constraints) where {L,T,S} + 𝑗, + out, + left, + right, + x_bin::AbstractMatrix, + K, + monotone_constraints, +) where {L,T,S} # reset nodes @threads for n in eachindex(nodes) @@ -151,7 +191,7 @@ function grow_tree!( depth = 1 # initialize summary stats - nodes[1].βˆ‘ .= 
vec(sum(δ𝑀[:, nodes[1].𝑖], dims=2)) + nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims = 2)) nodes[1].gain = get_gain(L, nodes[1].βˆ‘, params.lambda, K) # grow while there are remaining active nodes while length(n_current) > 0 && depth <= params.max_depth @@ -167,7 +207,7 @@ function grow_tree!( nodes[n].h .= nodes[n>>1].h .- nodes[n-1].h end else - update_hist!(L, nodes[n].h, δ𝑀, X_bin, nodes[n].𝑖, 𝑗, K) + update_hist!(L, nodes[n].h, δ𝑀, x_bin, nodes[n].𝑖, 𝑗, K) end end end @@ -191,7 +231,16 @@ function grow_tree!( popfirst!(n_next) else # println("typeof(nodes[n].𝑖): ", typeof(nodes[n].𝑖)) - _left, _right = split_set_threads!(out, left, right, nodes[n].𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) + _left, _right = split_set_threads!( + out, + left, + right, + nodes[n].𝑖, + x_bin, + tree.feat[n], + tree.cond_bin[n], + offset, + ) nodes[n<<1].𝑖, nodes[n<<1+1].𝑖 = _left, _right offset += length(nodes[n].𝑖) update_childs_βˆ‘!(L, nodes, n, best[2][1], best[2][2], K) @@ -229,7 +278,7 @@ Main training function. Performs model fitting given configuration `params`, `x_ # Arguments -- `params::EvoTypes`: configuration info providing hyper-paramters. `EvoTypes` comprises EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount or EvoTreeGaussian +- `params::EvoTypes`: configuration info providing hyper-paramters. `EvoTypes` comprises EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount and EvoTreeMLE. # Keyword arguments @@ -241,44 +290,70 @@ Main training function. Performs model fitting given configuration `params`, `x_ - `y_eval::Vector`: vector of evaluation targets of length `#observations`. - `w_eval::Vector`: vector of evaluation weights of length `#observations`. Defaults to `nothing` (assumes a vector of 1s). - `offset_eval::VecOrMat`: evaluation data offset. Should match the size of the predictions. +- `metric`: The evaluation metric that wil be tracked on `x_eval`, `y_eval` and optionally `w_eval` / `offset_eval` data. + Supported metrics are: + + - `:mse`: mean-squared error. Adapted for general regression models. + - `:rmse`: root-mean-squared error (CPU only). Adapted for general regression models. + - `:mae`: mean absolute error. Adapted for general regression models. + - `:logloss`: Adapted for `:logistic` regression models. + - `:mlogloss`: Multi-class cross entropy. Adapted to `EvoTreeClassifier` classification models. + - `:poisson`: Poisson deviance. Adapted to `EvoTreeCount` count models. + - `:gamma`: Gamma deviance. Adapted to regression problem on Gamma like, positively distributed targets. + - `:tweedie`: Tweedie deviance. Adapted to regression problem on Tweedie like, positively distributed targets with probability mass at `y == 0`. - `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped. - `print_every_n`: sets at which frequency logging info should be printed. - `verbosity`: set to 1 to print logging info during training. +- `fnames`: the names of the `x_train` features. If provided, should be a vector of string with `length(fnames) = size(x_train, 2)`. 
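# Illustrative usage sketch for the keyword API documented above; the data,
# hyper-parameter values and metric choice are placeholders, not prescriptions.
config = EvoTreeRegressor(loss = :logistic, nrounds = 200, eta = 0.05)
m = fit_evotree(config; x_train, y_train, x_eval, y_eval,
    metric = :logloss, early_stopping_rounds = 50, print_every_n = 25)
p = m(x_eval)    # a fitted GBTree is callable; equivalent to predict(m, x_eval)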
""" -function fit_evotree(params::EvoTypes{L,T,S}; - x_train::AbstractMatrix, y_train::AbstractVector, w_train=nothing, offset_train=nothing, - x_eval=nothing, y_eval=nothing, w_eval=nothing, offset_eval=nothing, - metric=nothing, early_stopping_rounds=9999, print_every_n=9999, verbosity=1, fnames=nothing) where {L,T,S} +function fit_evotree( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + x_eval = nothing, + y_eval = nothing, + w_eval = nothing, + offset_eval = nothing, + metric = nothing, + early_stopping_rounds = 9999, + print_every_n = 9999, + verbosity = 1, + fnames = nothing, +) where {L,T,S} nrounds_max = params.nrounds params.nrounds = 0 iter_since_best = 0 if params.device == "gpu" - model, cache = init_evotree_gpu(params, x_train, y_train, w_train, offset_train; fnames) + model, cache = + init_evotree_gpu(params; x_train, y_train, w_train, offset_train, fnames) else - model, cache = init_evotree(params, x_train, y_train, w_train, offset_train; fnames) + model, cache = init_evotree(params; x_train, y_train, w_train, offset_train, fnames) end if !isnothing(offset_eval) L == Logistic && (offset_eval .= logit.(offset_eval)) L in [Poisson, Gamma, Tweedie] && (offset_eval .= log.(offset_eval)) L == Softmax && (offset_eval .= log.(offset_eval)) - L == Gaussian && (offset_eval[:, 2] .= log.(offset_eval[:, 2])) + L in [GaussianDist, LogisticDist] && (offset_eval[:, 2] .= log.(offset_eval[:, 2])) offset_eval = T.(offset_eval) end + !isnothing(metric) ? metric = Symbol(metric) : nothing if !isnothing(metric) && !isnothing(x_eval) && !isnothing(y_eval) if params.device == "gpu" x_eval = CuArray(T.(x_eval)) - y_eval = CuArray(eltype(cache.Y).(y_eval)) + y_eval = CuArray(eltype(cache.y).(y_eval)) w_eval = isnothing(w_eval) ? CUDA.ones(T, size(y_eval)) : CuArray(T.(w_eval)) p_eval = predict(model.trees[1], x_eval, model.K) !isnothing(offset_eval) && (p_eval .+= CuArray(offset_eval')) eval_vec = CUDA.zeros(T, size(y_eval, 1)) else # params.device == "cpu" x_eval = T.(x_eval) - y_eval = eltype(cache.Y).(y_eval) + y_eval = eltype(cache.y).(y_eval) w_eval = isnothing(w_eval) ? 
ones(T, size(y_eval)) : T.(w_eval) p_eval = predict(model.trees[1], x_eval, model.K) !isnothing(offset_eval) && (p_eval .+= offset_eval') @@ -286,9 +361,11 @@ function fit_evotree(params::EvoTypes{L,T,S}; # initialize metric metric_track = Metric() metric_best = Metric() - metric_track.metric = eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) - tracker = (iter=[0], metric=[metric_track.metric]) - @info "Initial tracking info" iter = model.params.nrounds metric = metric_track.metric + metric_track.metric = + eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) + tracker = (iter = [0], metric = [metric_track.metric]) + @info "Initial tracking info" iter = model.params.nrounds metric = + metric_track.metric end while model.params.nrounds < nrounds_max && iter_since_best < early_stopping_rounds @@ -297,7 +374,8 @@ function fit_evotree(params::EvoTypes{L,T,S}; # callback function if !isnothing(metric) && !isnothing(x_eval) && !isnothing(y_eval) predict!(p_eval, model.trees[model.params.nrounds+1], x_eval, model.K) - metric_track.metric = eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) + metric_track.metric = + eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) if metric_track.metric < metric_best.metric metric_best.metric = metric_track.metric metric_best.iter = model.params.nrounds @@ -306,7 +384,8 @@ function fit_evotree(params::EvoTypes{L,T,S}; iter_since_best += 1 end if model.params.nrounds % print_every_n == 0 && verbosity > 0 - @info "Tracking info" iter = model.params.nrounds metric = metric_track.metric + @info "Tracking info" iter = model.params.nrounds metric = + metric_track.metric end end # end of callback end diff --git a/src/gpu/find_split_gpu.jl b/src/gpu/find_split_gpu.jl index 58278ffb..833d8749 100644 --- a/src/gpu/find_split_gpu.jl +++ b/src/gpu/find_split_gpu.jl @@ -111,7 +111,7 @@ function update_hist_gpu!( X_bin::CuMatrix{UInt8}, 𝑖::CuVector{S}, 𝑗::CuVector{S}, K; - MAX_THREADS=128) where {L<:GaussianRegression,T,S} + MAX_THREADS=128) where {L<:MLE2P,T,S} nbins = size(h, 2) thread_i = max(nbins, min(MAX_THREADS, length(𝑖))) @@ -237,8 +237,10 @@ function hist_gains_gpu_kernel!(gains::CuDeviceMatrix{T}, hL::CuDeviceArray{T,3} if i == nbins gains[i, j] = hL[1, i, j]^2 / (hL[2, i, j] + lambda * hL[3, i, j]) / 2 elseif hL[3, i, j] > min_weight && hR[3, i, j] > min_weight - predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) - predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) + predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) @@ -253,13 +255,13 @@ end """ update_gains! 
- GaussianRegression + MLE2P """ function update_gains_gpu!( node::TrainNodeGPU, 𝑗::AbstractVector, params::EvoTypes{L,T,S}, K, monotone_constraints; - MAX_THREADS=512) where {L<:GaussianRegression,T,S} + MAX_THREADS=512) where {L<:MLE2P,T,S} cumsum!(node.hL, node.h, dims=2) node.hR .= view(node.hL, :, params.nbins:params.nbins, :) .- node.hL @@ -281,8 +283,10 @@ function hist_gains_gpu_kernel_gauss!(gains::CuDeviceMatrix{T}, hL::CuDeviceArra if i == nbins gains[i, j] = (hL[1, i, j]^2 / (hL[3, i, j] + lambda * hL[5, i, j]) + hL[2, i, j]^2 / (hL[4, i, j] + lambda * hL[5, i, j])) / 2 elseif hL[5, i, j] > min_weight && hR[5, i, j] > min_weight - predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) - predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) + predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) diff --git a/src/gpu/fit_gpu.jl b/src/gpu/fit_gpu.jl index da6c33d4..d26cb3ed 100644 --- a/src/gpu/fit_gpu.jl +++ b/src/gpu/fit_gpu.jl @@ -1,80 +1,97 @@ -function init_evotree_gpu(params::EvoTypes{L,T,S}, - X::AbstractMatrix, Y::AbstractVector, W=nothing, offset=nothing; fnames) where {L,T,S} +function init_evotree_gpu( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + fnames = nothing, +) where {L,T,S} K = 1 levels = nothing - X = convert(Matrix{T}, X) + x = convert(Matrix{T}, x_train) + offset = !isnothing(offset_train) ? T.(offset_train) : nothing if L == Logistic - Y = CuArray(T.(Y)) - ΞΌ = [logit(mean(Y))] + y = CuArray(T.(y_train)) + ΞΌ = [logit(mean(y))] !isnothing(offset) && (offset .= logit.(offset)) elseif L ∈ [Poisson, Gamma, Tweedie] - Y = CuArray(T.(Y)) - ΞΌ = fill(log(mean(Y)), 1) + y = CuArray(T.(y_train)) + ΞΌ = fill(log(mean(y)), 1) !isnothing(offset) && (offset .= log.(offset)) - elseif L == Gaussian + elseif L == GaussianDist K = 2 - Y = CuArray(T.(Y)) - ΞΌ = [mean(Y), log(std(Y))] + y = CuArray(T.(y_train)) + ΞΌ = [mean(y), log(std(y))] !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) else - Y = CuArray(T.(Y)) - ΞΌ = [mean(Y)] + y = CuArray(T.(y_train)) + ΞΌ = [mean(y)] end # force a neutral bias/initial tree when offset is specified !isnothing(offset) && (ΞΌ .= 0) # initialize preds - X_size = size(X) - pred = CUDA.zeros(T, K, X_size[1]) + x_size = size(x) + pred = CUDA.zeros(T, K, x_size[1]) pred .= CuArray(ΞΌ) !isnothing(offset) && (pred .+= CuArray(offset')) # init GBTree bias = [TreeGPU{L,T}(CuArray(ΞΌ))] - fnames = isnothing(fnames) ? ["feat_$i" for i in axes(X, 2)] : string.(fnames) - @assert length(fnames) == size(X, 2) + fnames = isnothing(fnames) ? ["feat_$i" for i in axes(x, 2)] : string.(fnames) + @assert length(fnames) == size(x, 2) info = Dict(:fnames => fnames, :levels => levels) evotree = GBTreeGPU{L,T,S}(bias, params, Metric(), K, info) # initialize gradients and weights - δ𝑀 = CUDA.zeros(T, 2 * K + 1, X_size[1]) - W = isnothing(W) ? CUDA.ones(T, size(Y)) : CuVector{T}(W) - @assert (length(Y) == length(W) && minimum(W) > 0) - δ𝑀[end, :] .= W + δ𝑀 = CUDA.zeros(T, 2 * K + 1, x_size[1]) + w = isnothing(w_train) ? 
CUDA.ones(T, size(y)) : CuVector{T}(w_train) + @assert (length(y) == length(w) && minimum(w) > 0) + δ𝑀[end, :] .= w # binarize data into quantiles - edges = get_edges(X, params.nbins) - X_bin = CuArray(binarize(X, edges)) + edges = get_edges(x, params.nbins) + x_bin = CuArray(binarize(x, edges)) - 𝑖_ = UInt32.(collect(1:X_size[1])) - 𝑗_ = UInt32.(collect(1:X_size[2])) - 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * X_size[2])) + 𝑖_ = UInt32.(collect(1:x_size[1])) + 𝑗_ = UInt32.(collect(1:x_size[2])) + 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * x_size[2])) # initializde histograms - nodes = [TrainNodeGPU(X_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] - nodes[1].𝑖 = CUDA.zeros(eltype(𝑖_), ceil(Int, params.rowsample * X_size[1])) + nodes = [TrainNodeGPU(x_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] + nodes[1].𝑖 = CUDA.zeros(eltype(𝑖_), ceil(Int, params.rowsample * x_size[1])) out = CUDA.zeros(UInt32, length(nodes[1].𝑖)) left = CUDA.zeros(UInt32, length(nodes[1].𝑖)) right = CUDA.zeros(UInt32, length(nodes[1].𝑖)) # assign monotone contraints in constraints vector - monotone_constraints = zeros(Int32, X_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + monotone_constraints = zeros(Int32, x_size[2]) + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end # store cache - cache = (params=deepcopy(params), - X=CuArray(X), X_bin=X_bin, Y=Y, K=K, - nodes=nodes, - pred=pred, - 𝑖_=𝑖_, 𝑗_=𝑗_, 𝑗=𝑗, 𝑖=Array(nodes[1].𝑖), - out=out, left=left, right=right, - δ𝑀=δ𝑀, - edges=edges, - monotone_constraints=CuArray(monotone_constraints)) + cache = ( + params = deepcopy(params), + x = CuArray(x), + x_bin = x_bin, + y = y, + K = K, + nodes = nodes, + pred = pred, + 𝑖_ = 𝑖_, + 𝑗_ = 𝑗_, + 𝑗 = 𝑗, + 𝑖 = Array(nodes[1].𝑖), + out = out, + left = left, + right = right, + δ𝑀 = δ𝑀, + edges = edges, + monotone_constraints = CuArray(monotone_constraints), + ) cache.params.nrounds = 0 @@ -86,27 +103,39 @@ function grow_evotree!(evotree::GBTreeGPU{L,T,S}, cache) where {L,T,S} # initialize from cache params = evotree.params - X_size = size(cache.X_bin) Ξ΄nrounds = params.nrounds - cache.params.nrounds # loop over nrounds for i = 1:Ξ΄nrounds # select random rows and cols - sample!(params.rng, cache.𝑖_, cache.𝑖, replace=false, ordered=true) - sample!(params.rng, cache.𝑗_, cache.𝑗, replace=false, ordered=true) + sample!(params.rng, cache.𝑖_, cache.𝑖, replace = false, ordered = true) + sample!(params.rng, cache.𝑗_, cache.𝑗, replace = false, ordered = true) cache.nodes[1].𝑖 .= CuArray(cache.𝑖) # build a new tree - update_grads_gpu!(L, cache.δ𝑀, cache.pred, cache.Y) + update_grads_gpu!(L, cache.δ𝑀, cache.pred, cache.y) # # assign a root and grow tree tree = TreeGPU{L,T}(params.max_depth, evotree.K, zero(T)) - grow_tree_gpu!(tree, cache.nodes, params, cache.δ𝑀, cache.edges, CuVector(cache.𝑗), cache.out, cache.left, cache.right, cache.X_bin, cache.K, cache.monotone_constraints) + grow_tree_gpu!( + tree, + cache.nodes, + params, + cache.δ𝑀, + cache.edges, + CuVector(cache.𝑗), + cache.out, + cache.left, + cache.right, + cache.x_bin, + cache.K, + cache.monotone_constraints, + ) push!(evotree.trees, tree) # update predctions - predict!(cache.pred, tree, cache.X, cache.K) + predict!(cache.pred, tree, cache.x, cache.K) end # end of nrounds cache.params.nrounds = params.nrounds - # return model, cache + CUDA.reclaim() return evotree end @@ -117,8 +146,14 @@ function grow_tree_gpu!( 
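# Illustrative sketch of the monotone_constraints option handled above: it is a
# Dict{Int,Int} mapping a feature column index to a constraint flag that gets
# copied into the per-feature constraints vector. Following the gain kernels,
# 1 only keeps splits whose right-child prediction exceeds the left one
# (non-decreasing effect), -1 the reverse; unlisted features default to 0.
config = EvoTreeRegressor(
    loss = :linear,
    monotone_constraints = Dict(1 => 1, 3 => -1),  # feature 1 increasing, feature 3 decreasing
)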
params::EvoTypes{L,T,S}, δ𝑀::AbstractMatrix, edges, - 𝑗, out, left, right, - X_bin::AbstractMatrix, K, monotone_constraints) where {L,T,S} + 𝑗, + out, + left, + right, + x_bin::AbstractMatrix, + K, + monotone_constraints, +) where {L,T,S} n_next = [1] n_current = copy(n_next) @@ -133,7 +168,7 @@ function grow_tree_gpu!( end # initialize summary stats - nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims=2)) + nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims = 2)) nodes[1].gain = get_gain(L, Array(nodes[1].βˆ‘), params.lambda, K) # should use a GPU version? # grow while there are remaining active nodes - TO DO histogram substraction hits issue on GPU @@ -151,14 +186,15 @@ function grow_tree_gpu!( CUDA.synchronize() end else - update_hist_gpu!(L, nodes[n].h, δ𝑀, X_bin, nodes[n].𝑖, 𝑗, K) + update_hist_gpu!(L, nodes[n].h, δ𝑀, x_bin, nodes[n].𝑖, 𝑗, K) end end end # grow while there are remaining active nodes for n ∈ sort(n_current) - if depth == params.max_depth || @allowscalar(nodes[n].βˆ‘[end] <= params.min_weight) + if depth == params.max_depth || + @allowscalar(nodes[n].βˆ‘[end] <= params.min_weight) pred_leaf_gpu!(tree.pred, n, Array(nodes[n].βˆ‘), params) else update_gains_gpu!(nodes[n], 𝑗, params, K, monotone_constraints) @@ -177,14 +213,24 @@ function grow_tree_gpu!( pred_leaf_gpu!(tree.pred, n, Array(nodes[n].βˆ‘), params) popfirst!(n_next) else - _left, _right = split_set_threads_gpu!(out, left, right, @allowscalar(nodes[n]).𝑖, X_bin, @allowscalar(tree.feat[n]), @allowscalar(tree.cond_bin[n]), offset) + _left, _right = split_set_threads_gpu!( + out, + left, + right, + @allowscalar(nodes[n]).𝑖, + x_bin, + @allowscalar(tree.feat[n]), + @allowscalar(tree.cond_bin[n]), + offset, + ) nodes[n<<1].𝑖, nodes[n<<1+1].𝑖 = _left, _right offset += length(nodes[n].𝑖) # println("length(_left): ", length(_left), " | length(_right): ", length(_right)) # println("best: ", best) update_childs_βˆ‘_gpu!(L, nodes, n, best[2][1], best[2][2]) nodes[n<<1].gain = get_gain(L, Array(nodes[n<<1].βˆ‘), params.lambda, K) - nodes[n<<1+1].gain = get_gain(L, Array(nodes[n<<1+1].βˆ‘), params.lambda, K) + nodes[n<<1+1].gain = + get_gain(L, Array(nodes[n<<1+1].βˆ‘), params.lambda, K) if length(_right) >= length(_left) push!(n_next, n << 1) diff --git a/src/gpu/loss_gpu.jl b/src/gpu/loss_gpu.jl index 6712bdfd..e6de3f64 100644 --- a/src/gpu/loss_gpu.jl +++ b/src/gpu/loss_gpu.jl @@ -4,8 +4,8 @@ function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:Gr return gain end -# Gaussian regression -function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:GaussianRegression,T<:AbstractFloat} +# MLE regression +function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:MLE2P,T<:AbstractFloat} gain = βˆ‘[1]^2 / (βˆ‘[3] + lambda * βˆ‘[5]) / 2 + βˆ‘[2]^2 / (βˆ‘[4] + lambda * βˆ‘[5]) / 2 return gain end @@ -129,7 +129,7 @@ function kernel_gauss_δ𝑀!(δ𝑀::CuDeviceMatrix, p::CuDeviceMatrix, y::CuDe return end -function update_grads_gpu!(::Type{Gaussian}, δ𝑀::CuMatrix, p::CuMatrix, y::CuVector; MAX_THREADS=1024) +function update_grads_gpu!(::Type{GaussianDist}, δ𝑀::CuMatrix, p::CuMatrix, y::CuVector; MAX_THREADS=1024) threads = min(MAX_THREADS, length(y)) blocks = ceil(Int, (length(y)) / threads) @cuda blocks = blocks threads = threads kernel_gauss_δ𝑀!(δ𝑀, p, y) diff --git a/src/gpu/predict_gpu.jl b/src/gpu/predict_gpu.jl index e7557661..ec0672d1 100644 --- a/src/gpu/predict_gpu.jl +++ b/src/gpu/predict_gpu.jl @@ -52,9 +52,9 @@ end """ predict_kernel! 
- GaussianRegression + MLE2P """ -function predict_kernel!(::Type{L}, pred::AbstractMatrix{T}, split, feat, cond_float, leaf_pred::AbstractMatrix{T}, X::CuDeviceMatrix, K) where {L<:GaussianRegression,T} +function predict_kernel!(::Type{L}, pred::AbstractMatrix{T}, split, feat, cond_float, leaf_pred::AbstractMatrix{T}, X::CuDeviceMatrix, K) where {L<:MLE2P,T} idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x nid = 1 @inbounds if idx <= size(pred, 2) @@ -97,7 +97,7 @@ function predict(model::GBTreeGPU{L,T,S}, X::AbstractMatrix) where {L,T,S} pred .= sigmoid.(pred) elseif L ∈ [Poisson, Gamma, Tweedie] pred .= exp.(pred) - elseif L == Gaussian + elseif L == GaussianDist pred[2, :] .= exp.(pred[2, :]) elseif L == Softmax pred = transpose(reshape(pred, model.K, :)) @@ -119,12 +119,12 @@ function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, lambda) where {L<:GradientRegr @allowscalar(-βˆ‘[1] / (βˆ‘[2] + lambda * βˆ‘[3])) end -# prediction in Leaf - Gaussian -function pred_leaf_gpu!(p::AbstractMatrix{T}, n, βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:GaussianRegression,T,S} +# prediction in Leaf - MLE2P +function pred_leaf_gpu!(p::AbstractMatrix{T}, n, βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:MLE2P,T,S} @allowscalar(p[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5])) @allowscalar(p[2, n] = -params.eta * βˆ‘[2] / (βˆ‘[4] + params.lambda * βˆ‘[5])) return nothing end -function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:GradientRegression,T,S} +function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:MLE2P,T,S} @allowscalar(-params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5])) end \ No newline at end of file diff --git a/src/importance.jl b/src/importance.jl index 4d47152e..0e2ee95a 100644 --- a/src/importance.jl +++ b/src/importance.jl @@ -10,6 +10,7 @@ end importance(model::GBTree) Sorted normalized feature importance based on loss function gain. +Feature names associated to the model are stored in `model.info[:fnames]` as a string `Vector` and can be updated at any time. Eg: `model.info[:fnames] = new_fnames_vec`. """ function importance(model::Union{GBTree,GBTreeGPU}) fnames = model.info[:fnames] @@ -22,7 +23,7 @@ function importance(model::Union{GBTree,GBTreeGPU}) gain .= gain ./ sum(gain) pairs = collect(Dict(zip(string.(fnames), gain))) - sort!(pairs, by=x -> -x[2]) + sort!(pairs, by = x -> -x[2]) return pairs end diff --git a/src/loss.jl b/src/loss.jl index f195aba4..a211262c 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -39,21 +39,24 @@ function update_grads!(::Type{Tweedie}, δ𝑀::Matrix, p::Matrix, y::Vector; kw @inbounds for i in eachindex(y) pred = exp(p[1, i]) δ𝑀[1, i] = 2 * (pred^(2 - rho) - y[i] * pred^(1 - rho)) * δ𝑀[3, i] - δ𝑀[2, i] = 2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑀[3, i] + δ𝑀[2, i] = + 2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑀[3, i] end end # L1 function update_grads!(::Type{L1}, δ𝑀::Matrix, p::Matrix, y::Vector; alpha, kwargs...) @inbounds for i in eachindex(y) - δ𝑀[1, i] = (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) * δ𝑀[3, i] + δ𝑀[1, i] = + (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) * + δ𝑀[3, i] end end # Softmax function update_grads!(::Type{Softmax}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) 
- p .= p .- maximum(p, dims=1) - sums = sum(exp.(p), dims=1) + p .= p .- maximum(p, dims = 1) + sums = sum(exp.(p), dims = 1) K = (size(δ𝑀, 1) - 1) Γ· 2 for i in eachindex(y) for k = 1:K @@ -79,14 +82,42 @@ end # Gaussian - http://jrmeyer.github.io/machinelearning/2017/08/18/mle.html # pred[i][1] = ΞΌ # pred[i][2] = log(Οƒ) -function update_grads!(::Type{Gaussian}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) +function update_grads!(::Type{GaussianDist}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) @inbounds @simd for i in eachindex(y) # first order δ𝑀[1, i] = (p[1, i] - y[i]) / exp(2 * p[2, i]) * δ𝑀[5, i] δ𝑀[2, i] = (1 - (p[1, i] - y[i])^2 / exp(2 * p[2, i])) * δ𝑀[5, i] # second order δ𝑀[3, i] = δ𝑀[5, i] / exp(2 * p[2, i]) - δ𝑀[4, i] = 2 * δ𝑀[5, i] / exp(2 * p[2, i]) * (p[1, i] - y[i])^2 + δ𝑀[4, i] = δ𝑀[5, i] * 2 / exp(2 * p[2, i]) * (p[1, i] - y[i])^2 + end +end + +# LogisticProb - https://en.wikipedia.org/wiki/Logistic_distribution +# pdf = +# pred[i][1] = ΞΌ +# pred[i][2] = log(s) +function update_grads!(::Type{LogisticDist}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) + Ο΅ = eltype(p)(2e-7) + @inbounds @simd for i in eachindex(y) + # first order + δ𝑀[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑀[5, i] + δ𝑀[2, i] = + -( + exp(-p[2, i]) * + (y[i] - p[1, i]) * + tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) - 1 + ) * δ𝑀[5, i] + # second order + δ𝑀[3, i] = + sech((y[i] - p[1, i]) / (2 * exp(p[2, i])))^2 / (2 * exp(2 * p[2, i])) * + δ𝑀[5, i] + δ𝑀[4, i] = + ( + exp(-2 * p[2, i]) * + (p[1, i] - y[i]) * + (p[1, i] - y[i] + exp(p[2, i]) * sinh(exp(-p[2, i]) * (p[1, i] - y[i]))) + ) / (1 + cosh(exp(-p[2, i]) * (p[1, i] - y[i]))) * δ𝑀[5, i] end end @@ -116,17 +147,27 @@ end # get the gain metric ############################## # GradientRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:GradientRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:GradientRegression,T<:AbstractFloat} βˆ‘[1]^2 / (βˆ‘[2] + Ξ» * βˆ‘[3]) / 2 end # GaussianRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:GaussianRegression,T<:AbstractFloat} +function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MLE2P,T<:AbstractFloat} (βˆ‘[1]^2 / (βˆ‘[3] + Ξ» * βˆ‘[5]) + βˆ‘[2]^2 / (βˆ‘[4] + Ξ» * βˆ‘[5])) / 2 end # MultiClassRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MultiClassRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:MultiClassRegression,T<:AbstractFloat} gain = zero(T) @inbounds for k = 1:K gain += βˆ‘[k]^2 / (βˆ‘[k+K] + Ξ» * βˆ‘[2*K+1]) / 2 @@ -135,7 +176,12 @@ function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MultiClassRegre end # QuantileRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:QuantileRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:QuantileRegression,T<:AbstractFloat} abs(βˆ‘[1]) end @@ -145,13 +191,20 @@ function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:L1Regression,T< end -function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:Union{GradientRegression,QuantileRegression,L1Regression}} +function update_childs_βˆ‘!( + ::Type{L}, + nodes, + n, + bin, + feat, + K, +) where {L<:Union{GradientRegression,QuantileRegression,L1Regression}} nodes[n<<1].βˆ‘ .= nodes[n].hL[feat][(3*bin-2):(3*bin)] nodes[n<<1+1].βˆ‘ .= nodes[n].hR[feat][(3*bin-2):(3*bin)] 
return nothing end -function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:GaussianRegression} +function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:MLE2P} nodes[n<<1].βˆ‘ .= nodes[n].hL[feat][(5*bin-4):(5*bin)] nodes[n<<1+1].βˆ‘ .= nodes[n].hR[feat][(5*bin-4):(5*bin)] return nothing diff --git a/src/models.jl b/src/models.jl index acb244cc..90959401 100644 --- a/src/models.jl +++ b/src/models.jl @@ -3,7 +3,7 @@ abstract type GradientRegression <: ModelType end abstract type L1Regression <: ModelType end abstract type QuantileRegression <: ModelType end abstract type MultiClassRegression <: ModelType end -abstract type GaussianRegression <: ModelType end +abstract type MLE2P <: ModelType end # 2-parameters max-likelihood struct Linear <: GradientRegression end struct Logistic <: GradientRegression end struct Poisson <: GradientRegression end @@ -12,7 +12,8 @@ struct Tweedie <: GradientRegression end struct L1 <: L1Regression end struct Quantile <: QuantileRegression end struct Softmax <: MultiClassRegression end -struct Gaussian <: GaussianRegression end +struct GaussianDist <: MLE2P end +struct LogisticDist <: MLE2P end # make a Random Number Generator object mk_rng(rng::Random.AbstractRNG) = rng @@ -29,9 +30,9 @@ mutable struct EvoTreeRegressor{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Det colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any end function EvoTreeRegressor(; kwargs...) @@ -52,16 +53,18 @@ function EvoTreeRegressor(; kwargs...) :alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -85,7 +88,9 @@ function EvoTreeRegressor(; kwargs...) elseif args[:loss] == :quantile L = Quantile else - error("Invalid loss: $(args[:loss]). Only [`:linear`, `:logistic`, `:L1`, `:quantile`] are supported at the moment by EvoTreeRegressor.") + error( + "Invalid loss: $(args[:loss]). Only [`:linear`, `:logistic`, `:L1`, `:quantile`] are supported at the moment by EvoTreeRegressor.", + ) end model = EvoTreeRegressor{L,T,Int}( @@ -101,7 +106,8 @@ function EvoTreeRegressor(; kwargs...) T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end @@ -118,9 +124,9 @@ mutable struct EvoTreeCount{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabi colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any end function EvoTreeCount(; kwargs...) @@ -140,16 +146,18 @@ function EvoTreeCount(; kwargs...) 
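# Illustrative sketch of the keyword handling shared by these constructors:
# unspecified arguments fall back to the defaults dict and unrecognised ones are
# reported via @info as ignored; `foo` below is a deliberately bogus keyword.
config = EvoTreeCount(nrounds = 50, max_depth = 4, foo = 1)
# logs: "Following 1 provided arguments will be ignored: foo." and lists the
# remaining arguments left at their defaults.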
:alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -173,7 +181,8 @@ function EvoTreeCount(; kwargs...) T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end @@ -189,8 +198,8 @@ mutable struct EvoTreeClassifier{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Pr colsample::T nbins::S alpha::T - rng - device + rng::Any + device::Any end function EvoTreeClassifier(; kwargs...) @@ -209,16 +218,18 @@ function EvoTreeClassifier(; kwargs...) :nbins => 32, :alpha => 0.5, :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -241,12 +252,13 @@ function EvoTreeClassifier(; kwargs...) args[:nbins], T(args[:alpha]), args[:rng], - args[:device]) + args[:device], + ) return model end -mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic +mutable struct EvoTreeMLE{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic nrounds::S lambda::T gamma::T @@ -257,11 +269,96 @@ mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Prob colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any +end + +function EvoTreeMLE(; kwargs...) 
+ + # defaults arguments + args = Dict{Symbol,Any}( + :T => Float64, + :loss => :gaussian, + :nrounds => 10, + :lambda => 0.0, + :gamma => 0.0, # min gain to split + :eta => 0.1, # learning rate + :max_depth => 5, + :min_weight => 1.0, # minimal weight, different from xgboost (but same for linear) + :rowsample => 1.0, + :colsample => 1.0, + :nbins => 32, + :alpha => 0.5, + :monotone_constraints => Dict{Int,Int}(), + :rng => 123, + :device => "cpu", + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + args[:rng] = mk_rng(args[:rng])::Random.AbstractRNG + args[:loss] = Symbol(args[:loss]) + T = args[:T] + + if args[:loss] in [:gaussian, :normal] + L = GaussianDist + elseif args[:loss] == :logistic + L = LogisticDist + else + error( + "Invalid loss: $(args[:loss]). Only `:normal`, `:gaussian` and `:logistic` are supported at the moment by EvoTreeMLE.", + ) + end + + model = EvoTreeMLE{L,T,Int}( + args[:nrounds], + T(args[:lambda]), + T(args[:gamma]), + T(args[:eta]), + args[:max_depth], + T(args[:min_weight]), + T(args[:rowsample]), + T(args[:colsample]), + args[:nbins], + T(args[:alpha]), + args[:monotone_constraints], + args[:rng], + args[:device], + ) + + return model end + +mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic + nrounds::S + lambda::T + gamma::T + eta::T + max_depth::S + min_weight::T # real minimum number of observations, different from xgboost (but same for linear) + rowsample::T # subsample + colsample::T + nbins::S + alpha::T + monotone_constraints::Any + rng::Any + device::Any +end function EvoTreeGaussian(; kwargs...) # defaults arguments @@ -279,16 +376,18 @@ function EvoTreeGaussian(; kwargs...) :alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -296,7 +395,7 @@ function EvoTreeGaussian(; kwargs...) end args[:rng] = mk_rng(args[:rng])::Random.AbstractRNG - L = Gaussian + L = GaussianDist T = args[:T] model = EvoTreeGaussian{L,T,Int}( @@ -312,10 +411,17 @@ function EvoTreeGaussian(; kwargs...) 
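# Illustrative sketch of the new EvoTreeMLE constructor defined above: it fits a
# two-parameter maximum-likelihood model (location and log-scale per observation)
# with loss = :gaussian / :normal or :logistic; the values below are placeholders.
config = EvoTreeMLE(loss = :logistic, nrounds = 100, eta = 0.05, nbins = 64)
# Any other loss (e.g. :poisson) raises the error message shown above.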
T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end # const EvoTypes = Union{EvoTreeRegressor,EvoTreeCount,EvoTreeClassifier,EvoTreeGaussian} -const EvoTypes{L,T,S} = Union{EvoTreeRegressor{L,T,S},EvoTreeCount{L,T,S},EvoTreeClassifier{L,T,S},EvoTreeGaussian{L,T,S}} +const EvoTypes{L,T,S} = Union{ + EvoTreeRegressor{L,T,S}, + EvoTreeCount{L,T,S}, + EvoTreeClassifier{L,T,S}, + EvoTreeGaussian{L,T,S}, + EvoTreeMLE{L,T,S}, +} diff --git a/src/plot.jl b/src/plot.jl index e99cefd4..0f04dcaf 100644 --- a/src/plot.jl +++ b/src/plot.jl @@ -20,16 +20,21 @@ function get_adj_list(tree::EvoTrees.Tree) push!(adj, []) end end - return (map=map, adj=adj) + return (map = map, adj = adj) end function get_shapes(tree_layout) shapes = Vector(undef, length(tree_layout)) - for i = 1:length(tree_layout) + for i = eachindex(tree_layout) x, y = tree_layout[i][1], tree_layout[i][2] # center point x_buff = 0.45 y_buff = 0.45 - shapes[i] = [(x - x_buff, y + y_buff), (x + x_buff, y + y_buff), (x + x_buff, y - y_buff), (x - x_buff, y - y_buff)] + shapes[i] = [ + (x - x_buff, y + y_buff), + (x + x_buff, y + y_buff), + (x + x_buff, y - y_buff), + (x - x_buff, y - y_buff), + ] end return shapes end @@ -37,13 +42,15 @@ end function get_annotations(tree_layout, map, tree, var_names) # annotations = Vector{Tuple{Float64, Float64, String, Tuple}}(undef, length(tree_layout)) annotations = [] - for i = 1:length(tree_layout) + for i = eachindex(tree_layout) x, y = tree_layout[i][1], tree_layout[i][2] # center point if tree.split[map[i]] - feat = isnothing(var_names) ? "feat: " * string(tree.feat[map[i]]) : var_names[tree.feat[map[i]]] - txt = "$feat\n" * string(round(tree.cond_float[map[i]], sigdigits=3)) + feat = + isnothing(var_names) ? "feat: " * string(tree.feat[map[i]]) : + var_names[tree.feat[map[i]]] + txt = "$feat\n" * string(round(tree.cond_float[map[i]], sigdigits = 3)) else - txt = "pred:\n" * string(round(tree.pred[1, map[i]], sigdigits=3)) + txt = "pred:\n" * string(round(tree.pred[1, map[i]], sigdigits = 3)) end # annotations[i] = (x, y, txt, (9, :white, "helvetica")) push!(annotations, (x, y, txt, 10)) @@ -54,16 +61,22 @@ end function get_curves(adj, tree_layout, shapes) curves = [] num_curves = sum(length.(adj)) - for i = 1:length(adj) - for j = 1:length(adj[i]) + for i = eachindex(adj) + for j = eachindex(adj[i]) # curves is a length 2 tuple: (vector Xs, vector Ys) - push!(curves, ([tree_layout[i][1], tree_layout[adj[i][j]][1]], [shapes[i][3][2], shapes[adj[i][j]][1][2]])) + push!( + curves, + ( + [tree_layout[i][1], tree_layout[adj[i][j]][1]], + [shapes[i][3][2], shapes[adj[i][j]][1][2]], + ), + ) end end return curves end -@recipe function plot(tree::EvoTrees.Tree, var_names=nothing) +@recipe function plot(tree::EvoTrees.Tree, var_names = nothing) map, adj = EvoTrees.get_adj_list(tree) tree_layout = length(adj) == 1 ? [[0.0, 0.0]] : NetworkLayout.buchheim(adj) @@ -82,7 +95,7 @@ end size --> size annotations --> annotations - for i = 1:length(shapes) + for i = eachindex(shapes) @series begin fillcolor = length(adj[i]) == 0 ? 
"#84DCC6" : "#C8D3D5" fillcolor --> fillcolor @@ -91,7 +104,7 @@ end end end - for i = 1:length(curves) + for i = eachindex(curves) @series begin seriestype --> :curves return curves[i] @@ -99,7 +112,7 @@ end end end -@recipe function plot(model::EvoTrees.GBTree, n=1, var_names=nothing) +@recipe function plot(model::EvoTrees.GBTree, n = 1, var_names = nothing) isnothing(var_names) @@ -121,7 +134,7 @@ end size --> size annotations --> annotations - for i = 1:length(shapes) + for i = eachindex(shapes) @series begin fillcolor = length(adj[i]) == 0 ? "#84DCC6" : "#C8D3D5" fillcolor --> fillcolor @@ -130,7 +143,7 @@ end end end - for i = 1:length(curves) + for i = eachindex(curves) @series begin seriestype --> :curves return curves[i] diff --git a/src/predict.jl b/src/predict.jl index 8160a429..8f384799 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -2,7 +2,8 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:GradientRegress @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] += tree.pred[1, nid] end @@ -13,18 +14,20 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:Logistic,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] = clamp(pred[1, i] + tree.pred[1, nid], -15, 15) end return nothing end -function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:GaussianRegression,T} +function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:MLE2P,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] += tree.pred[1, nid] @inbounds pred[2, i] = max(-15, pred[2, i] + tree.pred[2, nid]) @@ -41,7 +44,8 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? 
nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds for k = 1:K pred[k, i] += tree.pred[k, nid] @@ -75,7 +79,7 @@ function predict(model::GBTree{L,T,S}, X::AbstractMatrix) where {L,T,S} pred .= sigmoid.(pred) elseif L ∈ [Poisson, Gamma, Tweedie] pred .= exp.(pred) - elseif L == Gaussian + elseif L in [GaussianDist, LogisticDist] pred[2, :] .= exp.(pred[2, :]) elseif L == Softmax @inbounds for i in axes(pred, 2) @@ -87,31 +91,67 @@ function predict(model::GBTree{L,T,S}, X::AbstractMatrix) where {L,T,S} end -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:GradientRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:GradientRegression,T,S} pred[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[2] + params.lambda * βˆ‘[3]) end -function pred_scalar_cpu!(βˆ‘::Vector{T}, params::EvoTypes, K) where {L<:GradientRegression,T,S} +function pred_scalar_cpu!( + βˆ‘::AbstractVector{T}, + params::EvoTypes, + K, +) where {L<:GradientRegression,T,S} -params.eta * βˆ‘[1] / (βˆ‘[2] + params.lambda * βˆ‘[3]) end -# prediction in Leaf - GaussianRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:GaussianRegression,T,S} +# prediction in Leaf - MLE2P +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:MLE2P,T,S} pred[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5]) pred[2, n] = -params.eta * βˆ‘[2] / (βˆ‘[4] + params.lambda * βˆ‘[5]) end -function pred_scalar_cpu!(βˆ‘::Vector{T}, params::EvoTypes{L,T,S}, K) where {L<:GaussianRegression,T,S} +function pred_scalar_cpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:MLE2P,T,S} -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5]) end # prediction in Leaf - MultiClassRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:MultiClassRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:MultiClassRegression,T,S} @inbounds for k = 1:K pred[k, n] = -params.eta * βˆ‘[k] / (βˆ‘[k+K] + params.lambda * βˆ‘[2*K+1]) end end # prediction in Leaf - QuantileRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:QuantileRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:QuantileRegression,T,S} pred[1, n] = params.eta * quantile(δ𝑀[2, 𝑖], params.alpha) / (1 + params.lambda) # pred[1,n] = params.eta * quantile(view(δ𝑀, 2, 𝑖), params.alpha) / (1 + params.lambda) end @@ -120,9 +160,17 @@ end # end # prediction in Leaf - L1Regression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:L1Regression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:L1Regression,T,S} pred[1, n] = params.eta * βˆ‘[1] / (βˆ‘[3] * (1 + params.lambda)) end -function pred_scalar_cpu!(βˆ‘::Vector, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} +function pred_scalar_cpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} params.eta * βˆ‘[1] / (βˆ‘[3] * (1 + params.lambda)) end \ No newline at end of file diff --git a/src/structs.jl b/src/structs.jl index 64a22055..17752dd2 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -19,7 +19,8 @@ function TrainNode(nvars, nbins, K, T) [zeros(T, (2 * K 
+ 1) * nbins) for j = 1:nvars], [zeros(T, (2 * K + 1) * nbins) for j = 1:nvars], [zeros(T, (2 * K + 1) * nbins) for j = 1:nvars], - zeros(T, nbins, nvars)) + zeros(T, nbins, nvars), + ) return node end @@ -40,7 +41,8 @@ function Tree{L,T}(x::Vector{T}) where {L,T} zeros(T, 1), zeros(T, 1), reshape(x, :, 1), - zeros(Bool, 1)) + zeros(Bool, 1), + ) end function Tree{L,T}(depth, K, ::T) where {L,T} @@ -50,7 +52,7 @@ function Tree{L,T}(depth, K, ::T) where {L,T} zeros(T, 2^depth - 1), zeros(T, 2^depth - 1), zeros(T, K, 2^depth - 1), - zeros(Bool, 2^depth - 1) + zeros(Bool, 2^depth - 1), ) end @@ -67,6 +69,6 @@ struct GBTree{L,T,S} params::EvoTypes metric::Metric K::Int - info + info::Any end -(m::GBTree)(x::AbstractMatrix) = predict(m, x) \ No newline at end of file +(m::GBTree)(x::AbstractMatrix) = predict(m, x) diff --git a/test/MLJ.jl b/test/MLJ.jl index a19dc22b..3d28a3a8 100644 --- a/test/MLJ.jl +++ b/test/MLJ.jl @@ -15,18 +15,18 @@ X = MLJBase.table(X) # @load EvoTreeRegressor # linear regression -tree_model = EvoTreeRegressor(max_depth=5, eta=0.05, nrounds=10) +tree_model = EvoTreeRegressor(max_depth = 5, eta = 0.05, nrounds = 10) # logistic regression -tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, eta=0.05, nrounds=10) +tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, eta = 0.05, nrounds = 10) # quantile regression # tree_model = EvoTreeRegressor(loss=:quantile, alpha=0.75, max_depth=5, eta=0.05, nrounds=10) mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) # predict on train data pred_train = predict(mach, selectrows(X, train)) @@ -64,15 +64,16 @@ mean(abs.(pred_test - selectrows(Y, test))) ################################################## X, y = @load_crabs -tree_model = EvoTreeClassifier(max_depth=4, eta=0.05, lambda=0.0, gamma=0.0, nrounds=10) +tree_model = + EvoTreeClassifier(max_depth = 4, eta = 0.05, lambda = 0.0, gamma = 0.0, nrounds = 10) # @load EvoTreeRegressor mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 50 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) pred_train = predict(mach, selectrows(X, train)) pred_train_mode = predict_mode(mach, selectrows(X, train)) @@ -95,7 +96,7 @@ Y = rand(UInt8, size(X, 1)) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] @@ -105,22 +106,29 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeCount( - loss=:poisson, metric=:poisson, - nrounds=10, - lambda=0.0, gamma=0.0, eta=0.1, - max_depth=6, min_weight=1.0, - rowsample=0.5, colsample=0.5, nbins=32) + loss = :poisson, + metric = :poisson, + nrounds = 10, + lambda = 0.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 1.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) X = MLJBase.table(X) X = MLJBase.matrix(X) # 
typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) pred = predict(mach, selectrows(X, train)) pred_mean = predict_mean(mach, selectrows(X, train)) @@ -136,7 +144,7 @@ Y = rand(size(X, 1)) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] @@ -146,21 +154,78 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeGaussian( - loss=:gaussian, metric=:gaussian, - nrounds=10, - lambda=0.0, gamma=0.0, eta=0.1, - max_depth=6, min_weight=1.0, - rowsample=0.5, colsample=0.5, nbins=32) + nrounds = 10, + lambda = 0.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 1.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) X = MLJBase.table(X) # typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) + +pred = predict(mach, selectrows(X, train)) +pred_mean = predict_mean(mach, selectrows(X, train)) +pred_mode = predict_mode(mach, selectrows(X, train)) +# pred_mode = predict_median(mach, selectrows(X,train)) +mean(abs.(pred_mean - selectrows(Y, train))) + +q_20 = quantile.(pred, 0.20) +q_20 = quantile.(pred, 0.80) + +report(mach) + +################################################## +### Logistic - Larger data +################################################## +features = rand(10_000, 10) +X = features +Y = rand(size(X, 1)) +𝑖 = collect(1:size(X, 1)) + +# train-eval split +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) +train_size = 0.8 +𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] + +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] + +# @load EvoTreeRegressor +tree_model = EvoTreeMLE( + loss = :logistic, + nrounds = 10, + lambda = 1.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 32.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) + +X = MLJBase.table(X) + +# typeof(X) +mach = machine(tree_model, X, Y) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) + +mach.model.nrounds += 10 +fit!(mach, rows = train, verbosity = 1) pred = predict(mach, selectrows(X, train)) pred_mean = predict_mean(mach, selectrows(X, train)) @@ -198,6 +263,7 @@ for model ∈ [ EvoTreeClassifier(), EvoTreeCount(), EvoTreeRegressor(), + EvoTreeMLE(), EvoTreeGaussian(), ] diff --git a/test/core.jl b/test/core.jl index 32e9c522..5bb2c65b 100644 --- a/test/core.jl +++ b/test/core.jl @@ -13,7 +13,7 @@ Y = sigmoid(Y) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
@@ -24,16 +24,31 @@ y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
 @testset "EvoTreeRegressor - Linear" begin
     # linear
     params1 = EvoTreeRegressor(
-        loss=:linear, metric=:mse,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :linear,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :mse,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -43,16 +58,30 @@ end
 @testset "EvoTreeRegressor - Logistic" begin
     params1 = EvoTreeRegressor(
-        loss=:logistic, metric=:logloss,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :logistic,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :logloss,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -62,16 +91,30 @@ end
 @testset "EvoTreeRegressor - Gamma" begin
     params1 = EvoTreeRegressor(
-        loss=:gamma, metric=:gamma,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :gamma,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :gamma,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -81,16 +124,30 @@ end
 @testset "EvoTreeRegressor - Tweedie" begin
     params1 = EvoTreeRegressor(
-        loss=:tweedie, metric=:tweedie,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :tweedie,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :tweedie,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -100,16 +157,32 @@ end
 @testset "EvoTreeRegressor - L1" begin
     params1 = EvoTreeRegressor(
-        loss=:L1, alpha=0.5, metric=:mae,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :L1,
+        alpha = 0.5,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :mae,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -119,16 +192,32 @@ end
 @testset "EvoTreeRegressor - Quantile" begin
     params1 = EvoTreeRegressor(
-        loss=:quantile, alpha=0.5, metric=:quantile,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :quantile,
+        alpha = 0.5,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :quantile,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -138,16 +227,30 @@ end
 @testset "EvoTreeCount - Count" begin
     params1 = EvoTreeCount(
-        loss=:poisson, metric=:poisson,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :poisson,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :poisson,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -155,18 +258,92 @@ end
     @test mse_gain_pct < -0.75
 end
+@testset "EvoTreeMLE - Gaussian" begin
+    params1 = EvoTreeMLE(
+        loss = :gaussian,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
+
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
+    preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :gaussian,
+        print_every_n = 25,
+    )
+
+    preds = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error = mean(abs.(preds .- y_eval) .^ 2)
+    mse_gain_pct = mse_error / mse_error_ini - 1
+    @test mse_gain_pct < -0.75
+end
+
+@testset "EvoTreeMLE - Logistic" begin
+    params1 = EvoTreeMLE(
+        loss = :logistic,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
+
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
+    preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :logistic,
+        print_every_n = 25,
+    )
+
+    preds = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error = mean(abs.(preds .- y_eval) .^ 2)
+    mse_gain_pct = mse_error / mse_error_ini - 1
+    @test mse_gain_pct < -0.75
+end
+
 @testset "EvoTreeGaussian - Gaussian" begin
     params1 = EvoTreeGaussian(
-        loss=:gaussian, metric=:gaussian,
-        nrounds=100, nbins=100,
-        lambda=0.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=10.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25)
     preds = EvoTrees.predict(model, x_eval)[:, 1]
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -176,11 +353,18 @@ end
 @testset "EvoTrees - Feature Importance" begin
     params1 = EvoTreeRegressor(
-        loss=:linear, metric=:mae,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :linear,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
     model = fit_evotree(params1; x_train, y_train)
     features_gain = importance(model)
diff --git a/test/gpu_base.jl b/test/gpu_base.jl
index 22b98d5c..78c7c282 100644
--- a/test/gpu_base.jl
+++ b/test/gpu_base.jl
@@ -18,7 +18,7 @@ Y = sigmoid(Y)
 𝑖 = collect(1:size(X, 1))
 # train-eval split
-𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
@@ -29,43 +29,85 @@ y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
 ################################
 # linear
 ################################
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:linear, metric=:none,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0)
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :linear,
+    metric = :none,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+)
 @time model = fit_evotree_gpu(params1; x_train, y_train);
 @time pred_train_linear = predict_gpu(model, X_train)
 x_perm = sortperm(X_train[:, 1])
-plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
+plot(
+    X_train,
+    Y_train,
+    msize = 1,
+    mcolor = "gray",
+    mswidth = 0,
+    background_color = RGB(1, 1, 1),
+    seriestype = :scatter,
+    xaxis = ("feature"),
+    yaxis = ("target"),
+    legend = true,
+    label = "",
+)
+plot!(
+    X_train[:, 1][x_perm],
+    pred_train_linear[x_perm],
+    color = "navy",
+    linewidth = 1.5,
+    label = "Linear",
+)
 # savefig("figures/regression_sinus_gpu.png")
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:linear, metric=:mse,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0,
-    device="gpu")
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :linear,
+    metric = :mse,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    device = "gpu",
+)
-@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n=25);
-@time model = fit_evotree_gpu(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n = 25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25);
 @time pred_train_linear = predict_gpu(model, x_train)
 ################################
 # Logistic
 ################################
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:logistic, metric=:logloss,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0,
-    device="gpu")
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :logistic,
+    metric = :logloss,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    device = "gpu",
+)
 @time model = fit_evotree_gpu(params1; x_train, y_train);
 @time pred_train_linear = predict_gpu(model, X_train)
@@ -73,18 +115,30 @@ params1 = EvoTreeRegressor(T=Float32,
 ################################
 # Gaussian
 ################################
-params1 = EvoTreeGaussian(T=Float64,
-    loss=:gaussian, metric=:gaussian,
-    nrounds=200, nbins=64,
-    lambda=1.0, gamma=0.1, eta=0.1,
-    max_depth=5, min_weight=100.0,
-    rowsample=0.5, colsample=1.0, rng=123,
-    device="gpu")
+params1 = EvoTreeGaussian(
+    T = Float64,
+    loss = :gaussian,
+    metric = :gaussian,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 1.0,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 5,
+    min_weight = 100.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    rng = 123,
+    device = "gpu",
+)
-@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n=25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n = 25);
 @time pred_train_gauss = predict_gpu(model, x_train)
-pred_gauss = [Distributions.Normal(pred_train_gauss[i, 1], pred_train_gauss[i, 2]) for i in axes(pred_train_gauss, 1)]
+pred_gauss = [
+    Distributions.Normal(pred_train_gauss[i, 1], pred_train_gauss[i, 2]) for
+    i in axes(pred_train_gauss, 1)
+]
 pred_q20 = quantile.(pred_gauss, 0.2)
 pred_q80 = quantile.(pred_gauss, 0.8)
diff --git a/test/monotonic.jl b/test/monotonic.jl
index 6233afb4..873cd4f8 100644
--- a/test/monotonic.jl
+++ b/test/monotonic.jl
@@ -29,11 +29,18 @@
     # benchmark
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:linear, metric=:mse,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        loss=:linear,
+        nrounds=20,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -41,22 +48,28 @@
     # monotonic constraint
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:linear, metric=:mse,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.5,
-        max_depth=6, min_weight=0.0,
+        loss=:linear,
+        nrounds=20,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.5,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
     # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
@@ -101,11 +114,19 @@
     # benchmark
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:logistic, metric=:logloss,
-        nrounds=200, nbins=32,
-        lambda=0.05, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        loss=:logistic,
+        metric=:logloss,
+        nrounds=200,
+        nbins=32,
+        lambda=0.05,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -113,22 +134,28 @@
     # monotonic constraint
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:logistic, metric=:logloss,
-        nrounds=200, nbins=32,
-        lambda=0.05, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
+        loss=:logistic,
+        metric=:logloss,
+        nrounds=200,
+        nbins=32,
+        lambda=0.05,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
@@ -170,14 +197,21 @@
     ######################################
     ### Gaussian - CPU
     ######################################
-    # linear - benchmark
+    # benchmark
     params1 = EvoTreeGaussian(
         device="cpu",
         metric=:gaussian,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        nrounds=200,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -186,27 +220,32 @@
     params1 = EvoTreeGaussian(
         device="cpu",
         metric=:gaussian,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.5,
-        max_depth=6, min_weight=0.0,
+        nrounds=200,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.5,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
     ### Gaussian - GPU
     ######################################
-    # linear - benchmark
+    # benchmark
     # params1 = EvoTreeGaussian(
     #     device="gpu",
     #     metric=:gaussian,
@@ -215,8 +254,8 @@
     #     max_depth=6, min_weight=0.0,
     #     rowsample=0.5, colsample=1.0, rng=seed)
-    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
-    # preds_ref = predict(model, x_train);
+    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    # preds_ref = predict(model, x_train)
     # # monotonic constraint
     # params1 = EvoTreeGaussian(
@@ -228,14 +267,12 @@
     #     monotone_constraints=Dict(1 => 1),
     #     rowsample=0.5, colsample=1.0, rng=seed)
-    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
-    # preds_mono = predict(model, x_train);
+    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    # preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="GPU Gauss")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
 end
\ No newline at end of file
diff --git a/test/plot.jl b/test/plot.jl
index 2e8a598f..c182bd54 100644
--- a/test/plot.jl
+++ b/test/plot.jl
@@ -8,7 +8,7 @@ using EvoTrees
 # @load "data/model_gaussian_5.bson" model
 model = EvoTrees.load("data/model_linear_4.bson");
-var_names = ["var_$i" for i in 1:100]
+var_names = ["var_$i" for i = 1:100]
 plot(model)
 plot(model, 2)
 plot(model, 3, var_names)
@@ -17,15 +17,15 @@ plot(model.trees[2], var_names)
 typeof(tree_layout[1])
 BezierCurve(tree_layout[1])
-mutable struct BCurve{T <: GeometryBasics.Point}
+mutable struct BCurve{T<:GeometryBasics.Point}
     control_points::Vector{T}
 end
 function (bc::BCurve)(t::Real)
     p = zero(P2)
     n = length(bc.control_points) - 1
-    for i in 0:n
-        p += bc.control_points[i + 1] * binomial(n, i) * (1 - t)^(n - i) * t^i
+    for i = 0:n
+        p += bc.control_points[i+1] * binomial(n, i) * (1 - t)^(n - i) * t^i
     end
     p
 end
diff --git a/test/save_load.jl b/test/save_load.jl
new file mode 100644
index 00000000..c6d8ca11
--- /dev/null
+++ b/test/save_load.jl
@@ -0,0 +1,69 @@
+using Statistics
+using StatsBase: sample, quantile
+using Distributions
+using Random
+using EvoTrees
+using EvoTrees: sigmoid, logit
+using Serialization
+
+# prepare a dataset
+Random.seed!(12)
+features = rand(10_000) .* 5
+X = reshape(features, (size(features)[1], 1))
+Y = sin.(features) .* 0.5 .+ 0.5
+Y = logit(Y) + randn(size(Y))
+Y = sigmoid(Y)
+𝑖 = collect(1:size(X, 1))
+
+# train-eval split
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
+train_size = 0.8
+𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
+
+x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :]
+y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
+
+# linear
+params1 = EvoTreeRegressor(
+    T = Float64,
+    loss = :linear,
+    metric = :mse,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.1,
+    gamma = 0.1,
+    eta = 0.05,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    rng = 123,
+)
+
+m = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25);
+p = m(x_eval)
+
+# serialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v182.dat"), m);
+# serialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v182.dat"), p);
+
+# m_172 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v172.dat"));
+# p_172 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v172.dat"));
+# pm_172 = m_172(x_eval)
+
+# m_180 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v180.dat"));
+# p_180 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v180.dat"));
+# pm_180 = m_180(x_eval)
+
+# m_182 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v182.dat"));
+# p_182 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v182.dat"));
+# pm_182 = m_182(x_eval)
+
+# @assert all(p .== p_172)
+# @assert all(p .== pm_172)
+# @assert all(p .== p_180)
+# @assert all(p .== pm_180)
+# @assert all(p .== p_182)
+# @assert all(p .== pm_182)
+
+# @info "test successful! πŸš€"
\ No newline at end of file