Merge pull request #162 from Evovest/monotonic

Monotonic
Evovest · Sep 14, 2022 · 8effc61 · 8effc61 · jeremiedb · Sep 14, 2022
2 parents c1bf698 + 118ac35
commit 8effc61
Show file tree

Hide file tree

Showing 15 changed files with 676 additions and 323 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "EvoTrees"
 uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 authors = ["jeremiedb <[email protected]>"]
-version = "0.10.0"
+version = "0.10.1"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"

diff --git a/experiments/benchmarks_v2.jl b/experiments/benchmarks_v2.jl
@@ -30,7 +30,7 @@ params_evo = EvoTreeRegressor(T=Float32,
         rowsample=0.5, colsample=0.5, nbins=64)
 
 
-nobs = Int(5e6)
+nobs = Int(1e6)
 num_feat = Int(100)
 @info "testing with: $nobs observations | $num_feat features."
 X = rand(nobs, num_feat)

diff --git a/experiments/readme_plots_cpu.jl b/experiments/readme_plots_cpu.jl
@@ -21,7 +21,7 @@ Y = sigmoid(Y)
 𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
-𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
 
 X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
 Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
@@ -30,7 +30,7 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
 params1 = EvoTreeRegressor(T=Float64,
     loss=:linear, metric=:mse,
     nrounds=100, nbins=64,
-    λ=0.1, γ=0.1, η=1.0,
+    lambda=0.1, gamma=0.1, eta=1.0,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0,
     rng=123)
@@ -47,7 +47,7 @@ params1 = EvoTreeRegressor(T=Float64,
 @time pred_train_linear = predict(model, X_train);
 @time pred_eval_linear = predict(model, X_eval)
 mean(abs.(pred_train_linear .- Y_train))
-sqrt(mean((pred_train_linear .- Y_train).^2))
+sqrt(mean((pred_train_linear .- Y_train) .^ 2))
 
 # linear weighted
 params1 = EvoTreeRegressor(T=Float64,
@@ -73,13 +73,13 @@ W_train = rand(eltype(Y_train), size(Y_train)) .+ 0
 @time pred_train_linear_w = predict(model, X_train);
 @time pred_eval_linear_w = predict(model, X_eval)
 mean(abs.(pred_train_linear_w .- Y_train))
-sqrt(mean((pred_train_linear_w .- Y_train).^2))
+sqrt(mean((pred_train_linear_w .- Y_train) .^ 2))
 
 # logistic / cross-entropy
 params1 = EvoTreeRegressor(
     loss=:logistic, metric=:logloss,
     nrounds=200, nbins=64,
-    λ=0.1, γ=0.1, η=0.05,
+    lambda=0.1, gamma=0.1, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 
@@ -88,40 +88,40 @@ params1 = EvoTreeRegressor(
 # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval)
 @time pred_train_logistic = predict(model, X_train);
 @time pred_eval_logistic = predict(model, X_eval)
-sqrt(mean((pred_train_logistic .- Y_train).^2))
+sqrt(mean((pred_train_logistic .- Y_train) .^ 2))
 
 # Poisson
 params1 = EvoTreeCount(
     loss=:poisson, metric=:poisson,
     nrounds=200, nbins=64,
-    λ=0.1, γ=0.1, η=0.05,
+    lambda=0.1, gamma=0.1, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
 # @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval)
 @time pred_train_poisson = predict(model, X_train);
 @time pred_eval_poisson = predict(model, X_eval)
-sqrt(mean((pred_train_poisson .- Y_train).^2))
+sqrt(mean((pred_train_poisson .- Y_train) .^ 2))
 
 # L1
 params1 = EvoTreeRegressor(
     loss=:L1, α=0.5, metric=:mae,
     nrounds=200, nbins=64,
-    λ=0.1, γ=0.1, η=0.05,
+    lambda=0.1, gamma=0.1, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
 @time pred_train_L1 = predict(model, X_train)
 @time pred_eval_L1 = predict(model, X_eval)
-sqrt(mean((pred_train_L1 .- Y_train).^2))
+sqrt(mean((pred_train_L1 .- Y_train) .^ 2))
 
-x_perm = sortperm(X_train[:,1])
+x_perm = sortperm(X_train[:, 1])
 plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:,1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
-plot!(X_train[:,1][x_perm], pred_train_linear_w[x_perm], color="lightblue", linewidth=1.5, label="LinearW")
-plot!(X_train[:,1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
-plot!(X_train[:,1][x_perm], pred_train_poisson[x_perm], color="green", linewidth=1.5, label="Poisson")
-plot!(X_train[:,1][x_perm], pred_train_L1[x_perm], color="pink", linewidth=1.5, label="L1")
+plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
+plot!(X_train[:, 1][x_perm], pred_train_linear_w[x_perm], color="lightblue", linewidth=1.5, label="LinearW")
+plot!(X_train[:, 1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
+plot!(X_train[:, 1][x_perm], pred_train_poisson[x_perm], color="green", linewidth=1.5, label="Poisson")
+plot!(X_train[:, 1][x_perm], pred_train_L1[x_perm], color="pink", linewidth=1.5, label="L1")
 savefig("figures/regression_sinus.png")
 
 ###############################
@@ -131,7 +131,7 @@ savefig("figures/regression_sinus.png")
 params1 = EvoTreeRegressor(
     loss=:quantile, α=0.5, metric=:none,
     nrounds=200, nbins=64,
-    λ=1.0, γ=0.0, η=0.05,
+    lambda=1.0, gamma=0.0, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 
@@ -145,7 +145,7 @@ sum(pred_train_q50 .< Y_train) / length(Y_train)
 params1 = EvoTreeRegressor(
     loss=:quantile, α=0.2, metric=:none,
     nrounds=200, nbins=64,
-    λ=1.0, γ=0.0, η=0.05,
+    lambda=1.0, gamma=0.0, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
@@ -156,19 +156,19 @@ sum(pred_train_q20 .< Y_train) / length(Y_train)
 params1 = EvoTreeRegressor(
     loss=:quantile, α=0.8, metric=:none,
     nrounds=200, nbins=64,
-    λ=1.0, γ=0.0, η=0.05,
+    lambda=1.0, gamma=0.0, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0)
 
 @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25)
 @time pred_train_q80 = predict(model, X_train)
 sum(pred_train_q80 .< Y_train) / length(Y_train)
 
-x_perm = sortperm(X_train[:,1])
+x_perm = sortperm(X_train[:, 1])
 plot(X_train, Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:,1][x_perm], pred_train_q50[x_perm], color="navy", linewidth=1.5, label="Median")
-plot!(X_train[:,1][x_perm], pred_train_q20[x_perm], color="darkred", linewidth=1.5, label="Q20")
-plot!(X_train[:,1][x_perm], pred_train_q80[x_perm], color="green", linewidth=1.5, label="Q80")
+plot!(X_train[:, 1][x_perm], pred_train_q50[x_perm], color="navy", linewidth=1.5, label="Median")
+plot!(X_train[:, 1][x_perm], pred_train_q20[x_perm], color="darkred", linewidth=1.5, label="Q20")
+plot!(X_train[:, 1][x_perm], pred_train_q80[x_perm], color="green", linewidth=1.5, label="Q80")
 savefig("figures/quantiles_sinus.png")
 
 
@@ -178,7 +178,7 @@ savefig("figures/quantiles_sinus.png")
 params1 = EvoTreeGaussian(
     loss=:gaussian, metric=:gaussian,
     nrounds=200, nbins=64,
-    λ=0.1, γ=0.1, η=0.05,
+    lambda=0.1, gamma=0.1, eta=0.05,
     max_depth=6, min_weight=1.0,
     rowsample=1.0, colsample=1.0, rng=123)
 
@@ -187,17 +187,17 @@ params1 = EvoTreeGaussian(
 @time pred_train = EvoTrees.predict(model, X_train);
 # @btime pred_train = EvoTrees.predict(model, X_train);
 
-pred_gauss = [Distributions.Normal(pred_train[i,1], pred_train[i,2]) for i in 1:size(pred_train, 1)]
+pred_gauss = [Distributions.Normal(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)]
 pred_q80 = quantile.(pred_gauss, 0.8)
 pred_q20 = quantile.(pred_gauss, 0.2)
 
 mean(Y_train .< pred_q80)
 mean(Y_train .< pred_q20)
 
-x_perm = sortperm(X_train[:,1])
+x_perm = sortperm(X_train[:, 1])
 plot(X_train[:, 1], Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:,1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu")
-plot!(X_train[:,1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
-plot!(X_train[:,1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
-plot!(X_train[:,1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
+plot!(X_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu")
+plot!(X_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
+plot!(X_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
+plot!(X_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
 savefig("figures/gaussian_sinus.png")
diff --git a/experiments/readme_plots_gpu.jl b/experiments/readme_plots_gpu.jl
@@ -22,7 +22,7 @@ Y = sigmoid(Y)
 𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
-𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
 
 X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
 Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
@@ -31,7 +31,7 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
 params1 = EvoTreeRegressor(T=Float32,
     loss=:linear, metric=:mse,
     nrounds=200, nbins=64,
-    λ=0.5, γ=0.1, η=0.1,
+    lambda=0.5, gamma=0.1, eta=0.1,
     max_depth=6, min_weight=1.0,
     rowsample=0.1, colsample=1.0,
     device="gpu")
@@ -47,13 +47,13 @@ sum(pred_train_linear_gpu .- pred_train_linear_cpu)
 # @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval, print_every_n = 25, metric=:mae)
 @time pred_train_linear = predict(model, X_train)
 mean(abs.(pred_train_linear .- Y_train))
-sqrt(mean((pred_train_linear .- Y_train).^2))
+sqrt(mean((pred_train_linear .- Y_train) .^ 2))
 
 # logistic / cross-entropy
 params1 = EvoTreeRegressor(T=Float32,
     loss=:logistic, metric=:logloss,
     nrounds=200, nbins=64,
-    λ=0.5, γ=0.1, η=0.1,
+    lambda=0.5, gamma=0.1, eta=0.1,
     max_depth=6, min_weight=1.0,
     rowsample=0.5, colsample=1.0,
     device="gpu")
@@ -63,12 +63,12 @@ params1 = EvoTreeRegressor(T=Float32,
 # 218.040 ms (123372 allocations: 34.71 MiB)
 # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval)
 @time pred_train_logistic = predict(model, X_train)
-sqrt(mean((pred_train_logistic .- Y_train).^2))
+sqrt(mean((pred_train_logistic .- Y_train) .^ 2))
 
-x_perm = sortperm(X_train[:,1])
+x_perm = sortperm(X_train[:, 1])
 plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:,1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
-plot!(X_train[:,1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
+plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
+plot!(X_train[:, 1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
 # plot!(X_train[:,1][x_perm], pred_train_poisson[x_perm], color = "green", linewidth = 1.5, label = "Poisson")
 # plot!(X_train[:,1][x_perm], pred_train_L1[x_perm], color = "pink", linewidth = 1.5, label = "L1")
 savefig("figures/regression_sinus_gpu.png")
@@ -80,7 +80,7 @@ savefig("figures/regression_sinus_gpu.png")
 params1 = EvoTreeGaussian(T=Float32,
     loss=:gaussian, metric=:gaussian,
     nrounds=200, nbins=64,
-    λ=1.0, γ=0.1, η=0.1,
+    lambda=1.0, gamma=0.1, eta=0.1,
     max_depth=6, min_weight=0.1,
     rowsample=0.5, colsample=1.0, rng=123,
     device="gpu")
@@ -90,17 +90,17 @@ params1 = EvoTreeGaussian(T=Float32,
 # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10);
 @time pred_train_gaussian = EvoTrees.predict(model, X_train)
 
-pred_gauss = [Distributions.Normal(pred_train_gaussian[i,1], pred_train_gaussian[i,2]) for i in 1:size(pred_train_gaussian, 1)]
+pred_gauss = [Distributions.Normal(pred_train_gaussian[i, 1], pred_train_gaussian[i, 2]) for i in axes(pred_train_gaussian, 1)]
 pred_q80 = quantile.(pred_gauss, 0.8)
 pred_q20 = quantile.(pred_gauss, 0.2)
 
 mean(Y_train .< pred_q80)
 mean(Y_train .< pred_q20)
 
-x_perm = sortperm(X_train[:,1])
+x_perm = sortperm(X_train[:, 1])
 plot(X_train[:, 1], Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:,1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linewidth=1.5, label="mu")
-plot!(X_train[:,1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
-plot!(X_train[:,1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
-plot!(X_train[:,1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
+plot!(X_train[:, 1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linewidth=1.5, label="mu")
+plot!(X_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
+plot!(X_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
+plot!(X_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
 savefig("figures/gaussian_sinus_gpu.png")
diff --git a/src/MLJ.jl b/src/MLJ.jl
@@ -140,6 +140,8 @@ EvoTreeRegressor is used to perform the following regression types:
 - `rowsample=1.0`:        Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
 - `colsample=1.0`:        Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
 - `nbins=32`:             Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value is the applicable constraint (-1=decreasing, 0=none, 1=increasing).
+  Only `:linear` and `:logistic` losses are supported at the moment.
 - `rng=123`:              Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
 - `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:mse`, `:mae`, `:logloss`.
 - `device="cpu"`:         Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`. Only `:linear` and `:logistic` losses are supported on GPU.
@@ -365,6 +367,7 @@ EvoTreeCount is used to perform Poisson probabilistic regression on count target
 - `rowsample=1.0`:              Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.
 - `colsample=1.0`:              Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.
 - `nbins=32`:                   Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value is the applicable constraint (-1=decreasing, 0=none, 1=increasing).
 - `rng=123`:                    Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
 - `metric::Symbol=:none`:       Metric that is to be tracked during the training process. One of: `:none`, `:poisson`, `:mae`, `:mse`.
 - `device="cpu"`:               Hardware device to use for computations. Only CPU is supported at the moment.
@@ -485,9 +488,11 @@ EvoTreeGaussian is used to perform Gaussain probabilistic regression, fitting μ
 - `rowsample=1.0`:              Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
 - `colsample=1.0`:              Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
 - `nbins=32`:                   Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value is the applicable constraint (-1=decreasing, 0=none, 1=increasing).
+  !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.
 - `rng=123`:                    Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
 - `metric::Symbol=:none`:       Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`.
-- `device="cpu"`:               Hardware device to use for computations. Only CPU is supported at the moment.
+- `device="cpu"`:               Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`.
 
 # Internal API