Skip to content

Commit

Permalink
Merge pull request #162 from Evovest/monotonic
Browse files Browse the repository at this point in the history
Monotonic
  • Loading branch information
jeremiedb authored Sep 14, 2022
2 parents c1bf698 + 118ac35 commit 8effc61
Show file tree
Hide file tree
Showing 15 changed files with 676 additions and 323 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.10.0"
version = "0.10.1"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
Expand Down
2 changes: 1 addition & 1 deletion experiments/benchmarks_v2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ params_evo = EvoTreeRegressor(T=Float32,
rowsample=0.5, colsample=0.5, nbins=64)


nobs = Int(5e6)
nobs = Int(1e6)
num_feat = Int(100)
@info "testing with: $nobs observations | $num_feat features."
X = rand(nobs, num_feat)
Expand Down
60 changes: 30 additions & 30 deletions experiments/readme_plots_cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Y = sigmoid(Y)
𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
train_size = 0.8
𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]

X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
Expand All @@ -30,7 +30,7 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
params1 = EvoTreeRegressor(T=Float64,
loss=:linear, metric=:mse,
nrounds=100, nbins=64,
λ=0.1, γ=0.1, η=1.0,
lambda=0.1, gamma=0.1, eta=1.0,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0,
rng=123)
Expand All @@ -47,7 +47,7 @@ params1 = EvoTreeRegressor(T=Float64,
@time pred_train_linear = predict(model, X_train);
@time pred_eval_linear = predict(model, X_eval)
mean(abs.(pred_train_linear .- Y_train))
sqrt(mean((pred_train_linear .- Y_train).^2))
sqrt(mean((pred_train_linear .- Y_train) .^ 2))

# linear weighted
params1 = EvoTreeRegressor(T=Float64,
Expand All @@ -73,13 +73,13 @@ W_train = rand(eltype(Y_train), size(Y_train)) .+ 0
@time pred_train_linear_w = predict(model, X_train);
@time pred_eval_linear_w = predict(model, X_eval)
mean(abs.(pred_train_linear_w .- Y_train))
sqrt(mean((pred_train_linear_w .- Y_train).^2))
sqrt(mean((pred_train_linear_w .- Y_train) .^ 2))

# logistic / cross-entropy
params1 = EvoTreeRegressor(
loss=:logistic, metric=:logloss,
nrounds=200, nbins=64,
λ=0.1, γ=0.1, η=0.05,
lambda=0.1, gamma=0.1, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)

Expand All @@ -88,40 +88,40 @@ params1 = EvoTreeRegressor(
# @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval)
@time pred_train_logistic = predict(model, X_train);
@time pred_eval_logistic = predict(model, X_eval)
sqrt(mean((pred_train_logistic .- Y_train).^2))
sqrt(mean((pred_train_logistic .- Y_train) .^ 2))

# Poisson
params1 = EvoTreeCount(
loss=:poisson, metric=:poisson,
nrounds=200, nbins=64,
λ=0.1, γ=0.1, η=0.05,
lambda=0.1, gamma=0.1, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)
@time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval)
@time pred_train_poisson = predict(model, X_train);
@time pred_eval_poisson = predict(model, X_eval)
sqrt(mean((pred_train_poisson .- Y_train).^2))
sqrt(mean((pred_train_poisson .- Y_train) .^ 2))

# L1
params1 = EvoTreeRegressor(
loss=:L1, α=0.5, metric=:mae,
nrounds=200, nbins=64,
λ=0.1, γ=0.1, η=0.05,
lambda=0.1, gamma=0.1, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)
@time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
@time pred_train_L1 = predict(model, X_train)
@time pred_eval_L1 = predict(model, X_eval)
sqrt(mean((pred_train_L1 .- Y_train).^2))
sqrt(mean((pred_train_L1 .- Y_train) .^ 2))

x_perm = sortperm(X_train[:,1])
x_perm = sortperm(X_train[:, 1])
plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
plot!(X_train[:,1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
plot!(X_train[:,1][x_perm], pred_train_linear_w[x_perm], color="lightblue", linewidth=1.5, label="LinearW")
plot!(X_train[:,1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
plot!(X_train[:,1][x_perm], pred_train_poisson[x_perm], color="green", linewidth=1.5, label="Poisson")
plot!(X_train[:,1][x_perm], pred_train_L1[x_perm], color="pink", linewidth=1.5, label="L1")
plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
plot!(X_train[:, 1][x_perm], pred_train_linear_w[x_perm], color="lightblue", linewidth=1.5, label="LinearW")
plot!(X_train[:, 1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
plot!(X_train[:, 1][x_perm], pred_train_poisson[x_perm], color="green", linewidth=1.5, label="Poisson")
plot!(X_train[:, 1][x_perm], pred_train_L1[x_perm], color="pink", linewidth=1.5, label="L1")
savefig("figures/regression_sinus.png")

###############################
Expand All @@ -131,7 +131,7 @@ savefig("figures/regression_sinus.png")
params1 = EvoTreeRegressor(
loss=:quantile, α=0.5, metric=:none,
nrounds=200, nbins=64,
λ=1.0, γ=0.0, η=0.05,
lambda=1.0, gamma=0.0, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)

Expand All @@ -145,7 +145,7 @@ sum(pred_train_q50 .< Y_train) / length(Y_train)
params1 = EvoTreeRegressor(
loss=:quantile, α=0.2, metric=:none,
nrounds=200, nbins=64,
λ=1.0, γ=0.0, η=0.05,
lambda=1.0, gamma=0.0, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)
@time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25);
Expand All @@ -156,19 +156,19 @@ sum(pred_train_q20 .< Y_train) / length(Y_train)
params1 = EvoTreeRegressor(
loss=:quantile, α=0.8, metric=:none,
nrounds=200, nbins=64,
λ=1.0, γ=0.0, η=0.05,
lambda=1.0, gamma=0.0, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0)

@time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25)
@time pred_train_q80 = predict(model, X_train)
sum(pred_train_q80 .< Y_train) / length(Y_train)

x_perm = sortperm(X_train[:,1])
x_perm = sortperm(X_train[:, 1])
plot(X_train, Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
plot!(X_train[:,1][x_perm], pred_train_q50[x_perm], color="navy", linewidth=1.5, label="Median")
plot!(X_train[:,1][x_perm], pred_train_q20[x_perm], color="darkred", linewidth=1.5, label="Q20")
plot!(X_train[:,1][x_perm], pred_train_q80[x_perm], color="green", linewidth=1.5, label="Q80")
plot!(X_train[:, 1][x_perm], pred_train_q50[x_perm], color="navy", linewidth=1.5, label="Median")
plot!(X_train[:, 1][x_perm], pred_train_q20[x_perm], color="darkred", linewidth=1.5, label="Q20")
plot!(X_train[:, 1][x_perm], pred_train_q80[x_perm], color="green", linewidth=1.5, label="Q80")
savefig("figures/quantiles_sinus.png")


Expand All @@ -178,7 +178,7 @@ savefig("figures/quantiles_sinus.png")
params1 = EvoTreeGaussian(
loss=:gaussian, metric=:gaussian,
nrounds=200, nbins=64,
λ=0.1, γ=0.1, η=0.05,
lambda=0.1, gamma=0.1, eta=0.05,
max_depth=6, min_weight=1.0,
rowsample=1.0, colsample=1.0, rng=123)

Expand All @@ -187,17 +187,17 @@ params1 = EvoTreeGaussian(
@time pred_train = EvoTrees.predict(model, X_train);
# @btime pred_train = EvoTrees.predict(model, X_train);

pred_gauss = [Distributions.Normal(pred_train[i,1], pred_train[i,2]) for i in 1:size(pred_train, 1)]
pred_gauss = [Distributions.Normal(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)]
pred_q80 = quantile.(pred_gauss, 0.8)
pred_q20 = quantile.(pred_gauss, 0.2)

mean(Y_train .< pred_q80)
mean(Y_train .< pred_q20)

x_perm = sortperm(X_train[:,1])
x_perm = sortperm(X_train[:, 1])
plot(X_train[:, 1], Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
plot!(X_train[:,1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu")
plot!(X_train[:,1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(X_train[:,1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(X_train[:,1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
plot!(X_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu")
plot!(X_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(X_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(X_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
savefig("figures/gaussian_sinus.png")
30 changes: 15 additions & 15 deletions experiments/readme_plots_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Y = sigmoid(Y)
𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
train_size = 0.8
𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]

X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
Expand All @@ -31,7 +31,7 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
params1 = EvoTreeRegressor(T=Float32,
loss=:linear, metric=:mse,
nrounds=200, nbins=64,
λ=0.5, γ=0.1, η=0.1,
lambda=0.5, gamma=0.1, eta=0.1,
max_depth=6, min_weight=1.0,
rowsample=0.1, colsample=1.0,
device="gpu")
Expand All @@ -47,13 +47,13 @@ sum(pred_train_linear_gpu .- pred_train_linear_cpu)
# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval, print_every_n = 25, metric=:mae)
@time pred_train_linear = predict(model, X_train)
mean(abs.(pred_train_linear .- Y_train))
sqrt(mean((pred_train_linear .- Y_train).^2))
sqrt(mean((pred_train_linear .- Y_train) .^ 2))

# logistic / cross-entropy
params1 = EvoTreeRegressor(T=Float32,
loss=:logistic, metric=:logloss,
nrounds=200, nbins=64,
λ=0.5, γ=0.1, η=0.1,
lambda=0.5, gamma=0.1, eta=0.1,
max_depth=6, min_weight=1.0,
rowsample=0.5, colsample=1.0,
device="gpu")
Expand All @@ -63,12 +63,12 @@ params1 = EvoTreeRegressor(T=Float32,
# 218.040 ms (123372 allocations: 34.71 MiB)
# @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval)
@time pred_train_logistic = predict(model, X_train)
sqrt(mean((pred_train_logistic .- Y_train).^2))
sqrt(mean((pred_train_logistic .- Y_train) .^ 2))

x_perm = sortperm(X_train[:,1])
x_perm = sortperm(X_train[:, 1])
plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
plot!(X_train[:,1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
plot!(X_train[:,1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
plot!(X_train[:, 1][x_perm], pred_train_logistic[x_perm], color="darkred", linewidth=1.5, label="Logistic")
# plot!(X_train[:,1][x_perm], pred_train_poisson[x_perm], color = "green", linewidth = 1.5, label = "Poisson")
# plot!(X_train[:,1][x_perm], pred_train_L1[x_perm], color = "pink", linewidth = 1.5, label = "L1")
savefig("figures/regression_sinus_gpu.png")
Expand All @@ -80,7 +80,7 @@ savefig("figures/regression_sinus_gpu.png")
params1 = EvoTreeGaussian(T=Float32,
loss=:gaussian, metric=:gaussian,
nrounds=200, nbins=64,
λ=1.0, γ=0.1, η=0.1,
lambda=1.0, gamma=0.1, eta=0.1,
max_depth=6, min_weight=0.1,
rowsample=0.5, colsample=1.0, rng=123,
device="gpu")
Expand All @@ -90,17 +90,17 @@ params1 = EvoTreeGaussian(T=Float32,
# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10);
@time pred_train_gaussian = EvoTrees.predict(model, X_train)

pred_gauss = [Distributions.Normal(pred_train_gaussian[i,1], pred_train_gaussian[i,2]) for i in 1:size(pred_train_gaussian, 1)]
pred_gauss = [Distributions.Normal(pred_train_gaussian[i, 1], pred_train_gaussian[i, 2]) for i in axes(pred_train_gaussian, 1)]
pred_q80 = quantile.(pred_gauss, 0.8)
pred_q20 = quantile.(pred_gauss, 0.2)

mean(Y_train .< pred_q80)
mean(Y_train .< pred_q20)

x_perm = sortperm(X_train[:,1])
x_perm = sortperm(X_train[:, 1])
plot(X_train[:, 1], Y_train, ms=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
plot!(X_train[:,1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linewidth=1.5, label="mu")
plot!(X_train[:,1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(X_train[:,1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(X_train[:,1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
plot!(X_train[:, 1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linewidth=1.5, label="mu")
plot!(X_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(X_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(X_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
savefig("figures/gaussian_sinus_gpu.png")
7 changes: 6 additions & 1 deletion src/MLJ.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ EvoTreeRegressor is used to perform the following regression types:
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
Only `:linear` and `:logistic` losses are supported at the moment.
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:mse`, `:mae`, `:logloss`.
- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`. Only `:linear` and `:logistic` losses are supported on GPU.
Expand Down Expand Up @@ -365,6 +367,7 @@ EvoTreeCount is used to perform Poisson probabilistic regression on count target
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:poisson`, `:mae`, `:mse`.
- `device="cpu"`: Hardware device to use for computations. Only CPU is supported at the moment.
Expand Down Expand Up @@ -485,9 +488,11 @@ EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
!Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`.
- `device="cpu"`: Hardware device to use for computations. Only CPU is supported at the moment.
- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`.
# Internal API
Expand Down
Loading

2 comments on commit 8effc61

@jeremiedb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/68289

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.10.1 -m "<description of version>" 8effc613c609a23874374ea435b28d1bbd8ce268
git push origin v0.10.1

Please sign in to comment.