monotonic constraints
syntax & logistic minor fixes
jeremiedb committed Sep 14, 2022
2 parents 38874c1 + 937d27d commit 118ac35
Showing 9 changed files with 265 additions and 347 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.10.0"
version = "0.10.1"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
5 changes: 5 additions & 0 deletions src/MLJ.jl
@@ -140,6 +140,8 @@ EvoTreeRegressor is used to perform the following regression types:
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
+    Only `:linear` and `:logistic` losses are supported at the moment.
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:mse`, `:mae`, `:logloss`.
- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`. Only `:linear` and `:logistic` losses are supported on GPU.
@@ -365,6 +367,7 @@ EvoTreeCount is used to perform Poisson probabilistic regression on count target
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:poisson`, `:mae`, `:mse`.
- `device="cpu"`: Hardware device to use for computations. Only CPU is supported at the moment.
@@ -485,6 +488,8 @@ EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ
- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.
- `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.
+- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).
+    Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.
- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).
- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`.
- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`.
113 changes: 11 additions & 102 deletions src/find_split.jl
@@ -26,25 +26,6 @@ function binarize(X, edges)
return X_bin
end

"""
Non Allocating split_set!
Take a view into left and right placeholders. Right ids are assigned at the end of the length of the current node set.
"""
# function split_set!(left::V, right::V, 𝑖, X_bin::Matrix{S}, feat, cond_bin::S, offset) where {S,V}
# left_count = 0
# right_count = 0
# @inbounds for i in 1:length(𝑖)
# @inbounds if X_bin[𝑖[i], feat] <= cond_bin
# left_count += 1
# left[offset + left_count] = 𝑖[i]
# else
# right[offset + length(𝑖) - right_count] = 𝑖[i]
# right_count += 1
# end
# end
# return (view(left, (offset + 1):(offset + left_count)), view(right, (offset + length(𝑖)):-1:(offset + left_count + 1)))
# end

"""
Multi-threaded split_set!
Take a view into left and right placeholders. Right ids are assigned at the end of the length of the current node set.
@@ -127,7 +108,7 @@ function update_hist!(
𝑖::AbstractVector{S},
𝑗::AbstractVector{S}, K) where {L<:GradientRegression,T,S}

-@inbounds @threads for j in 𝑗
+@threads for j in 𝑗
@inbounds @simd for i in 𝑖
hid = 3 * X_bin[i, j] - 2
hist[j][hid] += δ𝑤[1, i]
@@ -150,7 +131,7 @@ function update_hist!(
𝑖::AbstractVector{S},
𝑗::AbstractVector{S}, K) where {L<:GaussianRegression,T,S}

-@inbounds @threads for j in 𝑗
+@threads for j in 𝑗
@inbounds @simd for i in 𝑖
hid = 5 * X_bin[i, j] - 4
hist[j][hid] += δ𝑤[1, i]
@@ -175,10 +156,10 @@ function update_hist!(
𝑖::AbstractVector{S},
𝑗::AbstractVector{S}, K) where {L,T,S}

-@inbounds @threads for j in 𝑗
-@inbounds @simd for i in 𝑖
+@threads for j in 𝑗
+@inbounds for i in 𝑖
hid = (2 * K + 1) * (X_bin[i, j] - 1)
-for k = 1:(2*K+1)
+for k in 1:(2*K+1)
hist[j][hid+k] += δ𝑤[k, i]
end
end
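A reading aid for the indexing above (my paraphrase of the layout, not code from the repository): each feature's histogram is a flat vector holding `2K + 1` statistics per bin, namely K gradient sums, K curvature sums, and one weight sum, so the offset `hid` points at the start of a bin's block and reduces to the `3 * bin - 2` form when `K = 1`:

```julia
# Sketch under the layout assumption stated above; all names are illustrative.
K = 1                          # e.g. gradient regression, one output dimension
nbins = 32
hist_j = zeros(Float64, (2K + 1) * nbins)

bin = 7
hid = (2K + 1) * (bin - 1)     # start offset of this bin's block (0-based)
hist_j[hid + 1] += 0.5         # accumulate a first-order gradient
hist_j[hid + 2] += 1.2         # accumulate a second-order term
hist_j[hid + 3] += 1.0         # accumulate the observation weight
```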
@@ -188,85 +169,13 @@ end


"""
update_gains!
GradientRegression
"""
function update_gains!(
loss::L,
node::TrainNode{T},
𝑗::Vector{S},
params::EvoTypes, K, monotone_constraints) where {L<:GradientRegression,T,S}

@inbounds @threads for j in 𝑗
node.hL[j][1] = node.h[j][1]
node.hL[j][2] = node.h[j][2]
node.hL[j][3] = node.h[j][3]

node.hR[j][1] = node.∑[1] - node.h[j][1]
node.hR[j][2] = node.∑[2] - node.h[j][2]
node.hR[j][3] = node.∑[3] - node.h[j][3]
@inbounds for bin = 2:params.nbins
binid = 3 * bin - 2
node.hL[j][binid] = node.hL[j][binid-3] + node.h[j][binid]
node.hL[j][binid+1] = node.hL[j][binid-2] + node.h[j][binid+1]
node.hL[j][binid+2] = node.hL[j][binid-1] + node.h[j][binid+2]

node.hR[j][binid] = node.hR[j][binid-3] - node.h[j][binid]
node.hR[j][binid+1] = node.hR[j][binid-2] - node.h[j][binid+1]
node.hR[j][binid+2] = node.hR[j][binid-1] - node.h[j][binid+2]

end
hist_gains_cpu!(loss, view(node.gains, :, j), node.hL[j], node.hR[j], params, K, monotone_constraints[j])
end
return nothing
end

"""
update_gains!
GaussianRegression
"""
function update_gains!(
loss::L,
node::TrainNode{T},
𝑗::Vector{S},
params::EvoTypes, K, monotone_constraints) where {L<:GaussianRegression,T,S}

@inbounds @threads for j in 𝑗
node.hL[j][1] = node.h[j][1]
node.hL[j][2] = node.h[j][2]
node.hL[j][3] = node.h[j][3]
node.hL[j][4] = node.h[j][4]
node.hL[j][5] = node.h[j][5]

node.hR[j][1] = node.∑[1] - node.h[j][1]
node.hR[j][2] = node.∑[2] - node.h[j][2]
node.hR[j][3] = node.∑[3] - node.h[j][3]
node.hR[j][4] = node.∑[4] - node.h[j][4]
node.hR[j][5] = node.∑[5] - node.h[j][5]
@inbounds for bin in 2:params.nbins
binid = 5 * bin - 4
node.hL[j][binid] = node.hL[j][binid-5] + node.h[j][binid]
node.hL[j][binid+1] = node.hL[j][binid-4] + node.h[j][binid+1]
node.hL[j][binid+2] = node.hL[j][binid-3] + node.h[j][binid+2]
node.hL[j][binid+3] = node.hL[j][binid-2] + node.h[j][binid+3]
node.hL[j][binid+4] = node.hL[j][binid-1] + node.h[j][binid+4]

node.hR[j][binid] = node.hR[j][binid-5] - node.h[j][binid]
node.hR[j][binid+1] = node.hR[j][binid-4] - node.h[j][binid+1]
node.hR[j][binid+2] = node.hR[j][binid-3] - node.h[j][binid+2]
node.hR[j][binid+3] = node.hR[j][binid-2] - node.h[j][binid+3]
node.hR[j][binid+4] = node.hR[j][binid-1] - node.h[j][binid+4]
update_gains!(
loss::L,
node::TrainNode{T},
𝑗::Vector{S},
params::EvoTypes, K, monotone_constraints) where {L,T,S}
end
hist_gains_cpu!(loss, view(node.gains, :, j), node.hL[j], node.hR[j], params, K, monotone_constraints[j])
end
return nothing
end


"""
update_gains!
Generic fallback
Generic fallback
"""
function update_gains!(
loss::L,
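All of the deleted `update_gains!` specializations above share the same cumulative-scan pattern, which the generic fallback now covers. A simplified sketch of that pattern, reduced by this editor to a single statistic per bin:

```julia
# One-statistic version of the left/right scan in update_gains! (illustrative).
hist = [1.0, 2.0, 3.0, 4.0]    # per-bin sums for one feature
total = sum(hist)

hL = cumsum(hist)              # mass in bins 1..b: the would-be left child
hR = total .- hL               # complement: the would-be right child

# A split after bin b routes hL[b] left and hR[b] right, so candidate gains
# for every threshold are scored from these two aggregates in a single pass.
@assert all((hL .+ hR) .≈ total)
```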
4 changes: 2 additions & 2 deletions src/fit.jl
@@ -70,7 +70,7 @@ function init_evotree(params::EvoTypes{T,U,S}, X::AbstractMatrix, Y::AbstractVec

# assign monotone constraints in constraints vector
monotone_constraints = zeros(Int32, X_size[2])
-isdefined(params, :monotone_constraint) && for (k, v) in params.monotone_constraints
+hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints
monotone_constraints[k] = v
end

@@ -147,7 +147,7 @@ function grow_tree!(
offset = 0 # identifies breakpoint for each node set within a depth

if depth < params.max_depth
-for n_id = 1:length(n_current)
+for n_id in eachindex(n_current)
n = n_current[n_id]
-if n_id % 2 == 0
+if n % 2 == 0
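For background on the `n % 2` test fixed here (editor's note, based on the heap-style node numbering used by the tree-growing code, not text from the diff): the parity of the node id `n`, not of its position `n_id` in the current level's list, is what identifies left children:

```julia
# Heap-style tree ids: root is 1, children of node n are 2n and 2n + 1.
left(n)  = 2n                  # left children always have even ids
right(n) = 2n + 1              # right children always have odd ids

# Since a left child's histogram plus its sibling's equals the parent's,
# only one child per pair needs a full histogram pass; parity picks which.
@assert iseven(left(5)) && isodd(right(5))
```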
4 changes: 2 additions & 2 deletions src/gpu/fit_gpu.jl
@@ -51,7 +51,7 @@ function init_evotree_gpu(params::EvoTypes{T,U,S},

# assign monotone constraints in constraints vector
monotone_constraints = zeros(Int32, X_size[2])
-isdefined(params, :monotone_constraint) && for (k, v) in params.monotone_constraints
+hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints
monotone_constraints[k] = v
end

@@ -130,7 +130,7 @@ function grow_tree_gpu!(
while length(n_current) > 0 && depth <= params.max_depth
offset = 0 # identifies breakpoint for each node set within a depth
if depth < params.max_depth
-for n_id = 1:length(n_current)
+for n_id in eachindex(n_current)
n = n_current[n_id]
-if n_id % 2 == 0
+if n % 2 == 0
2 changes: 1 addition & 1 deletion src/models.jl
@@ -324,7 +324,7 @@ function EvoTreeGaussian(; kwargs...)

args[:rng] = mk_rng(args[:rng])::Random.AbstractRNG

-model = EvoTreeCount(
+model = EvoTreeGaussian(
args[:loss],
args[:nrounds],
args[:T](args[:lambda]),
2 changes: 1 addition & 1 deletion src/predict.jl
@@ -103,7 +103,7 @@ function pred_scalar_cpu!(::S, ∑::Vector{T}, params::EvoTypes, K) where {S<:Ga
end

# prediction in Leaf - MultiClassRegression
-function pred_leaf_cpu!(::S, pred, n, ∑::Vector{T}, params::EvoTypes, K) where {S<:MultiClassRegression,T}
+function pred_leaf_cpu!(::S, pred, n, ∑::Vector{T}, params::EvoTypes, K, δ𝑤, 𝑖) where {S<:MultiClassRegression,T}
@inbounds for k = 1:K
pred[k, n] = -params.eta * ∑[k] / (∑[k+K] + params.lambda * ∑[2*K+1])
end
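The leaf value above is a damped Newton step per class: minus the learning rate times the leaf's gradient sum, divided by its Hessian sum plus an L2 penalty scaled by the weight sum. A worked instance with illustrative numbers:

```julia
eta, lambda = 0.1, 1.0
g, h, w = -4.0, 8.0, 10.0                  # leaf sums: ∑grad, ∑hess, ∑weight
leaf_value = -eta * g / (h + lambda * w)   # 0.4 / 18 ≈ 0.0222
```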