Merge pull request #202 from Evovest/split-fix
Split fix
jeremiedb authored Dec 25, 2022
2 parents 61e7085 + 701efd9 commit 717304d
Showing 14 changed files with 156 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.14.2"
version = "0.14.3"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
30 changes: 15 additions & 15 deletions experiments/benchmarks-regressor.jl
@@ -43,14 +43,14 @@ params_xgb = Dict(
:max_bin => 64,
)

dtrain = DMatrix(x_train, y_train .- 1)
watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# dtrain = DMatrix(x_train, y_train .- 1)
# watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
# @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@info "xgboost predict:"
@time pred_xgb = XGBoost.predict(m_xgb, x_train);
@btime XGBoost.predict($m_xgb, $x_train);
# # @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# @info "xgboost predict:"
# @time pred_xgb = XGBoost.predict(m_xgb, x_train);
# @btime XGBoost.predict($m_xgb, $x_train);

# @info "lightgbm train:"
# m_gbm = LGBMRegression(
@@ -87,7 +87,7 @@ watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
# @time pred_gbm = LightGBM.predict(m_gbm, x_train) |> vec

@info "evotrees train CPU:"
# EvoTrees params
# # EvoTrees params
params_evo = EvoTreeRegressor(
T=T,
loss=loss_evo,
@@ -103,14 +103,14 @@
nbins=64,
rng = 123,
)
params_evo.device = "cpu"
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# params_evo.device = "cpu"
# @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);
@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo);
@info "evotrees predict CPU:"
@time pred_evo = EvoTrees.predict(m_evo, x_train);
@btime EvoTrees.predict($m_evo, $x_train);
# # @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# # @time m_evo = fit_evotree(params_evo; x_train, y_train);
# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo);
# @info "evotrees predict CPU:"
# @time pred_evo = EvoTrees.predict(m_evo, x_train);
# @btime EvoTrees.predict($m_evo, $x_train);

@info "evotrees train GPU:"
params_evo.device = "gpu"
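A note on the benchmark style in this file: the $ prefixes in the @btime calls are BenchmarkTools.jl interpolation, which splices global values into the benchmarked expression so that dynamic global lookups are excluded from the timing. For example:

using BenchmarkTools

x = rand(10^6)
@btime sum(x)    # x is a non-constant global: lookup overhead is included
@btime sum($x)   # $ interpolates the value: only sum itself is timed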
126 changes: 126 additions & 0 deletions experiments/debug-softmax-split.jl
@@ -0,0 +1,126 @@
using Revise
using Statistics
using StatsBase: sample
using EvoTrees
using BenchmarkTools

using CSV, DataFrames, MLJBase, EvoTrees
using StableRNGs

data = CSV.read(joinpath(@__DIR__, "..", "data", "debug", "pb_data.csv"), DataFrame)
y = categorical(data.target)
X = data[!, Not(:target)]

train, test = MLJBase.train_test_pairs(Holdout(), 1:size(X, 1), X, y)[1]
rng = StableRNG(6)
model = EvoTreeClassifier(nrounds = 5, lambda = 1e-5, max_depth = 7, rng = rng)
Xtrain, ytrain = MLJBase.reformat(model, selectrows(X, train), selectrows(y, train))
# MLJBase.fit(model, 1, Xtrain, ytrain);

# EvoTrees params
rng = StableRNG(6)
params_evo = EvoTreeClassifier(;
T = Float32,
nrounds = 200,
lambda = 0.0,
gamma = 0.0,
eta = 0.1,
max_depth = 7,
min_weight = 1.0,
rowsample = 1.0,
colsample = 1.0,
nbins = 64,
rng,
)

using CategoricalArrays
x_train = Xtrain[:matrix]
y_train = CategoricalArrays.levelcode.(ytrain)

mean(y_train)
sum(ytrain .== true) ./ length(y_train)

@info "evotrees train CPU:"
params_evo.device = "cpu"
@time m_evo = fit_evotree(params_evo; x_train, y_train);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);

# function h1(h, hL, hR, ∑, K, nbins)
# KK = 2 * K + 1
# @inbounds for j in js
# @inbounds for k = 1:KK
# val = h[k, 1, j]
# hL[k, 1, j] = val
# hR[k, 1, j] = ∑[k, j] - val
# end
# @inbounds for bin = 2:nbins
# @inbounds for k = 1:KK
# val = h[k, bin, j]
# hL[k, bin, j] = hL[k, bin-1, j] + val
# hR[k, bin, j] = hR[k, bin-1, j] - val
# end
# end
# end
# return hR
# end

# function h2(h, hL, hR, nbins)
# cumsum!(hL, h, dims = 2)
# hR .= view(hL, :, nbins:nbins, :) .- hL
# return hR
# end

# nbins = 64
# js = 12
# K = 2
# h = rand(2*K+1, nbins, js)
# hL = zeros(2*K+1, nbins, js)
# hR = zeros(2*K+1, nbins, js)
# ∑ = dropdims(sum(h, dims=2), dims=2)

# x1 = h1(h, hL, hR, ∑, K, nbins)
# x2 = h2(h, hL, hR, nbins)

# minimum(x1 .- x2)
# maximum(x1 .- x2)

mutable struct Node
    h
    hL
    hR
    ∑
end

function h1_A(node, K, nbins)

KK = 2 * K + 1
h = node.h
hL = node.hL
hR = node.hR
∑ = node.∑

hL = copy(hL)
hR = copy(hR)
@inbounds for j in js
@inbounds for k = 1:KK
val = h[k, 1, j]
hL[k, 1, j] = val
hR[k, 1, j] = ∑[k] - val
end
@inbounds for bin = 2:nbins
@inbounds for k = 1:KK
val = h[k, bin, j]
hL[k, bin, j] = hL[k, bin-1, j] + val
hR[k, bin, j] = hR[k, bin-1, j] - val
end
end
end

hL2 = copy(hL)
hR2 = copy(hR)
cumsum!(hL2, h, dims = 2)
hR2 .= view(hL2, :, nbins:nbins, :) .- hL2

@info "max abs diff hR" maximum(abs.(hR[3,:,:] .- hR2[3,:,:]))
return nothing
end
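The script's h1_A compares the explicit prefix-sum loop against the cumsum!-based formulation adopted in src/find_split.jl. The same check can be reproduced standalone; a minimal sketch, with every name local to the example rather than EvoTrees API:

K, nbins, nfeats = 2, 64, 12
h = rand(2K + 1, nbins, nfeats)        # stats × bins × features histogram
∑ = dropdims(sum(h, dims=2), dims=2)   # per-node totals: stats × features

# explicit prefix-sum loop, as in the pre-fix update_gains!
hL1, hR1 = similar(h), similar(h)
for j = 1:nfeats, k = 1:(2K + 1)
    hL1[k, 1, j] = h[k, 1, j]
    hR1[k, 1, j] = ∑[k, j] - h[k, 1, j]
    for bin = 2:nbins
        hL1[k, bin, j] = hL1[k, bin-1, j] + h[k, bin, j]
        hR1[k, bin, j] = hR1[k, bin-1, j] - h[k, bin, j]
    end
end

# cumsum! formulation this commit adopts in update_gains!
hL2, hR2 = similar(h), similar(h)
cumsum!(hL2, h, dims=2)
hR2 .= view(hL2, :, nbins:nbins, :) .- hL2

maximum(abs.(hL1 .- hL2)), maximum(abs.(hR1 .- hR2))   # both ≈ 0 (up to float roundoff)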
2 changes: 1 addition & 1 deletion experiments/readme_plots_gpu.jl
@@ -140,4 +140,4 @@
plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
savefig("figures/gaussian-sinus-gpu.png")
savefig("figures/gaussian-sinus-gpu.png")
Binary file modified figures/gaussian-sinus-gpu.png
Binary file modified figures/gaussian-sinus.png
Binary file modified figures/logistic-sinus.png
Binary file modified figures/quantiles_sinus.png
Binary file modified figures/regression_sinus.png
Binary file modified figures/regression_sinus2.png
Binary file modified figures/regression_sinus_gpu.png
25 changes: 7 additions & 18 deletions src/find_split.jl
@@ -110,6 +110,11 @@ function split_set_threads!(
lefts = zeros(Int, nblocks)
rights = zeros(Int, nblocks)

# @info "length(is)" length(is)
# @info "offset" offset
# @info "chunk_size" chunk_size
# @info "nblocks" nblocks

@threads for bid = 1:nblocks
lefts[bid], rights[bid] = split_set_chunk!(
left,
@@ -236,31 +241,16 @@ function update_gains!(
node::TrainNode,
js::Vector,
params::EvoTypes{L,T},
K,
monotone_constraints,
) where {L,T}

KK = 2 * K + 1
hL = node.hL
h = node.h
hL = node.hL
hR = node.hR
gains = node.gains

@inbounds for j in js
@inbounds for k = 1:KK
val = h[k, 1, j]
hL[k, 1, j] = val
hR[k, 1, j] = node.∑[k] - val
end
@inbounds for bin = 2:params.nbins
@inbounds for k = 1:KK
val = h[k, bin, j]
hL[k, bin, j] = hL[k, bin-1, j] + val
hR[k, bin, j] = hR[k, bin-1, j] - val
end
end
end
cumsum!(hL, h, dims = 2)
hR .= view(hL, :, params.nbins:params.nbins, :) .- hL

@inbounds for j in js
monotone_constraint = monotone_constraints[j]
@@ -281,6 +271,5 @@
end
end
end

return nothing
end
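After hL and hR are filled by cumsum! and subtraction, the loop that remains in update_gains! scans each feature's bins for the best split gain. A simplified single-output sketch of that scan follows; the second-order gain formula, the (∑grad, ∑hess, ∑weight) row layout, λ as the regularization term, and the omission of monotone constraints and min_weight checks are assumptions of the sketch, not the literal EvoTrees code:

# second-order gain of a child from its aggregated gradient g, hessian h, weight w
child_gain(g, h, w, λ) = g^2 / (h + λ * w) / 2

function best_split(hL, hR, j; λ = 0.0)
    nbins = size(hL, 2)
    best_gain, best_bin = -Inf, 0
    for bin = 1:nbins-1   # a "split" at the last bin would leave the right child empty
        g = child_gain(hL[1, bin, j], hL[2, bin, j], hL[3, bin, j], λ) +
            child_gain(hR[1, bin, j], hR[2, bin, j], hR[3, bin, j], λ)
        if g > best_gain
            best_gain, best_bin = g, bin
        end
    end
    return best_gain, best_bin
end

This mirrors the check in src/fit.jl below, where a best bin equal to params.nbins is treated as "no split".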
8 changes: 3 additions & 5 deletions src/fit.jl
@@ -177,7 +177,7 @@ function grow_tree!(
@threads for n in nodes
n.h .= 0
n.∑ .= 0
n.gain = 0
n.gain = T(0)
n.gains .= 0
end

@@ -192,7 +192,7 @@
# grow while there are remaining active nodes
while length(n_current) > 0 && depth <= params.max_depth
offset = 0 # identifies breakpoint for each node set within a depth

if depth < params.max_depth
for n_id in eachindex(n_current)
n = n_current[n_id]
@@ -212,8 +212,7 @@
if depth == params.max_depth || nodes[n].∑[end] <= params.min_weight
pred_leaf_cpu!(tree.pred, n, nodes[n].∑, params, ∇, nodes[n].is)
else
# histogram subtraction
update_gains!(nodes[n], js, params, K, monotone_constraints)
update_gains!(nodes[n], js, params, monotone_constraints)
best = findmax(nodes[n].gains)
if best[2][1] != params.nbins && best[1] > nodes[n].gain + params.gamma
tree.gain[n] = best[1] - nodes[n].gain
@@ -252,7 +251,6 @@
push!(n_next, n << 1)
end
popfirst!(n_next)
# println("n_next split post: ", n, " | ", n_next)
end
end
end
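The n.gain = T(0) change in the node-reset loop is a type-stability touch-up: if the gain field is not concretely typed, assigning the literal 0 stores an Int, while T(0) keeps it at the training float type (T = Float32 in these scripts). A small illustration of the difference, using a hypothetical LooseNode rather than the EvoTrees TrainNode:

mutable struct LooseNode
    gain   # untyped field
end

n = LooseNode(0.5f0)
n.gain = 0;          typeof(n.gain)   # Int64: the stored type changed
n.gain = Float32(0); typeof(n.gain)   # Float32: precision preserved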
7 changes: 3 additions & 4 deletions src/gpu/find_split_gpu.jl
@@ -182,14 +182,13 @@ function update_gains!(
node::TrainNodeGPU,
js::AbstractVector,
params::EvoTypes{L,T},
monotone_constraints;
MAX_THREADS=512
monotone_constraints,
) where {L,T}

cumsum!(node.hL, node.h, dims=2)
cumsum!(node.hL, node.h, dims = 2)
node.hR .= view(node.hL, :, params.nbins:params.nbins, :) .- node.hL

threads = min(params.nbins, MAX_THREADS)
threads = params.nbins
blocks = length(js)
@cuda blocks = blocks threads = threads update_gains_kernel!(
node.gains,
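The GPU hunk makes the launch geometry explicit: one block per candidate feature and one thread per bin, dropping the old MAX_THREADS cap. This is safe because EvoTrees stores bins as UInt8, so nbins cannot exceed 255, well under the usual 1024-thread-per-block limit. A minimal sketch of that launch pattern, with an illustrative kernel body rather than the EvoTrees kernel:

using CUDA

# one block per feature (j), one thread per bin
function per_bin_kernel!(out, hL)
    j = blockIdx().x
    bin = threadIdx().x
    @inbounds out[bin, j] = hL[1, bin, j]   # placeholder per-(bin, feature) work
    return nothing
end

nbins, nfeats = 64, 12
hL = CUDA.rand(3, nbins, nfeats)
out = CUDA.zeros(nbins, nfeats)
@cuda blocks = nfeats threads = nbins per_bin_kernel!(out, hL)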

2 comments on commit 717304d

@jeremiedb (Member Author)

@JuliaRegistrator register()

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/74652

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or it can be done manually through the GitHub interface, or via:

git tag -a v0.14.3 -m "<description of version>" 717304dd7cc4379b7b77521c2ca470f98fbe2383
git push origin v0.14.3
