Merge pull request #202 from Evovest/split-fix
Split fix
jeremiedb authored Dec 25, 2022
2 parents 61e7085 + 701efd9 commit 717304d
Showing 14 changed files with 156 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.14.2"
version = "0.14.3"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
30 changes: 15 additions & 15 deletions experiments/benchmarks-regressor.jl
@@ -43,14 +43,14 @@ params_xgb = Dict(
:max_bin => 64,
)

dtrain = DMatrix(x_train, y_train .- 1)
watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# dtrain = DMatrix(x_train, y_train .- 1)
# watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
# @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@info "xgboost predict:"
@time pred_xgb = XGBoost.predict(m_xgb, x_train);
@btime XGBoost.predict($m_xgb, $x_train);
# # @time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
# @info "xgboost predict:"
# @time pred_xgb = XGBoost.predict(m_xgb, x_train);
# @btime XGBoost.predict($m_xgb, $x_train);

# @info "lightgbm train:"
# m_gbm = LGBMRegression(
@@ -87,7 +87,7 @@ watchlist = Dict("train" => DMatrix(x_train, y_train .- 1))
# @time pred_gbm = LightGBM.predict(m_gbm, x_train) |> vec

@info "evotrees train CPU:"
# EvoTrees params
# # EvoTrees params
params_evo = EvoTreeRegressor(
T=T,
loss=loss_evo,
@@ -103,14 +103,14 @@
nbins=64,
rng = 123,
)
params_evo.device = "cpu"
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# params_evo.device = "cpu"
# @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);
@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo);
@info "evotrees predict CPU:"
@time pred_evo = EvoTrees.predict(m_evo, x_train);
@btime EvoTrees.predict($m_evo, $x_train);
# # @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100);
# # @time m_evo = fit_evotree(params_evo; x_train, y_train);
# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo);
# @info "evotrees predict CPU:"
# @time pred_evo = EvoTrees.predict(m_evo, x_train);
# @btime EvoTrees.predict($m_evo, $x_train);

@info "evotrees train GPU:"
params_evo.device = "gpu"
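A note on the benchmark style in this file: the $ prefixes in the @btime calls are BenchmarkTools.jl interpolation, which splices global values into the benchmarked expression so that dynamic global lookups are excluded from the timing. For example:

using BenchmarkTools

x = rand(10^6)
@btime sum(x)    # x is a non-constant global: lookup overhead is included
@btime sum($x)   # $ interpolates the value: only sum itself is timed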
126 changes: 126 additions & 0 deletions experiments/debug-softmax-split.jl
@@ -0,0 +1,126 @@
using Revise
using Statistics
using StatsBase: sample
using EvoTrees
using BenchmarkTools

using CSV, DataFrames, MLJBase, EvoTrees
using StableRNGs

data = CSV.read(joinpath(@__DIR__, "..", "data", "debug", "pb_data.csv"), DataFrame)
y = categorical(data.target)
X = data[!, Not(:target)]

train, test = MLJBase.train_test_pairs(Holdout(), 1:size(X, 1), X, y)[1]
rng = StableRNG(6)
model = EvoTreeClassifier(nrounds = 5, lambda = 1e-5, max_depth = 7, rng = rng)
Xtrain, ytrain = MLJBase.reformat(model, selectrows(X, train), selectrows(y, train))
# MLJBase.fit(model, 1, Xtrain, ytrain);

# EvoTrees params
rng = StableRNG(6)
params_evo = EvoTreeClassifier(;
T = Float32,
nrounds = 200,
lambda = 0.0,
gamma = 0.0,
eta = 0.1,
max_depth = 7,
min_weight = 1.0,
rowsample = 1.0,
colsample = 1.0,
nbins = 64,
rng,
)

using CategoricalArrays
x_train = Xtrain[:matrix]
y_train = CategoricalArrays.levelcode.(ytrain)

mean(y_train)
sum(ytrain .== true) ./ length(y_train)

@info "evotrees train CPU:"
params_evo.device = "cpu"
@time m_evo = fit_evotree(params_evo; x_train, y_train);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);
# @time m_evo = fit_evotree(params_evo; x_train, y_train);

# function h1(h, hL, hR, ∑, K, nbins)
# KK = 2 * K + 1
# @inbounds for j in js
# @inbounds for k = 1:KK
# val = h[k, 1, j]
# hL[k, 1, j] = val
# hR[k, 1, j] = ∑[k, j] - val
# end
# @inbounds for bin = 2:nbins
# @inbounds for k = 1:KK
# val = h[k, bin, j]
# hL[k, bin, j] = hL[k, bin-1, j] + val
# hR[k, bin, j] = hR[k, bin-1, j] - val
# end
# end
# end
# return hR
# end

# function h2(h, hL, hR, nbins)
# cumsum!(hL, h, dims = 2)
# hR .= view(hL, :, nbins:nbins, :) .- hL
# return hR
# end

# nbins = 64
# js = 12
# K = 2
# h = rand(2*K+1, nbins, js)
# hL = zeros(2*K+1, nbins, js)
# hR = zeros(2*K+1, nbins, js)
# ∑ = dropdims(sum(h, dims=2), dims=2)

# x1 = h1(h, hL, hR, ∑, K, nbins)
# x2 = h2(h, hL, hR, nbins)

# minimum(x1 .- x2)
# maximum(x1 .- x2)

mutable struct Node
    h
    hL
    hR
    ∑
end

function h1_A(node, K, nbins)

KK = 2 * K + 1
h = node.h
hL = node.hL
hR = node.hR
∑ = node.∑

hL = copy(hL)
hR = copy(hR)
@inbounds for j in js
@inbounds for k = 1:KK
val = h[k, 1, j]
hL[k, 1, j] = val
hR[k, 1, j] = ∑[k] - val
end
@inbounds for bin = 2:nbins
@inbounds for k = 1:KK
val = h[k, bin, j]
hL[k, bin, j] = hL[k, bin-1, j] + val
hR[k, bin, j] = hR[k, bin-1, j] - val
end
end
end

hL2 = copy(hL)
hR2 = copy(hR)
cumsum!(hL2, h, dims = 2)
hR2 .= view(hL2, :, nbins:nbins, :) .- hL2

@info "max abs diff hR" maximum(abs.(hR[3,:,:] .- hR2[3,:,:]))
return nothing
end
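The script's h1_A compares the explicit prefix-sum loop against the cumsum!-based formulation adopted in src/find_split.jl. The same check can be reproduced standalone; a minimal sketch, with every name local to the example rather than EvoTrees API:

K, nbins, nfeats = 2, 64, 12
h = rand(2K + 1, nbins, nfeats)        # stats × bins × features histogram
∑ = dropdims(sum(h, dims=2), dims=2)   # per-node totals: stats × features

# explicit prefix-sum loop, as in the pre-fix update_gains!
hL1, hR1 = similar(h), similar(h)
for j = 1:nfeats, k = 1:(2K + 1)
    hL1[k, 1, j] = h[k, 1, j]
    hR1[k, 1, j] = ∑[k, j] - h[k, 1, j]
    for bin = 2:nbins
        hL1[k, bin, j] = hL1[k, bin-1, j] + h[k, bin, j]
        hR1[k, bin, j] = hR1[k, bin-1, j] - h[k, bin, j]
    end
end

# cumsum! formulation this commit adopts in update_gains!
hL2, hR2 = similar(h), similar(h)
cumsum!(hL2, h, dims=2)
hR2 .= view(hL2, :, nbins:nbins, :) .- hL2

maximum(abs.(hL1 .- hL2)), maximum(abs.(hR1 .- hR2))   # both ≈ 0 (up to float roundoff)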
2 changes: 1 addition & 1 deletion experiments/readme_plots_gpu.jl
@@ -140,4 +140,4 @@
plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma")
plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20")
plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80")
savefig("figures/gaussian-sinus-gpu.png")
savefig("figures/gaussian-sinus-gpu.png")
Binary file modified figures/gaussian-sinus-gpu.png
Binary file modified figures/gaussian-sinus.png
Binary file modified figures/logistic-sinus.png
Binary file modified figures/quantiles_sinus.png
Binary file modified figures/regression_sinus.png
Binary file modified figures/regression_sinus2.png
Binary file modified figures/regression_sinus_gpu.png
25 changes: 7 additions & 18 deletions src/find_split.jl
@@ -110,6 +110,11 @@ function split_set_threads!(
lefts = zeros(Int, nblocks)
rights = zeros(Int, nblocks)

# @info "length(is)" length(is)
# @info "offset" offset
# @info "chunk_size" chunk_size
# @info "nblocks" nblocks

@threads for bid = 1:nblocks
lefts[bid], rights[bid] = split_set_chunk!(
left,
@@ -236,31 +241,16 @@ function update_gains!(
node::TrainNode,
js::Vector,
params::EvoTypes{L,T},
K,
monotone_constraints,
) where {L,T}

KK = 2 * K + 1
hL = node.hL
h = node.h
hL = node.hL
hR = node.hR
gains = node.gains

@inbounds for j in js
@inbounds for k = 1:KK
val = h[k, 1, j]
hL[k, 1, j] = val
hR[k, 1, j] = node.∑[k] - val
end
@inbounds for bin = 2:params.nbins
@inbounds for k = 1:KK
val = h[k, bin, j]
hL[k, bin, j] = hL[k, bin-1, j] + val
hR[k, bin, j] = hR[k, bin-1, j] - val
end
end
end
cumsum!(hL, h, dims = 2)
hR .= view(hL, :, params.nbins:params.nbins, :) .- hL

@inbounds for j in js
monotone_constraint = monotone_constraints[j]
@@ -281,6 +271,5 @@
end
end
end

return nothing
end
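After hL and hR are filled by cumsum! and subtraction, the loop that remains in update_gains! scans each feature's bins for the best split gain. A simplified single-output sketch of that scan follows; the second-order gain formula, the (∑grad, ∑hess, ∑weight) row layout, λ as the regularization term, and the omission of monotone constraints and min_weight checks are assumptions of the sketch, not the literal EvoTrees code:

# second-order gain of a child from its aggregated gradient g, hessian h, weight w
child_gain(g, h, w, λ) = g^2 / (h + λ * w) / 2

function best_split(hL, hR, j; λ = 0.0)
    nbins = size(hL, 2)
    best_gain, best_bin = -Inf, 0
    for bin = 1:nbins-1   # a "split" at the last bin would leave the right child empty
        g = child_gain(hL[1, bin, j], hL[2, bin, j], hL[3, bin, j], λ) +
            child_gain(hR[1, bin, j], hR[2, bin, j], hR[3, bin, j], λ)
        if g > best_gain
            best_gain, best_bin = g, bin
        end
    end
    return best_gain, best_bin
end

This mirrors the check in src/fit.jl below, where a best bin equal to params.nbins is treated as "no split".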
8 changes: 3 additions & 5 deletions src/fit.jl
@@ -177,7 +177,7 @@ function grow_tree!(
@threads for n in nodes
n.h .= 0
n.∑ .= 0
n.gain = 0
n.gain = T(0)
n.gains .= 0
end

@@ -192,7 +192,7 @@
# grow while there are remaining active nodes
while length(n_current) > 0 && depth <= params.max_depth
offset = 0 # identifies breakpoint for each node set within a depth

if depth < params.max_depth
for n_id in eachindex(n_current)
n = n_current[n_id]
@@ -212,8 +212,7 @@
if depth == params.max_depth || nodes[n].∑[end] <= params.min_weight
pred_leaf_cpu!(tree.pred, n, nodes[n].∑, params, ∇, nodes[n].is)
else
# histogram subtraction
update_gains!(nodes[n], js, params, K, monotone_constraints)
update_gains!(nodes[n], js, params, monotone_constraints)
best = findmax(nodes[n].gains)
if best[2][1] != params.nbins && best[1] > nodes[n].gain + params.gamma
tree.gain[n] = best[1] - nodes[n].gain
@@ -252,7 +251,6 @@
push!(n_next, n << 1)
end
popfirst!(n_next)
# println("n_next split post: ", n, " | ", n_next)
end
end
end
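The n.gain = T(0) change in the node-reset loop is a type-stability touch-up: if the gain field is not concretely typed, assigning the literal 0 stores an Int, while T(0) keeps it at the training float type (T = Float32 in these scripts). A small illustration of the difference, using a hypothetical LooseNode rather than the EvoTrees TrainNode:

mutable struct LooseNode
    gain   # untyped field
end

n = LooseNode(0.5f0)
n.gain = 0;          typeof(n.gain)   # Int64: the stored type changed
n.gain = Float32(0); typeof(n.gain)   # Float32: precision preserved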
7 changes: 3 additions & 4 deletions src/gpu/find_split_gpu.jl
@@ -182,14 +182,13 @@ function update_gains!(
node::TrainNodeGPU,
js::AbstractVector,
params::EvoTypes{L,T},
monotone_constraints;
MAX_THREADS=512
monotone_constraints,
) where {L,T}

cumsum!(node.hL, node.h, dims=2)
cumsum!(node.hL, node.h, dims = 2)
node.hR .= view(node.hL, :, params.nbins:params.nbins, :) .- node.hL

threads = min(params.nbins, MAX_THREADS)
threads = params.nbins
blocks = length(js)
@cuda blocks = blocks threads = threads update_gains_kernel!(
node.gains,
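The GPU hunk makes the launch geometry explicit: one block per candidate feature and one thread per bin, dropping the old MAX_THREADS cap. This is safe because EvoTrees stores bins as UInt8, so nbins cannot exceed 255, well under the usual 1024-thread-per-block limit. A minimal sketch of that launch pattern, with an illustrative kernel body rather than the EvoTrees kernel:

using CUDA

# one block per feature (j), one thread per bin
function per_bin_kernel!(out, hL)
    j = blockIdx().x
    bin = threadIdx().x
    @inbounds out[bin, j] = hL[1, bin, j]   # placeholder per-(bin, feature) work
    return nothing
end

nbins, nfeats = 64, 12
hL = CUDA.rand(3, nbins, nfeats)
out = CUDA.zeros(nbins, nfeats)
@cuda blocks = nfeats threads = nbins per_bin_kernel!(out, hL)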

2 comments on commit 717304d

@jeremiedb (Member Author)

@JuliaRegistrator register()

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/74652

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or it can be done manually through the GitHub interface, or via:

git tag -a v0.14.3 -m "<description of version>" 717304dd7cc4379b7b77521c2ca470f98fbe2383
git push origin v0.14.3
