diff --git a/experiments/benchmarks_v2.jl b/experiments/benchmarks_v2.jl index 5a20465e..79a74c33 100644 --- a/experiments/benchmarks_v2.jl +++ b/experiments/benchmarks_v2.jl @@ -58,31 +58,32 @@ num_feat = Int(100) x_train = rand(nobs, num_feat) y_train = rand(size(x_train, 1)) -@info "xgboost train:" -@time m_xgb = xgboost(x_train, nrounds, label=y_train, param=params_xgb, metrics=metrics, nthread=nthread, silent=1); -@btime xgboost($x_train, $nrounds, label=$y_train, param=$params_xgb, metrics=$metrics, nthread=$nthread, silent=1); -@info "xgboost predict:" -@time pred_xgb = XGBoost.predict(m_xgb, x_train); -@btime XGBoost.predict($m_xgb, $x_train); +# @info "xgboost train:" +# @time m_xgb = xgboost(x_train, nrounds, label=y_train, param=params_xgb, metrics=metrics, nthread=nthread, silent=1); +# @btime xgboost($x_train, $nrounds, label=$y_train, param=$params_xgb, metrics=$metrics, nthread=$nthread, silent=1); +# @info "xgboost predict:" +# @time pred_xgb = XGBoost.predict(m_xgb, x_train); +# @btime XGBoost.predict($m_xgb, $x_train); -@info "evotrees train CPU:" -params_evo.device = "cpu" -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); -@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); -@info "evotrees predict CPU:" -@time pred_evo = EvoTrees.predict(m_evo, x_train); -@btime EvoTrees.predict($m_evo, $x_train); +# @info "evotrees train CPU:" +# params_evo.device = "cpu" +# @time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); +# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); +# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train); +# @info "evotrees predict CPU:" +# @time pred_evo = EvoTrees.predict(m_evo, x_train); +# @btime EvoTrees.predict($m_evo, $x_train); CUDA.allowscalar(true) @info "evotrees train GPU:" params_evo.device = "gpu" @time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); -@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); -@info "evotrees predict GPU:" -@time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); -@btime EvoTrees.predict($m_evo_gpu, $x_train); - -# w_train = ones(length(y_train)) -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train); -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train, w_train); \ No newline at end of file +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); +# @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); +# @info "evotrees predict GPU:" +# @time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); +# @btime EvoTrees.predict($m_evo_gpu, $x_train); \ No newline at end of file diff --git a/experiments/random.jl b/experiments/random.jl index e02f333e..63e93c49 100644 --- a/experiments/random.jl +++ b/experiments/random.jl @@ -33,7 +33,7 @@ params1 = EvoTreeRegressor(T=Float32, # asus laptopt: for 1.25e6 no eval: 9.650007 seconds (893.53 k allocations: 2.391 GiB, 5.52% gc time) @time model = fit_evotree(params1; x_train, y_train); -@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=10); +@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=100); @btime model = fit_evotree(params1; x_train, y_train); @time pred_train = predict(model, x_train); @btime pred_train = predict(model, x_train); @@ -77,7 +77,7 @@ params1 = EvoTreeGaussian(T=Float32, # train model params1 = EvoTreeRegressor(T=Float32, loss=:linear, metric=:mse, - nrounds=10, + nrounds=100, lambda=1.0, gamma=0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=64, @@ -86,7 +86,7 @@ params1 = EvoTreeRegressor(T=Float32, # Asus laptop: 10.015568 seconds (13.80 M allocations: 1.844 GiB, 4.00% gc time) @time model = EvoTrees.fit_evotree(params1; x_train, y_train); @btime model = EvoTrees.fit_evotree(params1; x_train, y_train); -@time model, cache = EvoTrees.init_evotree_gpu(params1, X_train, Y_train); +@time model, cache = EvoTrees.init_evotree_gpu(params1; x_train, y_train); @time EvoTrees.grow_evotree!(model, cache); using MLJBase @@ -118,14 +118,14 @@ params1 = EvoTreeRegressor(T=Float32, # GPU - Gaussian ################################ params1 = EvoTreeGaussian(T=Float32, - loss=:gaussian, metric=:gaussian, + loss=:gaussian, nrounds=100, lambda=1.0, gamma=0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=32, device="gpu") # Asus laptop: 14.304369 seconds (24.81 M allocations: 2.011 GiB, 1.90% gc time) -@time model = EvoTrees.fit_evotree(params1, X_train, Y_train); +@time model = EvoTrees.fit_evotree(params1; x_train, y_train); # Auss laptop: 1.888472 seconds (8.40 k allocations: 1.613 GiB, 14.86% gc time) @time model, cache = EvoTrees.init_evotree(params1, X_train, Y_train); diff --git a/src/find_split.jl b/src/find_split.jl index 907d5290..7e233a2c 100644 --- a/src/find_split.jl +++ b/src/find_split.jl @@ -298,8 +298,10 @@ function hist_gains_cpu!( if bin == params.nbins gains[bin] = hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) / 2 elseif hL[i+2] > params.min_weight && hR[i+2] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+2], params, K) - predR = pred_scalar_cpu!(hR[i:i+2], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+2), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+2), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) @@ -362,8 +364,10 @@ function hist_gains_cpu!( hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) ) / 2 elseif hL[i+4] > params.min_weight && hR[i+4] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+4], params, K) - predR = pred_scalar_cpu!(hR[i:i+4], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+4), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+4), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) diff --git a/src/fit.jl b/src/fit.jl index fe7ff45a..abbe9ee2 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -94,7 +94,7 @@ function init_evotree( # assign monotone contraints in constraints vector monotone_constraints = zeros(Int32, x_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end diff --git a/src/gpu/find_split_gpu.jl b/src/gpu/find_split_gpu.jl index b6a29678..833d8749 100644 --- a/src/gpu/find_split_gpu.jl +++ b/src/gpu/find_split_gpu.jl @@ -237,8 +237,10 @@ function hist_gains_gpu_kernel!(gains::CuDeviceMatrix{T}, hL::CuDeviceArray{T,3} if i == nbins gains[i, j] = hL[1, i, j]^2 / (hL[2, i, j] + lambda * hL[3, i, j]) / 2 elseif hL[3, i, j] > min_weight && hR[3, i, j] > min_weight - predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) - predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) + predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) @@ -281,8 +283,10 @@ function hist_gains_gpu_kernel_gauss!(gains::CuDeviceMatrix{T}, hL::CuDeviceArra if i == nbins gains[i, j] = (hL[1, i, j]^2 / (hL[3, i, j] + lambda * hL[5, i, j]) + hL[2, i, j]^2 / (hL[4, i, j] + lambda * hL[5, i, j])) / 2 elseif hL[5, i, j] > min_weight && hR[5, i, j] > min_weight - predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) - predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) + predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) diff --git a/src/gpu/fit_gpu.jl b/src/gpu/fit_gpu.jl index 7dbb2f6f..d26cb3ed 100644 --- a/src/gpu/fit_gpu.jl +++ b/src/gpu/fit_gpu.jl @@ -68,7 +68,7 @@ function init_evotree_gpu( # assign monotone contraints in constraints vector monotone_constraints = zeros(Int32, x_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end diff --git a/src/predict.jl b/src/predict.jl index f6d3c64e..8f384799 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -103,7 +103,7 @@ function pred_leaf_cpu!( pred[1, n] = -params.eta * ∑[1] / (∑[2] + params.lambda * ∑[3]) end function pred_scalar_cpu!( - ∑::Vector{T}, + ∑::AbstractVector{T}, params::EvoTypes, K, ) where {L<:GradientRegression,T,S} @@ -123,7 +123,7 @@ function pred_leaf_cpu!( pred[1, n] = -params.eta * ∑[1] / (∑[3] + params.lambda * ∑[5]) pred[2, n] = -params.eta * ∑[2] / (∑[4] + params.lambda * ∑[5]) end -function pred_scalar_cpu!(∑::Vector{T}, params::EvoTypes{L,T,S}, K) where {L<:MLE2P,T,S} +function pred_scalar_cpu!(∑::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:MLE2P,T,S} -params.eta * ∑[1] / (∑[3] + params.lambda * ∑[5]) end @@ -171,6 +171,6 @@ function pred_leaf_cpu!( ) where {L<:L1Regression,T,S} pred[1, n] = params.eta * ∑[1] / (∑[3] * (1 + params.lambda)) end -function pred_scalar_cpu!(∑::Vector, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} +function pred_scalar_cpu!(∑::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} params.eta * ∑[1] / (∑[3] * (1 + params.lambda)) end \ No newline at end of file diff --git a/test/monotonic.jl b/test/monotonic.jl index 4acfde0d..873cd4f8 100644 --- a/test/monotonic.jl +++ b/test/monotonic.jl @@ -15,7 +15,7 @@ seed = 123 # train-eval split - 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) + 𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] @@ -28,51 +28,48 @@ ###################################### # benchmark params1 = EvoTreeRegressor( - device = "cpu", - loss = :linear, - metric = :mse, - nrounds = 200, - nbins = 32, - lambda = 1.0, - gamma = 0.0, - eta = 0.05, - max_depth = 6, - min_weight = 0.0, - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + loss=:linear, + nrounds=20, + nbins=32, + lambda=1.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=0.0, + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_ref = predict(model, x_train) # monotonic constraint params1 = EvoTreeRegressor( - device = "cpu", - loss = :linear, - metric = :mse, - nrounds = 200, - nbins = 32, - lambda = 1.0, - gamma = 0.0, - eta = 0.5, - max_depth = 6, - min_weight = 0.0, - monotone_constraints = Dict(1 => 1), - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + loss=:linear, + nrounds=20, + nbins=32, + lambda=1.0, + gamma=0.0, + eta=0.5, + max_depth=6, + min_weight=0.0, + monotone_constraints=Dict(1 => 1), + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_mono = predict(model, x_train) # using Plots - # using Colors - # x_perm = sortperm(X_train[:, 1]) - # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") - # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") - # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") + # x_perm = sortperm(x_train[:, 1]) + # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") + # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") + # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") ###################################### @@ -116,51 +113,49 @@ ###################################### # benchmark params1 = EvoTreeRegressor( - device = "cpu", - loss = :logistic, - metric = :logloss, - nrounds = 200, - nbins = 32, - lambda = 0.05, - gamma = 0.0, - eta = 0.05, - max_depth = 6, - min_weight = 0.0, - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + loss=:logistic, + metric=:logloss, + nrounds=200, + nbins=32, + lambda=0.05, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=0.0, + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_ref = predict(model, x_train) # monotonic constraint params1 = EvoTreeRegressor( - device = "cpu", - loss = :logistic, - metric = :logloss, - nrounds = 200, - nbins = 32, - lambda = 0.05, - gamma = 0.0, - eta = 0.05, - max_depth = 6, - min_weight = 0.0, - monotone_constraints = Dict(1 => 1), - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + loss=:logistic, + metric=:logloss, + nrounds=200, + nbins=32, + lambda=0.05, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=0.0, + monotone_constraints=Dict(1 => 1), + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_mono = predict(model, x_train) - # using Plots - # using Colors - # x_perm = sortperm(X_train[:, 1]) - # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") - # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") - # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") + # x_perm = sortperm(x_train[:, 1]) + # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") + # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") + # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") ###################################### @@ -202,57 +197,55 @@ ###################################### ### Gaussian - CPU ###################################### - # linear - benchmark + # benchmark params1 = EvoTreeGaussian( - device = "cpu", - metric = :gaussian, - nrounds = 200, - nbins = 32, - lambda = 1.0, - gamma = 0.0, - eta = 0.05, - max_depth = 6, - min_weight = 0.0, - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + metric=:gaussian, + nrounds=200, + nbins=32, + lambda=1.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=0.0, + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_ref = predict(model, x_train) # monotonic constraint params1 = EvoTreeGaussian( - device = "cpu", - metric = :gaussian, - nrounds = 200, - nbins = 32, - lambda = 1.0, - gamma = 0.0, - eta = 0.5, - max_depth = 6, - min_weight = 0.0, - monotone_constraints = Dict(1 => 1), - rowsample = 0.5, - colsample = 1.0, - rng = seed, + device="cpu", + metric=:gaussian, + nrounds=200, + nbins=32, + lambda=1.0, + gamma=0.0, + eta=0.5, + max_depth=6, + min_weight=0.0, + monotone_constraints=Dict(1 => 1), + rowsample=0.5, + colsample=1.0, + rng=seed, ) - model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25) + model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) preds_mono = predict(model, x_train) - # using Plots - # using Colors - # x_perm = sortperm(X_train[:, 1]) - # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") - # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") - # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") + # x_perm = sortperm(x_train[:, 1]) + # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") + # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") + # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") ###################################### ### Gaussian - GPU ###################################### - # linear - benchmark + # benchmark # params1 = EvoTreeGaussian( # device="gpu", # metric=:gaussian, @@ -261,8 +254,8 @@ # max_depth=6, min_weight=0.0, # rowsample=0.5, colsample=1.0, rng=seed) - # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); - # preds_ref = predict(model, x_train); + # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) + # preds_ref = predict(model, x_train) # # monotonic constraint # params1 = EvoTreeGaussian( @@ -274,14 +267,12 @@ # monotone_constraints=Dict(1 => 1), # rowsample=0.5, colsample=1.0, rng=seed) - # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); - # preds_mono = predict(model, x_train); + # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25) + # preds_mono = predict(model, x_train) - # using Plots - # using Colors - # x_perm = sortperm(X_train[:, 1]) - # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") - # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") - # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") + # x_perm = sortperm(x_train[:, 1]) + # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="GPU Gauss") + # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference") + # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic") end \ No newline at end of file