Skip to content

Commit

Permalink
Merge pull request #134 from Evovest/mlj-gpu
Browse files Browse the repository at this point in the history
Fix support for GPU with MLJ
  • Loading branch information
jeremiedb authored Feb 1, 2022
2 parents 776da92 + 8fe2238 commit 92747ff
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 75 deletions.
19 changes: 2 additions & 17 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.4' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
- '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
- 'nightly'
os:
Expand Down Expand Up @@ -50,19 +50,4 @@ jobs:
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
file: lcov.info
docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: '1'
- name: Install dependencies
run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and deploy
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
run: julia --project=docs/ docs/make.jl
file: lcov.info
23 changes: 23 additions & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Builds the Documenter.jl site and deploys it.
# Runs on pushes to `main`, on every tag, and on pull requests.
name: Documentation
on:
  push:
    branches:
      - main
    tags: '*'
  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@latest
        with:
          version: '1.6'
      # Make the package in the repository root available to the docs environment.
      - name: Install dependencies
        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
      - name: Build and deploy
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
        run: julia --project=docs/ docs/make.jl
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.9.1"
version = "0.9.2"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
Expand Down
15 changes: 8 additions & 7 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
using Documenter
using EvoTrees

push!(LOAD_PATH,"../src/")
push!(LOAD_PATH, "../src/")

pages = ["Home" => "index.md",
pages = [
"Home" => "index.md",
"Examples" => "examples.md"]

makedocs(
sitename="EvoTrees.jl",
sitename = "EvoTrees.jl",
authors = "Jeremie Desgagne-Bouchard and contributors.",
format=Documenter.HTML(),
format = Documenter.HTML(),
pages = pages,
modules = [EvoTrees],)

deploydocs(repo="github.com/Evovest/EvoTrees.jl.git",
target="build",
deploydocs(repo = "github.com/Evovest/EvoTrees.jl.git",
target = "build",
devbranch = "main",
push_preview=false)
push_preview = false)
6 changes: 3 additions & 3 deletions src/EvoTrees.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ include("eval.jl")
include("predict.jl")
include("find_split.jl")
include("fit.jl")
include("importance.jl")
include("plot.jl")
include("MLJ.jl")

include("gpu/structs_gpu.jl")
include("gpu/loss_gpu.jl")
Expand All @@ -38,6 +35,9 @@ include("gpu/predict_gpu.jl")
include("gpu/find_split_gpu.jl")
include("gpu/fit_gpu.jl")

include("importance.jl")
include("plot.jl")
include("MLJ.jl")

function convert(::Type{GBTree}, m::GBTreeGPU)
EvoTrees.GBTree([EvoTrees.Tree(Array(tree.feat),
Expand Down
8 changes: 7 additions & 1 deletion src/MLJ.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
function MLJModelInterface.fit(model::EvoTypes, verbosity::Int, A, y)
fitresult, cache = init_evotree(model, A.matrix, y)

if model.device == "gpu"
fitresult, cache = init_evotree_gpu(model, A.matrix, y)
else
fitresult, cache = init_evotree(model, A.matrix, y)
end
grow_evotree!(fitresult, cache)
report = (feature_importances = importance(fitresult, A.names),)
return fitresult, cache, report
Expand All @@ -16,6 +21,7 @@ function okay_to_continue(new, old)
new.colsample == old.colsample &&
new.nbins == old.nbins &&
new.α == old.α &&
new.device == old.device &&
new.metric == old.metric
end

Expand Down
5 changes: 3 additions & 2 deletions src/importance.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# end
# end

function importance!(gain::AbstractVector, tree::Tree)
function importance!(gain::AbstractVector, tree::Union{Tree,TreeGPU})
@inbounds for n in eachindex(tree.split)
if tree.split[n]
gain[tree.feat[n]] += tree.gain[n]
Expand All @@ -20,7 +20,7 @@ end
Sorted normalized feature importance based on loss function gain.
"""
function importance(model::GBTree, vars::AbstractVector)
function importance(model::Union{GBTree,GBTreeGPU}, vars::AbstractVector)
gain = zeros(length(vars))

# Loop importance over all trees and sort results.
Expand All @@ -34,3 +34,4 @@ function importance(model::GBTree, vars::AbstractVector)

return pairs
end

109 changes: 65 additions & 44 deletions test/MLJ.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,54 +15,75 @@ X = MLJBase.table(X)

# @load EvoTreeRegressor
# linear regression
tree_model = EvoTreeRegressor(max_depth=5, η=0.05, nrounds=10)
tree_model = EvoTreeRegressor(max_depth = 5, η = 0.05, nrounds = 10)
# logistic regression
tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, η=0.05, nrounds=10)
tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10)
# quantile regression
# tree_model = EvoTreeRegressor(loss=:quantile, α=0.75, max_depth=5, η=0.05, nrounds=10)

mach = machine(tree_model, X, y)
train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
fit!(mach, rows=train, verbosity=1)
train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
fit!(mach, rows = train, verbosity = 1)

mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)
fit!(mach, rows = train, verbosity = 1)

# predict on train data
pred_train = predict(mach, selectrows(X,train))
mean(abs.(pred_train - selectrows(Y,train)))
pred_train = predict(mach, selectrows(X, train))
mean(abs.(pred_train - selectrows(Y, train)))

# predict on test data
pred_test = predict(mach, selectrows(X,test))
mean(abs.(pred_test - selectrows(Y,test)))
pred_test = predict(mach, selectrows(X, test))
mean(abs.(pred_test - selectrows(Y, test)))

@test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds

##################################################
### Regression - GPU
##################################################
# tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10, device = "gpu")
# mach = machine(tree_model, X, y)
# train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
# fit!(mach, rows = train, verbosity = 1)

# mach.model.nrounds += 10
# fit!(mach, rows = train, verbosity = 1)

# # predict on train data
# pred_train = predict(mach, selectrows(X, train))
# mean(abs.(pred_train - selectrows(Y, train)))

# # predict on test data
# pred_test = predict(mach, selectrows(X, test))
# mean(abs.(pred_test - selectrows(Y, test)))

# @test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds

##################################################
### classif - categorical target
##################################################
X, y = @load_crabs

tree_model = EvoTreeClassifier(max_depth=4, η=0.05, λ=0.0, γ=0.0, nrounds=10)
tree_model = EvoTreeClassifier(max_depth = 4, η = 0.05, λ = 0.0, γ = 0.0, nrounds = 10)

# @load EvoTreeRegressor
mach = machine(tree_model, X, y)
train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
fit!(mach, rows=train, verbosity=1)
train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
fit!(mach, rows = train, verbosity = 1)

mach.model.nrounds += 50
fit!(mach, rows=train, verbosity=1)
fit!(mach, rows = train, verbosity = 1)

pred_train = predict(mach, selectrows(X,train))
pred_train_mode = predict_mode(mach, selectrows(X,train))
pred_train = predict(mach, selectrows(X, train))
pred_train_mode = predict_mode(mach, selectrows(X, train))
cross_entropy(pred_train, selectrows(y, train)) |> mean
sum(pred_train_mode .== y[train]) / length(y[train])

pred_test = predict(mach, selectrows(X,test))
pred_test_mode = predict_mode(mach, selectrows(X,test))
pred_test = predict(mach, selectrows(X, test))
pred_test_mode = predict_mode(mach, selectrows(X, test))
cross_entropy(pred_test, selectrows(y, test)) |> mean
sum(pred_test_mode .== y[test]) / length(y[test])
pred_test_mode = predict_mode(mach, selectrows(X,test))
pred_test_mode = predict_mode(mach, selectrows(X, test))

##################################################
### count
Expand All @@ -71,39 +92,39 @@ features = rand(10_000, 10)
# features = rand(100, 10)
X = features
Y = rand(UInt8, size(X, 1))
𝑖 = collect(1:size(X,1))
𝑖 = collect(1:size(X, 1))

# train-eval split
𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
train_size = 0.8
𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]

X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]

# @load EvoTreeRegressor
tree_model = EvoTreeCount(
loss=:poisson, metric=:poisson,
nrounds=10,
λ = 0.0, γ=0.0, η=0.1,
loss = :poisson, metric = :poisson,
nrounds = 10,
λ = 0.0, γ = 0.0, η = 0.1,
max_depth = 6, min_weight = 1.0,
rowsample=0.5, colsample=0.5, nbins=32)
rowsample = 0.5, colsample = 0.5, nbins = 32)

X = MLJBase.table(X)
X = MLJBase.matrix(X)

# typeof(X)
mach = machine(tree_model, X, Y)
train, test = partition(eachindex(Y), 0.8, shuffle=true); # 80:20 split
fit!(mach, rows=train, verbosity=1, force=true)
train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split
fit!(mach, rows = train, verbosity = 1, force = true)

mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)
fit!(mach, rows = train, verbosity = 1)

pred = predict(mach, selectrows(X,train))
pred_mean = predict_mean(mach, selectrows(X,train))
pred_mode = predict_mode(mach, selectrows(X,train))
pred = predict(mach, selectrows(X, train))
pred_mean = predict_mean(mach, selectrows(X, train))
pred_mode = predict_mode(mach, selectrows(X, train))
# pred_mode = predict_median(mach, selectrows(X,train))

##################################################
Expand All @@ -112,40 +133,40 @@ pred_mode = predict_mode(mach, selectrows(X,train))
features = rand(10_000, 10)
X = features
Y = rand(size(X, 1))
𝑖 = collect(1:size(X,1))
𝑖 = collect(1:size(X, 1))

# train-eval split
𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
train_size = 0.8
𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]

X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]

# @load EvoTreeRegressor
tree_model = EvoTreeGaussian(
loss=:gaussian, metric=:gaussian,
nrounds=10,
λ = 0.0, γ=0.0, η=0.1,
loss = :gaussian, metric = :gaussian,
nrounds = 10,
λ = 0.0, γ = 0.0, η = 0.1,
max_depth = 6, min_weight = 1.0,
rowsample=0.5, colsample=0.5, nbins=32)
rowsample = 0.5, colsample = 0.5, nbins = 32)

X = MLJBase.table(X)

# typeof(X)
mach = machine(tree_model, X, Y)
train, test = partition(eachindex(Y), 0.8, shuffle=true); # 80:20 split
fit!(mach, rows=train, verbosity=1, force=true)
train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split
fit!(mach, rows = train, verbosity = 1, force = true)

mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)
fit!(mach, rows = train, verbosity = 1)

pred = predict(mach, selectrows(X,train))
pred_mean = predict_mean(mach, selectrows(X,train))
pred_mode = predict_mode(mach, selectrows(X,train))
pred = predict(mach, selectrows(X, train))
pred_mean = predict_mean(mach, selectrows(X, train))
pred_mode = predict_mode(mach, selectrows(X, train))
# pred_mode = predict_median(mach, selectrows(X,train))
mean(abs.(pred_mean - selectrows(Y,train)))
mean(abs.(pred_mean - selectrows(Y, train)))

# Element-wise 0.20 and 0.80 quantiles of the predictions in `pred`.
# The 0.80 quantile was previously stored in `q_20` as well, overwriting the line above.
q_20 = quantile.(pred, 0.20)
q_80 = quantile.(pred, 0.80)
Expand All @@ -159,7 +180,7 @@ report(mach)
# tests that `update` handles data correctly in the case of a cold
# restart:

X = MLJBase.table(rand(5,2))
X = MLJBase.table(rand(5, 2))
y = rand(5)
model = EvoTreeRegressor()
data = MLJBase.reformat(model, X, y);
Expand Down

2 comments on commit 92747ff

@jeremiedb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/53591

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.9.2 -m "<description of version>" 92747ff703dd0bd5e2d656a4e6c63c46088c9b55
git push origin v0.9.2

Please sign in to comment.