Merge pull request #134 from Evovest/mlj-gpu

Fix support for GPU with MLJ
Evovest · Feb 1, 2022 · 92747ff · 92747ff · jeremiedb · Feb 1, 2022
2 parents 776da92 + 8fe2238
commit 92747ff
Show file tree

Hide file tree

Showing 8 changed files with 112 additions and 75 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.4' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
+          - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
           - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
           - 'nightly'
         os:
@@ -50,19 +50,4 @@ jobs:
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v1
         with:
-          file: lcov.info
-  docs:
-    name: Documentation
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
-        with:
-          version: '1'
-      - name: Install dependencies
-        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
-      - name: Build and deploy
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
-          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
-        run: julia --project=docs/ docs/make.jl
+          file: lcov.info
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -0,0 +1,23 @@
+name: Documentation
+on:
+  push:
+    branches:
+      - main
+    tags: '*'
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: '1.6'
+      - name: Install dependencies
+        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+      - name: Build and deploy
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
+        run: julia --project=docs/ docs/make.jl
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "EvoTrees"
 uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 authors = ["jeremiedb <[email protected]>"]
-version = "0.9.1"
+version = "0.9.2"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"

diff --git a/docs/make.jl b/docs/make.jl
@@ -1,19 +1,20 @@
 using Documenter
 using EvoTrees
 
-push!(LOAD_PATH,"../src/")
+push!(LOAD_PATH, "../src/")
 
-pages = ["Home" => "index.md",
+pages = [
+    "Home" => "index.md",
     "Examples" => "examples.md"]
 
 makedocs(
-    sitename="EvoTrees.jl",
+    sitename = "EvoTrees.jl",
     authors = "Jeremie Desgagne-Bouchard and contributors.",
-    format=Documenter.HTML(),
+    format = Documenter.HTML(),
     pages = pages,
     modules = [EvoTrees],)
 
-deploydocs(repo="github.com/Evovest/EvoTrees.jl.git", 
-    target="build",
+deploydocs(repo = "github.com/Evovest/EvoTrees.jl.git",
+    target = "build",
     devbranch = "main",
-    push_preview=false)
+    push_preview = false)
diff --git a/src/EvoTrees.jl b/src/EvoTrees.jl
@@ -27,9 +27,6 @@ include("eval.jl")
 include("predict.jl")
 include("find_split.jl")
 include("fit.jl")
-include("importance.jl")
-include("plot.jl")
-include("MLJ.jl")
 
 include("gpu/structs_gpu.jl")
 include("gpu/loss_gpu.jl")
@@ -38,6 +35,9 @@ include("gpu/predict_gpu.jl")
 include("gpu/find_split_gpu.jl")
 include("gpu/fit_gpu.jl")
 
+include("importance.jl")
+include("plot.jl")
+include("MLJ.jl")
 
 function convert(::Type{GBTree}, m::GBTreeGPU)
     EvoTrees.GBTree([EvoTrees.Tree(Array(tree.feat),

diff --git a/src/MLJ.jl b/src/MLJ.jl
@@ -1,5 +1,10 @@
 function MLJModelInterface.fit(model::EvoTypes, verbosity::Int, A, y)
-    fitresult, cache = init_evotree(model, A.matrix, y)
+
+    if model.device == "gpu"
+        fitresult, cache = init_evotree_gpu(model, A.matrix, y)
+    else
+        fitresult, cache = init_evotree(model, A.matrix, y)
+    end
     grow_evotree!(fitresult, cache)
     report = (feature_importances = importance(fitresult, A.names),)
     return fitresult, cache, report
@@ -16,6 +21,7 @@ function okay_to_continue(new, old)
         new.colsample == old.colsample &&
         new.nbins == old.nbins &&
         new.α == old.α &&
+        new.device == old.device &&
         new.metric == old.metric
 end
 

diff --git a/src/importance.jl b/src/importance.jl
@@ -7,7 +7,7 @@
 #     end
 # end
 
-function importance!(gain::AbstractVector, tree::Tree)
+function importance!(gain::AbstractVector, tree::Union{Tree,TreeGPU})
     @inbounds for n in eachindex(tree.split)
         if tree.split[n]
             gain[tree.feat[n]] += tree.gain[n]
@@ -20,7 +20,7 @@ end
 
 Sorted normalized feature importance based on loss function gain.
 """
-function importance(model::GBTree, vars::AbstractVector)
+function importance(model::Union{GBTree,GBTreeGPU}, vars::AbstractVector)
     gain = zeros(length(vars))
 
     # Loop importance over all trees and sort results.
@@ -34,3 +34,4 @@ function importance(model::GBTree, vars::AbstractVector)
 
     return pairs
 end
+
diff --git a/test/MLJ.jl b/test/MLJ.jl
@@ -15,54 +15,75 @@ X = MLJBase.table(X)
 
 # @load EvoTreeRegressor
 # linear regression
-tree_model = EvoTreeRegressor(max_depth=5, η=0.05, nrounds=10)
+tree_model = EvoTreeRegressor(max_depth = 5, η = 0.05, nrounds = 10)
 # logistic regression
-tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, η=0.05, nrounds=10)
+tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10)
 # quantile regression
 # tree_model = EvoTreeRegressor(loss=:quantile, α=0.75, max_depth=5, η=0.05, nrounds=10)
 
 mach = machine(tree_model, X, y)
-train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
-fit!(mach, rows=train, verbosity=1)
+train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
+fit!(mach, rows = train, verbosity = 1)
 
 mach.model.nrounds += 10
-fit!(mach, rows=train, verbosity=1)
+fit!(mach, rows = train, verbosity = 1)
 
 # predict on train data
-pred_train = predict(mach, selectrows(X,train))
-mean(abs.(pred_train - selectrows(Y,train)))
+pred_train = predict(mach, selectrows(X, train))
+mean(abs.(pred_train - selectrows(Y, train)))
 
 # predict on test data
-pred_test = predict(mach, selectrows(X,test))
-mean(abs.(pred_test - selectrows(Y,test)))
+pred_test = predict(mach, selectrows(X, test))
+mean(abs.(pred_test - selectrows(Y, test)))
 
 @test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds
 
+##################################################
+### Regression - GPU
+##################################################
+# tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10, device = "gpu")
+# mach = machine(tree_model, X, y)
+# train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
+# fit!(mach, rows = train, verbosity = 1)
+
+# mach.model.nrounds += 10
+# fit!(mach, rows = train, verbosity = 1)
+
+# # predict on train data
+# pred_train = predict(mach, selectrows(X, train))
+# mean(abs.(pred_train - selectrows(Y, train)))
+
+# # predict on test data
+# pred_test = predict(mach, selectrows(X, test))
+# mean(abs.(pred_test - selectrows(Y, test)))
+
+# @test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds
+
 ##################################################
 ### classif - categorical target
 ##################################################
 X, y = @load_crabs
 
-tree_model = EvoTreeClassifier(max_depth=4, η=0.05, λ=0.0, γ=0.0, nrounds=10)
+tree_model = EvoTreeClassifier(max_depth = 4, η = 0.05, λ = 0.0, γ = 0.0, nrounds = 10)
 
 # @load EvoTreeRegressor
 mach = machine(tree_model, X, y)
-train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
-fit!(mach, rows=train, verbosity=1)
+train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split
+fit!(mach, rows = train, verbosity = 1)
 
 mach.model.nrounds += 50
-fit!(mach, rows=train, verbosity=1)
+fit!(mach, rows = train, verbosity = 1)
 
-pred_train = predict(mach, selectrows(X,train))
-pred_train_mode = predict_mode(mach, selectrows(X,train))
+pred_train = predict(mach, selectrows(X, train))
+pred_train_mode = predict_mode(mach, selectrows(X, train))
 cross_entropy(pred_train, selectrows(y, train)) |> mean
 sum(pred_train_mode .== y[train]) / length(y[train])
 
-pred_test = predict(mach, selectrows(X,test))
-pred_test_mode = predict_mode(mach, selectrows(X,test))
+pred_test = predict(mach, selectrows(X, test))
+pred_test_mode = predict_mode(mach, selectrows(X, test))
 cross_entropy(pred_test, selectrows(y, test)) |> mean
 sum(pred_test_mode .== y[test]) / length(y[test])
-pred_test_mode = predict_mode(mach, selectrows(X,test))
+pred_test_mode = predict_mode(mach, selectrows(X, test))
 
 ##################################################
 ### count
@@ -71,39 +92,39 @@ features = rand(10_000, 10)
 # features = rand(100, 10)
 X = features
 Y = rand(UInt8, size(X, 1))
-𝑖 = collect(1:size(X,1))
+𝑖 = collect(1:size(X, 1))
 
 # train-eval split
 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
-𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
 
 X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
 Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
 
 # @load EvoTreeRegressor
 tree_model = EvoTreeCount(
-    loss=:poisson, metric=:poisson,
-    nrounds=10,
-    λ = 0.0, γ=0.0, η=0.1,
+    loss = :poisson, metric = :poisson,
+    nrounds = 10,
+    λ = 0.0, γ = 0.0, η = 0.1,
     max_depth = 6, min_weight = 1.0,
-    rowsample=0.5, colsample=0.5, nbins=32)
+    rowsample = 0.5, colsample = 0.5, nbins = 32)
 
 X = MLJBase.table(X)
 X = MLJBase.matrix(X)
 
 # typeof(X)
 mach = machine(tree_model, X, Y)
-train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split
-fit!(mach, rows=train, verbosity=1, force=true)
+train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split
+fit!(mach, rows = train, verbosity = 1, force = true)
 
 mach.model.nrounds += 10
-fit!(mach, rows=train, verbosity=1)
+fit!(mach, rows = train, verbosity = 1)
 
-pred = predict(mach, selectrows(X,train))
-pred_mean = predict_mean(mach, selectrows(X,train))
-pred_mode = predict_mode(mach, selectrows(X,train))
+pred = predict(mach, selectrows(X, train))
+pred_mean = predict_mean(mach, selectrows(X, train))
+pred_mode = predict_mode(mach, selectrows(X, train))
 # pred_mode = predict_median(mach, selectrows(X,train))
 
 ##################################################
@@ -112,40 +133,40 @@ pred_mode = predict_mode(mach, selectrows(X,train))
 features = rand(10_000, 10)
 X = features
 Y = rand(size(X, 1))
-𝑖 = collect(1:size(X,1))
+𝑖 = collect(1:size(X, 1))
 
 # train-eval split
 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
-𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
 
 X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
 Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
 
 # @load EvoTreeRegressor
 tree_model = EvoTreeGaussian(
-    loss=:gaussian, metric=:gaussian,
-    nrounds=10,
-    λ = 0.0, γ=0.0, η=0.1,
+    loss = :gaussian, metric = :gaussian,
+    nrounds = 10,
+    λ = 0.0, γ = 0.0, η = 0.1,
     max_depth = 6, min_weight = 1.0,
-    rowsample=0.5, colsample=0.5, nbins=32)
+    rowsample = 0.5, colsample = 0.5, nbins = 32)
 
 X = MLJBase.table(X)
 
 # typeof(X)
 mach = machine(tree_model, X, Y)
-train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split
-fit!(mach, rows=train, verbosity=1, force=true)
+train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split
+fit!(mach, rows = train, verbosity = 1, force = true)
 
 mach.model.nrounds += 10
-fit!(mach, rows=train, verbosity=1)
+fit!(mach, rows = train, verbosity = 1)
 
-pred = predict(mach, selectrows(X,train))
-pred_mean = predict_mean(mach, selectrows(X,train))
-pred_mode = predict_mode(mach, selectrows(X,train))
+pred = predict(mach, selectrows(X, train))
+pred_mean = predict_mean(mach, selectrows(X, train))
+pred_mode = predict_mode(mach, selectrows(X, train))
 # pred_mode = predict_median(mach, selectrows(X,train))
-mean(abs.(pred_mean - selectrows(Y,train)))
+mean(abs.(pred_mean - selectrows(Y, train)))
 
 q_20 = quantile.(pred, 0.20)
 q_20 = quantile.(pred, 0.80)
@@ -159,7 +180,7 @@ report(mach)
 # tests that `update` handles data correctly in the case of a cold
 # restart:
 
-X = MLJBase.table(rand(5,2))
+X = MLJBase.table(rand(5, 2))
 y = rand(5)
 model = EvoTreeRegressor()
 data = MLJBase.reformat(model, X, y);