diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 344a5482..1a80ca79 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: version: - - '1.4' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. + - '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - 'nightly' os: @@ -50,19 +50,4 @@ jobs: - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: - file: lcov.info - docs: - name: Documentation - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - name: Install dependencies - run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and deploy - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key - run: julia --project=docs/ docs/make.jl \ No newline at end of file + file: lcov.info \ No newline at end of file diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..79a23d82 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,23 @@ +name: Documentation +on: + push: + branches: + - main + tags: '*' + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: '1.6' + - name: Install dependencies + run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and deploy 
+ env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key + run: julia --project=docs/ docs/make.jl \ No newline at end of file diff --git a/Project.toml b/Project.toml index 9fd4cfea..0d424f94 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "EvoTrees" uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" authors = ["jeremiedb "] -version = "0.9.1" +version = "0.9.2" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" diff --git a/docs/make.jl b/docs/make.jl index 71b61edd..892fd6fd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,19 +1,20 @@ using Documenter using EvoTrees -push!(LOAD_PATH,"../src/") +push!(LOAD_PATH, "../src/") -pages = ["Home" => "index.md", +pages = [ + "Home" => "index.md", "Examples" => "examples.md"] makedocs( - sitename="EvoTrees.jl", + sitename = "EvoTrees.jl", authors = "Jeremie Desgagne-Bouchard and contributors.", - format=Documenter.HTML(), + format = Documenter.HTML(), pages = pages, modules = [EvoTrees],) -deploydocs(repo="github.com/Evovest/EvoTrees.jl.git", - target="build", +deploydocs(repo = "github.com/Evovest/EvoTrees.jl.git", + target = "build", devbranch = "main", - push_preview=false) \ No newline at end of file + push_preview = false) \ No newline at end of file diff --git a/src/EvoTrees.jl b/src/EvoTrees.jl index d4f90af6..a493988b 100644 --- a/src/EvoTrees.jl +++ b/src/EvoTrees.jl @@ -27,9 +27,6 @@ include("eval.jl") include("predict.jl") include("find_split.jl") include("fit.jl") -include("importance.jl") -include("plot.jl") -include("MLJ.jl") include("gpu/structs_gpu.jl") include("gpu/loss_gpu.jl") @@ -38,6 +35,9 @@ include("gpu/predict_gpu.jl") include("gpu/find_split_gpu.jl") include("gpu/fit_gpu.jl") +include("importance.jl") +include("plot.jl") +include("MLJ.jl") function convert(::Type{GBTree}, m::GBTreeGPU) EvoTrees.GBTree([EvoTrees.Tree(Array(tree.feat), diff --git 
a/src/MLJ.jl b/src/MLJ.jl index 4bc2d8dd..d161a073 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -1,5 +1,10 @@ function MLJModelInterface.fit(model::EvoTypes, verbosity::Int, A, y) - fitresult, cache = init_evotree(model, A.matrix, y) + + if model.device == "gpu" + fitresult, cache = init_evotree_gpu(model, A.matrix, y) + else + fitresult, cache = init_evotree(model, A.matrix, y) + end grow_evotree!(fitresult, cache) report = (feature_importances = importance(fitresult, A.names),) return fitresult, cache, report @@ -16,6 +21,7 @@ function okay_to_continue(new, old) new.colsample == old.colsample && new.nbins == old.nbins && new.α == old.α && + new.device == old.device && new.metric == old.metric end diff --git a/src/importance.jl b/src/importance.jl index 23b3ea09..fa22ef15 100644 --- a/src/importance.jl +++ b/src/importance.jl @@ -7,7 +7,7 @@ # end # end -function importance!(gain::AbstractVector, tree::Tree) +function importance!(gain::AbstractVector, tree::Union{Tree,TreeGPU}) @inbounds for n in eachindex(tree.split) if tree.split[n] gain[tree.feat[n]] += tree.gain[n] @@ -20,7 +20,7 @@ end Sorted normalized feature importance based on loss function gain. """ -function importance(model::GBTree, vars::AbstractVector) +function importance(model::Union{GBTree,GBTreeGPU}, vars::AbstractVector) gain = zeros(length(vars)) # Loop importance over all trees and sort results. 
@@ -34,3 +34,4 @@ function importance(model::GBTree, vars::AbstractVector) return pairs end + diff --git a/test/MLJ.jl b/test/MLJ.jl index f3899497..ffaa2c85 100644 --- a/test/MLJ.jl +++ b/test/MLJ.jl @@ -15,54 +15,75 @@ X = MLJBase.table(X) # @load EvoTreeRegressor # linear regression -tree_model = EvoTreeRegressor(max_depth=5, η=0.05, nrounds=10) +tree_model = EvoTreeRegressor(max_depth = 5, η = 0.05, nrounds = 10) # logistic regression -tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, η=0.05, nrounds=10) +tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10) # quantile regression # tree_model = EvoTreeRegressor(loss=:quantile, α=0.75, max_depth=5, η=0.05, nrounds=10) mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) # predict on train data -pred_train = predict(mach, selectrows(X,train)) -mean(abs.(pred_train - selectrows(Y,train))) +pred_train = predict(mach, selectrows(X, train)) +mean(abs.(pred_train - selectrows(Y, train))) # predict on test data -pred_test = predict(mach, selectrows(X,test)) -mean(abs.(pred_test - selectrows(Y,test))) +pred_test = predict(mach, selectrows(X, test)) +mean(abs.(pred_test - selectrows(Y, test))) @test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds +################################################## +### Regression - GPU +################################################## +# tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, η = 0.05, nrounds = 10, device = "gpu") +# mach = machine(tree_model, X, y) +# train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +# fit!(mach, rows = train, verbosity = 1) + +# mach.model.nrounds += 10 +# 
fit!(mach, rows = train, verbosity = 1) + +# # predict on train data +# pred_train = predict(mach, selectrows(X, train)) +# mean(abs.(pred_train - selectrows(Y, train))) + +# # predict on test data +# pred_test = predict(mach, selectrows(X, test)) +# mean(abs.(pred_test - selectrows(Y, test))) + +# @test MLJBase.iteration_parameter(EvoTreeRegressor) == :nrounds + ################################################## ### classif - categorical target ################################################## X, y = @load_crabs -tree_model = EvoTreeClassifier(max_depth=4, η=0.05, λ=0.0, γ=0.0, nrounds=10) +tree_model = EvoTreeClassifier(max_depth = 4, η = 0.05, λ = 0.0, γ = 0.0, nrounds = 10) # @load EvoTreeRegressor mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 50 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) -pred_train = predict(mach, selectrows(X,train)) -pred_train_mode = predict_mode(mach, selectrows(X,train)) +pred_train = predict(mach, selectrows(X, train)) +pred_train_mode = predict_mode(mach, selectrows(X, train)) cross_entropy(pred_train, selectrows(y, train)) |> mean sum(pred_train_mode .== y[train]) / length(y[train]) -pred_test = predict(mach, selectrows(X,test)) -pred_test_mode = predict_mode(mach, selectrows(X,test)) +pred_test = predict(mach, selectrows(X, test)) +pred_test_mode = predict_mode(mach, selectrows(X, test)) cross_entropy(pred_test, selectrows(y, test)) |> mean sum(pred_test_mode .== y[test]) / length(y[test]) -pred_test_mode = predict_mode(mach, selectrows(X,test)) +pred_test_mode = predict_mode(mach, selectrows(X, test)) ################################################## ### count @@ -71,39 +92,39 @@ features = rand(10_000, 10) # features = rand(100, 10) X = features Y = rand(UInt8, 
size(X, 1)) -𝑖 = collect(1:size(X,1)) +𝑖 = collect(1:size(X, 1)) # train-eval split 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] -𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :] Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeCount( - loss=:poisson, metric=:poisson, - nrounds=10, - λ = 0.0, γ=0.0, η=0.1, + loss = :poisson, metric = :poisson, + nrounds = 10, + λ = 0.0, γ = 0.0, η = 0.1, max_depth = 6, min_weight = 1.0, - rowsample=0.5, colsample=0.5, nbins=32) + rowsample = 0.5, colsample = 0.5, nbins = 32) X = MLJBase.table(X) X = MLJBase.matrix(X) # typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) -pred = predict(mach, selectrows(X,train)) -pred_mean = predict_mean(mach, selectrows(X,train)) -pred_mode = predict_mode(mach, selectrows(X,train)) +pred = predict(mach, selectrows(X, train)) +pred_mean = predict_mean(mach, selectrows(X, train)) +pred_mode = predict_mode(mach, selectrows(X, train)) # pred_mode = predict_median(mach, selectrows(X,train)) ################################################## @@ -112,40 +133,40 @@ pred_mode = predict_mode(mach, selectrows(X,train)) features = rand(10_000, 10) X = features Y = rand(size(X, 1)) -𝑖 = collect(1:size(X,1)) +𝑖 = collect(1:size(X, 1)) # train-eval split 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] -𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end] 
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :] Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeGaussian( - loss=:gaussian, metric=:gaussian, - nrounds=10, - λ = 0.0, γ=0.0, η=0.1, + loss = :gaussian, metric = :gaussian, + nrounds = 10, + λ = 0.0, γ = 0.0, η = 0.1, max_depth = 6, min_weight = 1.0, - rowsample=0.5, colsample=0.5, nbins=32) + rowsample = 0.5, colsample = 0.5, nbins = 32) X = MLJBase.table(X) # typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 80:20 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) -pred = predict(mach, selectrows(X,train)) -pred_mean = predict_mean(mach, selectrows(X,train)) -pred_mode = predict_mode(mach, selectrows(X,train)) +pred = predict(mach, selectrows(X, train)) +pred_mean = predict_mean(mach, selectrows(X, train)) +pred_mode = predict_mode(mach, selectrows(X, train)) # pred_mode = predict_median(mach, selectrows(X,train)) -mean(abs.(pred_mean - selectrows(Y,train))) +mean(abs.(pred_mean - selectrows(Y, train))) q_20 = quantile.(pred, 0.20) q_20 = quantile.(pred, 0.80) @@ -159,7 +180,7 @@ report(mach) # tests that `update` handles data correctly in the case of a cold # restatrt: -X = MLJBase.table(rand(5,2)) +X = MLJBase.table(rand(5, 2)) y = rand(5) model = EvoTreeRegressor() data = MLJBase.reformat(model, X, y);