Skip to content

Commit

Permalink
Merge pull request #40 from Evovest/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jeremiedb authored Feb 17, 2020
2 parents fa52460 + 43c6482 commit 49267db
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 63 deletions.
22 changes: 0 additions & 22 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ git-tree-sha1 = "b7720de347734f4716d1815b00ce5664ed6bbfd4"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.9"

[[DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
version = "1.0.0"

[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down Expand Up @@ -81,11 +76,6 @@ uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[IteratorInterfaceExtensions]]
git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
uuid = "82899510-4779-5014-852e-03e436cf321d"
version = "1.0.0"

[[JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
Expand Down Expand Up @@ -248,18 +238,6 @@ version = "0.9.4"
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"

[[TableTraits]]
deps = ["IteratorInterfaceExtensions"]
git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e"
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
version = "1.0.0"

[[Tables]]
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
git-tree-sha1 = "aaed7b3b00248ff6a794375ad6adf30f30ca5591"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "0.2.11"

[[Test]]
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
7 changes: 1 addition & 6 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,16 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
CategoricalArrays = "0.7"
Distributions = "0.21, 0.22"
Distributions = "0.22"
MLJModelInterface = "0.1"
SortingAlgorithms = "0.3"
StaticArrays = "0.12"
StatsBase = "0.32"
Tables = "0.2"
julia = "1"

[extras]
Expand Down
3 changes: 2 additions & 1 deletion src/EvoTrees.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ using Random: seed!
using StaticArrays
using Distributions
using CategoricalArrays
import MLJModelInterface: predict
import MLJModelInterface
import MLJModelInterface: fit, update
import MLJModelInterface: predict

include("models.jl")
include("structs.jl")
Expand Down
67 changes: 33 additions & 34 deletions test/MLJ.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, η=0.05, nrounds=10)
# quantile regression
tree_model = EvoTreeRegressor(loss=:quantile, α=0.75, max_depth=5, η=0.05, nrounds=10)

tree = machine(tree_model, X, y)
mach = machine(tree_model, X, y)
train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
fit!(tree, rows=train, verbosity=1)
fit!(mach, rows=train, verbosity=1)

tree.model.nrounds += 10
fit!(tree, rows=train, verbosity=1)
mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)

# predict on train data
pred_train = predict(tree, selectrows(X,train))
pred_train = predict(mach, selectrows(X,train))
mean(abs.(pred_train - selectrows(Y,train)))

# predict on test data
pred_test = predict(tree, selectrows(X,test))
pred_test = predict(mach, selectrows(X,test))
mean(abs.(pred_test - selectrows(Y,test)))


Expand All @@ -46,23 +46,23 @@ X, y = @load_crabs
tree_model = EvoTreeClassifier(max_depth=4, η=0.05, λ=0.0, γ=0.0, nrounds=10)

# @load EvoTreeRegressor
tree = machine(tree_model, X, y)
mach = machine(tree_model, X, y)
train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
fit!(tree, rows=train, verbosity=1)
fit!(mach, rows=train, verbosity=1)

tree.model.nrounds += 10
fit!(tree, rows=train, verbosity=1)
mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)

pred_train = predict(tree, selectrows(X,train))
pred_train_mode = predict_mode(tree, selectrows(X,train))
pred_train = predict(mach, selectrows(X,train))
pred_train_mode = predict_mode(mach, selectrows(X,train))
cross_entropy(pred_train, selectrows(y, train)) |> mean
sum(pred_train_mode .== y[train])

pred_test = predict(tree, selectrows(X,test))
pred_test_mode = predict_mode(tree, selectrows(X,test))
pred_test = predict(mach, selectrows(X,test))
pred_test_mode = predict_mode(mach, selectrows(X,test))
cross_entropy(pred_test, selectrows(y, test)) |> mean
sum(pred_test_mode .== y[test])
pred_test_mode = predict_mode(tree, selectrows(X,test))
pred_test_mode = predict_mode(mach, selectrows(X,test))

##################################################
### count
Expand Down Expand Up @@ -94,19 +94,20 @@ X = MLJBase.table(X)
X_matrix = MLJBase.matrix(X)

# typeof(X)
tree = machine(tree_model, X, Y)
mach = machine(tree_model, X, Y)
train, test = partition(eachindex(Y), 0.8, shuffle=true); # 80:20 split
fit!(tree, rows=train, verbosity=1, force=true)
fit!(mach, rows=train, verbosity=1, force=true)

tree.model.nrounds += 10
MLJBase.update(tree.model, 0, tree.fitresult, tree.cache, X, Y)
mach.model.nrounds += 10
MLJBase.update(mach.model, 0, mach.fitresult, mach.cache, X, Y)

tree.model.nrounds += 10
fit!(tree, rows=train, verbosity=1)
mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)

pred = predict(tree, selectrows(X,train))
pred_mean = predict_mean(tree, selectrows(X,train))
pred_mode = predict_mode(tree, selectrows(X,train))
pred = predict(mach, selectrows(X,train))
pred_mean = predict_mean(mach, selectrows(X,train))
pred_mode = predict_mode(mach, selectrows(X,train))
# pred_mode = predict_median(mach, selectrows(X,train))

##################################################
### Gaussian - Larger data
Expand Down Expand Up @@ -137,19 +138,17 @@ X = MLJBase.table(X)
X_matrix = MLJBase.matrix(X)

# typeof(X)
tree = machine(tree_model, X, Y)
mach = machine(tree_model, X, Y)
train, test = partition(eachindex(Y), 0.8, shuffle=true); # 80:20 split
fit!(tree, rows=train, verbosity=1, force=true)
fit!(mach, rows=train, verbosity=1, force=true)

tree.model.nrounds += 10
MLJBase.update(tree.model, 0, tree.fitresult, tree.cache, X, Y)
mach.model.nrounds += 10
fit!(mach, rows=train, verbosity=1)

tree.model.nrounds += 10
fit!(tree, rows=train, verbosity=1)

pred = predict(tree, selectrows(X,train))
pred_mean = predict_mean(tree, selectrows(X,train))
pred_mode = predict_mode(tree, selectrows(X,train))
pred = predict(mach, selectrows(X,train))
pred_mean = predict_mean(mach, selectrows(X,train))
pred_mode = predict_mode(mach, selectrows(X,train))
# pred_mode = predict_median(mach, selectrows(X,train))
mean(abs.(pred_mean - selectrows(Y,train)))

q_20 = quantile.(pred, 0.20)
Expand Down
1 change: 1 addition & 0 deletions test/core.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Statistics
using StatsBase: sample
using EvoTrees: sigmoid, logit

Expand Down

0 comments on commit 49267db

Please sign in to comment.