diff --git a/Project.toml b/Project.toml index a4c8527c..93366856 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ authors = ["jeremiedb "] name = "EvoTrees" uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -version = "0.12.0" +version = "0.12.1" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" diff --git a/experiments/benchmarks_v2-MLE.jl b/experiments/benchmarks_v2-MLE.jl new file mode 100644 index 00000000..647b8300 --- /dev/null +++ b/experiments/benchmarks_v2-MLE.jl @@ -0,0 +1,79 @@ +using Revise +using Statistics +using StatsBase: sample +using XGBoost +using EvoTrees +using BenchmarkTools +using CUDA + +nrounds = 200 +nobs = Int(1e6) +num_feat = Int(100) +nthread = Base.Threads.nthreads() + +# EvoTrees params +params_evo = EvoTreeMLE( + T=Float64, + loss=:gaussian, + nrounds=nrounds, + lambda=0.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=100.0, + rowsample=0.5, + colsample=0.5, + nbins=64, +) + +@info "testing with: $nobs observations | $num_feat features." +x_train = rand(nobs, num_feat) +y_train = rand(size(x_train, 1)) + +@info "evotrees train CPU:" +params_evo.device = "cpu" +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:gaussian, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:gaussian); +@info "evotrees predict CPU:" +@time pred_evo = EvoTrees.predict(m_evo, x_train); +@btime EvoTrees.predict($m_evo, $x_train); + +CUDA.allowscalar(true) +@info "evotrees train GPU:" +params_evo.device = "gpu" +@time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:gaussian, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:gaussian); +@info "evotrees predict GPU:" +@time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); +@btime EvoTrees.predict($m_evo_gpu, $x_train); + + +################################ +# Logistic +################################ +params_evo = EvoTreeMLE( + T=Float64, + loss=:logistic, + nrounds=nrounds, + lambda=0.0, + gamma=0.0, + eta=0.05, + max_depth=6, + min_weight=100.0, + rowsample=0.5, + colsample=0.5, + nbins=64, +) + +@info "testing with: $nobs observations | $num_feat features." 
+x_train = rand(nobs, num_feat) +y_train = rand(size(x_train, 1)) + +@info "evotrees train CPU:" +params_evo.device = "cpu" +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=:logistic, print_every_n=100); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=:logistic); +@info "evotrees predict CPU:" +@time pred_evo = EvoTrees.predict(m_evo, x_train); +@btime EvoTrees.predict($m_evo, $x_train); \ No newline at end of file diff --git a/experiments/benchmarks_v2.jl b/experiments/benchmarks_v2.jl index 42c0e683..14f66020 100644 --- a/experiments/benchmarks_v2.jl +++ b/experiments/benchmarks_v2.jl @@ -10,7 +10,7 @@ nrounds = 200 nthread = Base.Threads.nthreads() @info nthread -loss = "logistic" +loss = "linear" if loss == "linear" loss_xgb = "reg:squarederror" metric_xgb = "mae" @@ -40,7 +40,6 @@ metrics = [metric_xgb] params_evo = EvoTreeRegressor( T=Float32, loss=loss_evo, - metric=metric_evo, nrounds=nrounds, alpha=0.5, lambda=0.0, @@ -68,8 +67,9 @@ y_train = rand(size(x_train, 1)) @info "evotrees train CPU:" params_evo.device = "cpu" -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); +@btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train); @info "evotrees predict CPU:" @time pred_evo = EvoTrees.predict(m_evo, x_train); @btime EvoTrees.predict($m_evo, $x_train); @@ -78,12 +78,8 @@ CUDA.allowscalar(true) @info "evotrees train GPU:" params_evo.device = "gpu" @time m_evo_gpu = fit_evotree(params_evo; x_train, y_train); -@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=50); +@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, print_every_n=100); @btime fit_evotree($params_evo; x_train=$x_train, y_train=$y_train, x_eval=$x_train, y_eval=$y_train, metric=metric_evo); @info "evotrees predict GPU:" @time pred_evo = EvoTrees.predict(m_evo_gpu, x_train); -@btime EvoTrees.predict($m_evo_gpu, $x_train); - -# w_train = ones(length(y_train)) -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train); -# @time m_evo_gpu = fit_evotree(params_evo, x_train, y_train, w_train); \ No newline at end of file +@btime EvoTrees.predict($m_evo_gpu, $x_train); \ No newline at end of file diff --git a/experiments/logistic_tests.jl b/experiments/logistic_tests.jl new file mode 100644 index 00000000..d4f507de --- /dev/null +++ b/experiments/logistic_tests.jl @@ -0,0 +1,142 @@ +using Statistics +using StatsBase: sample, sample! 
+using EvoTrees +using BenchmarkTools +using CUDA + +# prepare a dataset +features = rand(Int(1.25e4), 100) +# features = rand(100, 10) +X = features +Y = rand(size(X, 1)) +𝑖 = collect(1:size(X, 1)) + +# train-eval split +𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +train_size = 0.8 +𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] + +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] + + +########################### +# Tree CPU +########################### +params_c = EvoTrees.EvoTreeMLE(T=Float32, + loss=:logistic, + nrounds=200, + lambda=0.1, gamma=0.0, eta=0.1, + max_depth=5, min_weight=1.0, + rowsample=0.5, colsample=0.5, nbins=16); + +model_c, cache_c = EvoTrees.init_evotree(params_c, x_train, y_train); +EvoTrees.grow_evotree!(model_c, cache_c) +p = model_c(x_train) +sort(p[:,1]) +sort(p[:,2]) + +# initialize from cache +params_c = model_c.params +x_size = size(cache_c.X_bin) + +# select random rows and cols +sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); +sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); +# @btime sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); +# @btime sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); + +𝑖 = cache_c.nodes[1].𝑖 +𝑗 = cache_c.𝑗 + +# build a new tree +# 897.800 ΞΌs (6 allocations: 736 bytes) +get_loss_type(m::EvoTreeGaussian{L,T,S}) where {L,T,S} = L +get_loss_type(m::EvoTrees.EvoTreeLogistic{L,T,S}) where {L,T,S} = L + +L = get_loss_type(params_c) +@time EvoTrees.update_grads!(L, cache_c.δ𝑀, cache_c.pred, cache_c.Y; alpha=params_c.alpha) +cache_c.δ𝑀 + +sort(cache_c.δ𝑀[1, :]) +sort(cache_c.δ𝑀[2, :]) +sort(cache_c.δ𝑀[3, :]) +sort(cache_c.δ𝑀[4, :]) + +p = collect(-3:0.5:3) +y = collect(-3:0.5:3) + +function get_grads(p, y) + grad = zeros(length(p), length(y)) + for i in eachindex(p) + for j in eachindex(y) + # alternate from 1 + # grad[i, j] = -(exp(-2s) * (u - y) * (u - y + exp(s) * sinh(exp(-s) * (u - y)))) / (1 + cosh(exp(-s) * (u - y))) + grad[i, j] = (exp(-2 * p[i]) * (0.0 - y[j]) * (0.0 - y[j] + exp(p[i]) * sinh(exp(-p[i]) * (0.0 - y[j])))) / (1 + cosh(exp(-p[i]) * (0.0 - y[j]))) + end + end + return grad +end + +grads = get_grads(p, y) +heatmap(grads) +# @btime EvoTrees.update_grads!($params_c.loss, $cache_c.δ𝑀, $cache_c.pred_cpu, $cache_c.Y_cpu, $params_c.Ξ±) +# βˆ‘ = vec(sum(cache_c.Ξ΄[𝑖,:], dims=1)) +# gain = EvoTrees.get_gain(params_c.loss, βˆ‘, params_c.Ξ») +# assign a root and grow tree +# train_nodes[1] = EvoTrees.TrainNode(UInt32(0), UInt32(1), βˆ‘, gain) + +# 62.530 ms (7229 allocations: 17.43 MiB) +# 1.25e5: 9.187 ms (7358 allocations: 2.46 MiB) +tree = EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))) +@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.X_bin, cache_c.K) +@btime EvoTrees.grow_tree!($EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))), $cache_c.nodes, $params_c, $cache_c.δ𝑀, $cache_c.edges, $cache_c.𝑗, $cache_c.left, $cache_c.left, $cache_c.right, $cache_c.X_bin, $cache_c.K) + +@time EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); +@btime EvoTrees.grow_tree!(EvoTrees.Tree($params_c.max_depth, $model_c.K, 
$params_c.Ξ»), $params_c, $cache_c.Ξ΄, $cache_c.hist, $cache_c.histL, $cache_c.histR, $cache_c.gains, $cache_c.edges, $𝑖, $𝑗, $𝑛, $cache_c.X_bin); +@code_warntype EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); + +# push!(model_c.trees, tree) +# 1.883 ms (83 allocations: 13.77 KiB) +@btime EvoTrees.predict!(model_c.params.loss, cache_c.pred_cpu, tree, cache_c.X, model_c.K) + +δ𝑀, K, edges, X_bin, nodes, out, left, right = cache_c.δ𝑀, cache_c.K, cache_c.edges, cache_c.X_bin, cache_c.nodes, cache_c.out, cache_c.left, cache_c.right; + +# 9.613 ms (81 allocations: 13.55 KiB) +# 1.25e5: 899.200 ΞΌs (81 allocations: 8.22 KiB) +@time EvoTrees.update_hist!(params_c.loss, nodes[1].h, δ𝑀, X_bin, 𝑖, 𝑗, K) +@btime EvoTrees.update_hist!($params_c.loss, $nodes[1].h, $δ𝑀, $X_bin, $𝑖, $𝑗, $K) +@btime EvoTrees.update_hist!($nodes[1].h, $δ𝑀, $X_bin, $nodes[1].𝑖, $𝑗) +@code_warntype EvoTrees.update_hist!(hist, Ξ΄, X_bin, 𝑖, 𝑗, 𝑛) + +j = 1 +# 8.399 ΞΌs (80 allocations: 13.42 KiB) +n = 1 +nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, 𝑖], dims=2)) +EvoTrees.update_gains!(params_c.loss, nodes[n], 𝑗, params_c, K) +nodes[1].gains +# findmax(nodes[1].gains) #1.25e5: 36.500 ΞΌs (81 allocations: 8.22 KiB) +@btime EvoTrees.update_gains!($params_c.loss, $nodes[n], $𝑗, $params_c, $K) +@code_warntype EvoTrees.update_gains!(params_c.loss, nodes[n], 𝑗, params_c, K) + +#1.25e5: 14.100 ΞΌs (1 allocation: 32 bytes) +best = findmax(nodes[n].gains) +@btime best = findmax(nodes[n].gains) +@btime best = findmax(view(nodes[n].gains, :, 𝑗)) + +tree.cond_bin[n] = best[2][1] +tree.feat[n] = best[2][2] + +Int.(tree.cond_bin[n]) +# tree.cond_bin[n] = 32 + +# 204.900 ΞΌs (1 allocation: 96 bytes) +offset = 0 +@time EvoTrees.split_set!(left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) +@btime EvoTrees.split_set!($left, $right, $𝑖, $X_bin, $tree.feat[n], $tree.cond_bin[n], $offset) +@code_warntype EvoTrees.split_set!(left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n]) + +# 1.25e5: 227.200 ΞΌs (22 allocations: 1.44 KiB) +@time EvoTrees.split_set_threads!(out, left, right, 𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) +@btime EvoTrees.split_set_threads!($out, $left, $right, $𝑖, $X_bin, $tree.feat[n], $tree.cond_bin[n], $offset, Int(2e15)) diff --git a/experiments/random.jl b/experiments/random.jl index 4aad5bfd..63e93c49 100644 --- a/experiments/random.jl +++ b/experiments/random.jl @@ -33,13 +33,13 @@ params1 = EvoTreeRegressor(T=Float32, # asus laptopt: for 1.25e6 no eval: 9.650007 seconds (893.53 k allocations: 2.391 GiB, 5.52% gc time) @time model = fit_evotree(params1; x_train, y_train); -@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=10); -@btime model = fit_evotree($params1; $x_train, $y_train); +@time model = fit_evotree(params1; x_train, y_train, metric=:mse, x_eval, y_eval, print_every_n=100); +@btime model = fit_evotree(params1; x_train, y_train); @time pred_train = predict(model, x_train); @btime pred_train = predict(model, x_train); -gain = importance(model, 1:100) +gain = importance(model) -@time model, cache = EvoTrees.init_evotree(params1, x_train, y_train); +@time model, cache = EvoTrees.init_evotree(params1; x_train, y_train); @time EvoTrees.grow_evotree!(model, cache); ############################# @@ -84,11 +84,17 @@ params1 = EvoTreeRegressor(T=Float32, device="gpu") # Asus laptop: 10.015568 seconds (13.80 M 
allocations: 1.844 GiB, 4.00% gc time) -@time model = EvoTrees.fit_evotree(params1, X_train, Y_train); -@btime model = EvoTrees.fit_evotree(params1, X_train, Y_train); -@time model, cache = EvoTrees.init_evotree_gpu(params1, X_train, Y_train); +@time model = EvoTrees.fit_evotree(params1; x_train, y_train); +@btime model = EvoTrees.fit_evotree(params1; x_train, y_train); +@time model, cache = EvoTrees.init_evotree_gpu(params1; x_train, y_train); @time EvoTrees.grow_evotree!(model, cache); +using MLJBase +mach1 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=true) +mach2 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=false) +mach3 = machine(EvoTreeRegressor(loss=:linear, device="gpu", max_depth=5, eta=0.01, nrounds=10), x_train, y_train, cache=false) +fit!(mach1) + # X_train_32 = Float32.(X_train) @time pred_train = EvoTrees.predict(model, X_train); @btime pred_train = EvoTrees.predict(model, X_train); @@ -112,14 +118,14 @@ params1 = EvoTreeRegressor(T=Float32, # GPU - Gaussian ################################ params1 = EvoTreeGaussian(T=Float32, - loss=:gaussian, metric=:gaussian, + loss=:gaussian, nrounds=100, lambda=1.0, gamma=0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=32, device="gpu") # Asus laptop: 14.304369 seconds (24.81 M allocations: 2.011 GiB, 1.90% gc time) -@time model = EvoTrees.fit_evotree(params1, X_train, Y_train); +@time model = EvoTrees.fit_evotree(params1; x_train, y_train); # Auss laptop: 1.888472 seconds (8.40 k allocations: 1.613 GiB, 14.86% gc time) @time model, cache = EvoTrees.init_evotree(params1, X_train, Y_train); diff --git a/experiments/readme_plots_cpu.jl b/experiments/readme_plots_cpu.jl index de2a4864..b660a375 100644 --- a/experiments/readme_plots_cpu.jl +++ b/experiments/readme_plots_cpu.jl @@ -51,7 +51,7 @@ sqrt(mean((pred_train_linear .- y_train) .^ 2)) # linear weighted params1 = EvoTreeRegressor(T=Float64, - loss=:linear, metric=:mse, + loss=:linear, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, @@ -61,7 +61,7 @@ params1 = EvoTreeRegressor(T=Float64, # W_train = ones(eltype(Y_train), size(Y_train)) .* 5 w_train = rand(eltype(y_train), size(y_train)) .+ 0 -@time model = fit_evotree(params1; x_train, y_train, w_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, w_train, x_eval, y_eval, print_every_n=25, metric=:mse); # 67.159 ms (77252 allocations: 28.06 MiB) # @time model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 999); # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval); @@ -77,13 +77,13 @@ sqrt(mean((pred_train_linear_w .- y_train) .^ 2)) # logistic / cross-entropy params1 = EvoTreeRegressor( - loss=:logistic, metric=:logloss, + loss=:logistic, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:logloss); # 218.040 ms (123372 allocations: 34.71 MiB) # @btime model = fit_evotree($params1, $X_train, $Y_train, X_eval = $X_eval, Y_eval = $Y_eval) @time pred_train_logistic = predict(model, x_train); @@ -97,7 +97,7 @@ params1 = EvoTreeRegressor( lambda=0.0, gamma=0.0, 
eta=0.05, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:mae); @time pred_train_L1 = predict(model, x_train) @time pred_eval_L1 = predict(model, x_eval) sqrt(mean((pred_train_L1 .- y_train) .^ 2)) @@ -112,34 +112,34 @@ savefig("figures/regression_sinus.png") # Poisson params1 = EvoTreeCount( - loss=:poisson, metric=:poisson, + loss=:poisson, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:poisson); @time pred_train_poisson = predict(model, x_train); sqrt(mean((pred_train_poisson .- y_train) .^ 2)) # Gamma params1 = EvoTreeRegressor( - loss=:gamma, metric=:gamma, + loss=:gamma, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:gamma); @time pred_train_gamma = predict(model, x_train); sqrt(mean((pred_train_gamma .- y_train) .^ 2)) # Tweedie params1 = EvoTreeRegressor( - loss=:tweedie, metric=:tweedie, + loss=:tweedie, nrounds=200, nbins=64, lambda=0.5, gamma=0.1, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=1.0) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:tweedie); @time pred_train_tweedie = predict(model, x_train); sqrt(mean((pred_train_tweedie .- y_train) .^ 2)) @@ -180,7 +180,7 @@ sum(pred_train_q20 .< y_train) / length(y_train) # q80 params1 = EvoTreeRegressor( - loss=:quantile, alpha=0.8, metric=:none, + loss=:quantile, alpha=0.8, nrounds=200, nbins=64, lambda=1.0, gamma=0.0, eta=0.05, max_depth=6, min_weight=1.0, @@ -200,14 +200,14 @@ savefig("figures/quantiles_sinus.png") ############################### ## gaussian ############################### -params1 = EvoTreeGaussian( - loss=:gaussian, metric=:gaussian, +params1 = EvoTreeMLE( + loss=:gaussian, nrounds=200, nbins=64, lambda=0.1, gamma=0.1, eta=0.05, max_depth=6, min_weight=1.0, rowsample=1.0, colsample=1.0, rng=123) -@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10, metric=:gaussian); # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); @time pred_train = EvoTrees.predict(model, x_train); # @btime pred_train = EvoTrees.predict(model, X_train); @@ -225,4 +225,35 @@ plot!(x_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, plot!(x_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="sigma") plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="darkgreen", linewidth=1.5, label="q20") plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="darkgreen", linewidth=1.5, label="q80") -savefig("figures/gaussian_sinus.png") \ No newline at end of file +savefig("figures/gaussian-sinus.png") + + +############################### +## Logistic +############################### +params1 = 
EvoTrees.EvoTreeMLE( + loss = :logistic, + nrounds=200, nbins=64, + lambda=1.0, gamma=0.1, eta=0.05, + max_depth=6, min_weight=1.0, + rowsample=1.0, colsample=1.0, rng=123) + +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=10, metric=:logistic); +# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); +@time pred_train = EvoTrees.predict(model, x_train); +# @btime pred_train = EvoTrees.predict(model, X_train); + +pred_logistic = [Distributions.Logistic(pred_train[i, 1], pred_train[i, 2]) for i in axes(pred_train, 1)] +pred_q80 = quantile.(pred_logistic, 0.8) +pred_q20 = quantile.(pred_logistic, 0.2) + +mean(y_train .< pred_q80) +mean(y_train .< pred_q20) + +x_perm = sortperm(x_train[:, 1]) +plot(x_train[:, 1], y_train, ms=0.5, mcolor="darkgray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="") +plot!(x_train[:, 1][x_perm], pred_train[x_perm, 1], color="navy", linewidth=1.5, label="mu") +plot!(x_train[:, 1][x_perm], pred_train[x_perm, 2], color="darkred", linewidth=1.5, label="s") +plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="darkgreen", linewidth=1.5, label="q20") +plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="darkgreen", linewidth=1.5, label="q80") +savefig("figures/logistic-sinus.png") \ No newline at end of file diff --git a/experiments/readme_plots_gpu.jl b/experiments/readme_plots_gpu.jl index 20ba04ff..e4298c3c 100644 --- a/experiments/readme_plots_gpu.jl +++ b/experiments/readme_plots_gpu.jl @@ -37,6 +37,7 @@ params1 = EvoTreeRegressor(T=Float32, device="gpu") @time model = fit_evotree(params1; x_train, y_train); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, metric=:mse, print_every_n=25); model_cpu = convert(EvoTrees.GBTree, model); pred_train_linear_gpu = predict(model, x_train) pred_train_linear_cpu = predict(model_cpu, x_train) @@ -115,7 +116,6 @@ savefig("figures/regression_sinus_gpu.png") ############################### EvoTrees.CUDA.allowscalar(false) params1 = EvoTreeGaussian(T=Float32, - loss=:gaussian, metric=:gaussian, nrounds=200, nbins=64, lambda=1.0, gamma=0.1, eta=0.05, max_depth=6, min_weight=5, @@ -123,7 +123,7 @@ params1 = EvoTreeGaussian(T=Float32, device="gpu") @time model = fit_evotree(params1; x_train, y_train); -# @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=25); +@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25, metric=:gaussian); # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); @time pred_train_gaussian = EvoTrees.predict(model, x_train) @@ -140,4 +140,4 @@ plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 1], color="navy", linew plot!(x_train[:, 1][x_perm], pred_train_gaussian[x_perm, 2], color="darkred", linewidth=1.5, label="sigma") plot!(x_train[:, 1][x_perm], pred_q20[x_perm, 1], color="green", linewidth=1.5, label="q20") plot!(x_train[:, 1][x_perm], pred_q80[x_perm, 1], color="green", linewidth=1.5, label="q80") -savefig("figures/gaussian_sinus_gpu.png") \ No newline at end of file +savefig("figures/gaussian-sinus-gpu.png") \ No newline at end of file diff --git a/experiments/speed_cpu_gpu.jl b/experiments/speed_cpu_gpu.jl index 91f2c657..3bbf032b 100644 --- a/experiments/speed_cpu_gpu.jl +++ b/experiments/speed_cpu_gpu.jl @@ -1,5 +1,5 @@ using Statistics -using StatsBase:sample, sample! +using StatsBase: sample, sample! 
using EvoTrees using BenchmarkTools using CUDA @@ -15,22 +15,29 @@ Y = rand(size(X, 1)) 𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] -𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1)) + 1:end] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] -X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :] -Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] ########################### # Tree CPU ########################### params_c = EvoTreeRegressor(T=Float32, - loss=:linear, metric=:none, + loss=:linear, nrounds=100, - Ξ»=1.0, Ξ³=0.1, Ξ·=0.1, + lambda=0.1, gamma=0.0, eta=0.1, max_depth=6, min_weight=1.0, rowsample=0.5, colsample=0.5, nbins=64); +# params_c = EvoTrees.EvoTreeLogistic(T=Float32, +# loss=:linear, +# nrounds=100, +# lambda=1.0, gamma=0.0, eta=0.1, +# max_depth=6, min_weight=1.0, +# rowsample=0.5, colsample=0.5, nbins=64); + # params_c = EvoTreeGaussian(T=Float32, # loss=:gaussian, metric=:none, # nrounds=100, @@ -38,11 +45,11 @@ params_c = EvoTreeRegressor(T=Float32, # max_depth=6, min_weight=1.0, # rowsample=0.5, colsample=0.5, nbins=64); -model_c, cache_c = EvoTrees.init_evotree(params_c, X_train, Y_train); +model_c, cache_c = EvoTrees.init_evotree(params_c; x_train, y_train); # initialize from cache params_c = model_c.params -X_size = size(cache_c.X_bin) +X_size = size(cache_c.x_bin) # select random rows and cols sample!(params_c.rng, cache_c.𝑖_, cache_c.nodes[1].𝑖, replace=false, ordered=true); @@ -53,9 +60,11 @@ sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); 𝑖 = cache_c.nodes[1].𝑖 𝑗 = cache_c.𝑗 +L = EvoTrees.Linear +T = Float32 # build a new tree # 897.800 ΞΌs (6 allocations: 736 bytes) -@time EvoTrees.update_grads!(params_c.loss, cache_c.δ𝑀, cache_c.pred, cache_c.Y, params_c.Ξ±) +@time EvoTrees.update_grads!(L, cache_c.δ𝑀, cache_c.pred, cache_c.y; alpha = params_c.alpha) # @btime EvoTrees.update_grads!($params_c.loss, $cache_c.δ𝑀, $cache_c.pred_cpu, $cache_c.Y_cpu, $params_c.Ξ±) # βˆ‘ = vec(sum(cache_c.Ξ΄[𝑖,:], dims=1)) # gain = EvoTrees.get_gain(params_c.loss, βˆ‘, params_c.Ξ») @@ -64,8 +73,10 @@ sample!(params_c.rng, cache_c.𝑗_, cache_c.𝑗, replace=false, ordered=true); # 62.530 ms (7229 allocations: 17.43 MiB) # 1.25e5: 9.187 ms (7358 allocations: 2.46 MiB) -tree = EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))) -@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.X_bin, cache_c.K) +tree = EvoTrees.Tree{L,T}(params_c.max_depth, model_c.K, zero(typeof(params_c.lambda))) +@time EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.x_bin, cache_c.K, cache_c.monotone_constraints) +@code_warntype EvoTrees.grow_tree!(tree, cache_c.nodes, params_c, cache_c.δ𝑀, cache_c.edges, cache_c.𝑗, cache_c.left, cache_c.left, cache_c.right, cache_c.x_bin, cache_c.K, cache_c.monotone_constraints) + @btime EvoTrees.grow_tree!($EvoTrees.Tree(params_c.max_depth, model_c.K, zero(typeof(params_c.Ξ»))), $cache_c.nodes, $params_c, $cache_c.δ𝑀, $cache_c.edges, $cache_c.𝑗, $cache_c.left, $cache_c.left, $cache_c.right, $cache_c.X_bin, $cache_c.K) @time EvoTrees.grow_tree!(EvoTrees.Tree(params_c.max_depth, model_c.K, params_c.Ξ»), params_c, cache_c.Ξ΄, cache_c.hist, cache_c.histL, cache_c.histR, cache_c.gains, 
cache_c.edges, 𝑖, 𝑗, 𝑛, cache_c.X_bin); diff --git a/figures/gaussian_sinus.png b/figures/gaussian-sinus.png similarity index 100% rename from figures/gaussian_sinus.png rename to figures/gaussian-sinus.png diff --git a/figures/logistic-sinus.png b/figures/logistic-sinus.png new file mode 100644 index 00000000..b1f511dc Binary files /dev/null and b/figures/logistic-sinus.png differ diff --git a/src/EvoTrees.jl b/src/EvoTrees.jl index 9eaa9eea..ab59633a 100644 --- a/src/EvoTrees.jl +++ b/src/EvoTrees.jl @@ -1,7 +1,7 @@ module EvoTrees export init_evotree, grow_evotree!, grow_tree, fit_evotree, predict -export EvoTreeRegressor, EvoTreeCount, EvoTreeClassifier, EvoTreeGaussian, +export EvoTreeRegressor, EvoTreeCount, EvoTreeClassifier, EvoTreeMLE, EvoTreeGaussian, importance, Random using Base.Threads: @threads diff --git a/src/MLJ.jl b/src/MLJ.jl index 7599c2c5..72875d3d 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -1,121 +1,174 @@ function MMI.fit(model::EvoTypes, verbosity::Int, A, y) - if model.device == "gpu" - fitresult, cache = init_evotree_gpu(model, A.matrix, y) - else - fitresult, cache = init_evotree(model, A.matrix, y) - end - grow_evotree!(fitresult, cache) - report = (features=A.names,) - return fitresult, cache, report + if model.device == "gpu" + fitresult, cache = init_evotree_gpu(model; x_train = A.matrix, y_train = y) + else + fitresult, cache = init_evotree(model; x_train = A.matrix, y_train = y) + end + grow_evotree!(fitresult, cache) + report = (features = A.names,) + return fitresult, cache, report end function okay_to_continue(new, old) - new.nrounds - old.nrounds >= 0 && - new.lambda == old.lambda && - new.gamma == old.gamma && - new.max_depth == old.max_depth && - new.min_weight == old.min_weight && - new.rowsample == old.rowsample && - new.colsample == old.colsample && - new.nbins == old.nbins && - new.alpha == old.alpha && - new.device == old.device + new.nrounds - old.nrounds >= 0 && + new.lambda == old.lambda && + new.gamma == old.gamma && + new.max_depth == old.max_depth && + new.min_weight == old.min_weight && + new.rowsample == old.rowsample && + new.colsample == old.colsample && + new.nbins == old.nbins && + new.alpha == old.alpha && + new.device == old.device end # Generate names to be used by feature_importances in the report -MMI.reformat(::EvoTypes, X, y) = ((matrix=MMI.matrix(X), names=[name for name ∈ schema(X).names]), y) -MMI.reformat(::EvoTypes, X) = ((matrix=MMI.matrix(X), names=[name for name ∈ schema(X).names]),) -MMI.reformat(::EvoTypes, X::AbstractMatrix, y) = ((matrix=X, names=["feat_$i" for i = 1:size(X, 2)]), y) -MMI.reformat(::EvoTypes, X::AbstractMatrix) = ((matrix=X, names=["feat_$i" for i = 1:size(X, 2)]),) -MMI.selectrows(::EvoTypes, I, A, y) = ((matrix=view(A.matrix, I, :), names=A.names), view(y, I)) -MMI.selectrows(::EvoTypes, I, A) = ((matrix=view(A.matrix, I, :), names=A.names),) +MMI.reformat(::EvoTypes, X, y) = + ((matrix = MMI.matrix(X), names = [name for name ∈ schema(X).names]), y) +MMI.reformat(::EvoTypes, X) = + ((matrix = MMI.matrix(X), names = [name for name ∈ schema(X).names]),) +MMI.reformat(::EvoTypes, X::AbstractMatrix, y) = + ((matrix = X, names = ["feat_$i" for i = 1:size(X, 2)]), y) +MMI.reformat(::EvoTypes, X::AbstractMatrix) = + ((matrix = X, names = ["feat_$i" for i = 1:size(X, 2)]),) +MMI.selectrows(::EvoTypes, I, A, y) = + ((matrix = view(A.matrix, I, :), names = A.names), view(y, I)) +MMI.selectrows(::EvoTypes, I, A) = ((matrix = view(A.matrix, I, :), names = A.names),) # For EarlyStopping.jl support 
MMI.iteration_parameter(::Type{<:EvoTypes}) = :nrounds function MMI.update(model::EvoTypes, verbosity::Integer, fitresult, cache, A, y) - if okay_to_continue(model, cache.params) - grow_evotree!(fitresult, cache) - else - fitresult, cache = init_evotree(model, A.matrix, y) - grow_evotree!(fitresult, cache) - end - report = (features=A.names,) - return fitresult, cache, report + if okay_to_continue(model, cache.params) + grow_evotree!(fitresult, cache) + else + if model.device == "gpu" + fitresult, cache = init_evotree_gpu(model; x_train = A.matrix, y_train = y) + else + fitresult, cache = init_evotree(model; x_train = A.matrix, y_train = y) + end + grow_evotree!(fitresult, cache) + end + report = (features = A.names,) + return fitresult, cache, report end function predict(::EvoTreeRegressor, fitresult, A) - pred = vec(predict(fitresult, A.matrix)) - return pred + pred = vec(predict(fitresult, A.matrix)) + return pred end function predict(::EvoTreeClassifier, fitresult, A) - pred = predict(fitresult, A.matrix) - return MMI.UnivariateFinite(fitresult.info[:levels], pred, pool=missing) + pred = predict(fitresult, A.matrix) + return MMI.UnivariateFinite(fitresult.info[:levels], pred, pool = missing) end function predict(::EvoTreeCount, fitresult, A) - Ξ»s = vec(predict(fitresult, A.matrix)) - return [Distributions.Poisson(Ξ») for Ξ» ∈ Ξ»s] + Ξ»s = vec(predict(fitresult, A.matrix)) + return [Distributions.Poisson(Ξ») for Ξ» ∈ Ξ»s] end function predict(::EvoTreeGaussian, fitresult, A) - pred = predict(fitresult, A.matrix) - return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] + pred = predict(fitresult, A.matrix) + return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] +end + +function predict(::EvoTreeMLE{L,T,S}, fitresult, A) where {L<:GaussianDist,T,S} + pred = predict(fitresult, A.matrix) + return [Distributions.Normal(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] end +function predict(::EvoTreeMLE{L,T,S}, fitresult, A) where {L<:LogisticDist,T,S} + pred = predict(fitresult, A.matrix) + return [Distributions.Logistic(pred[i, 1], pred[i, 2]) for i in axes(pred, 1)] +end # Feature Importances MMI.reports_feature_importances(::Type{<:EvoTypes}) = true function MMI.feature_importances(m::EvoTypes, fitresult, report) - fi_pairs = importance(fitresult) - return fi_pairs + fi_pairs = importance(fitresult) + return fi_pairs end # Metadata -const EvoTreeRegressor_desc = "Regression models with various underlying methods: least square, quantile, logistic, gamma, tweedie." +const EvoTreeRegressor_desc = "Regression models with various underlying methods: least square, quantile, logistic, gamma (deviance), tweedie (deviance)." const EvoTreeClassifier_desc = "Multi-classification with softmax and cross-entropy loss." -const EvoTreeCount_desc = "Poisson regression fitting Ξ» with max likelihood." -const EvoTreeGaussian_desc = "Gaussian maximum likelihood of ΞΌ and Οƒ." 
- -MMI.metadata_pkg.((EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount, EvoTreeGaussian), - name="EvoTrees", - uuid="f6006082-12f8-11e9-0c9c-0d5d367ab1e5", - url="https://github.com/Evovest/EvoTrees.jl", - julia=true, - license="Apache", - is_wrapper=false) - -MMI.metadata_model(EvoTreeRegressor, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Continuous}, - weights=false, - path="EvoTrees.EvoTreeRegressor", - descr=EvoTreeRegressor_desc) - -MMI.metadata_model(EvoTreeClassifier, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Finite}, - weights=false, - path="EvoTrees.EvoTreeClassifier", - descr=EvoTreeClassifier_desc) - -MMI.metadata_model(EvoTreeCount, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Count}, - weights=false, - path="EvoTrees.EvoTreeCount", - descr=EvoTreeCount_desc) - -MMI.metadata_model(EvoTreeGaussian, - input_scitype=Union{MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor),AbstractMatrix{MMI.Continuous}}, - target_scitype=AbstractVector{<:MMI.Continuous}, - weights=false, - path="EvoTrees.EvoTreeGaussian", - descr=EvoTreeGaussian_desc) +const EvoTreeCount_desc = "Poisson regression fitting Ξ» with deviance minimization." +const EvoTreeGaussian_desc = "Deprecated - Use EvoTreeMLE with `loss=:normal` instead. Gaussian maximum likelihood of ΞΌ and Οƒ." +const EvoTreeMLE_desc = "Maximum likelihood methods supporting Normal/Gaussian and Logistic distributions." + +MMI.metadata_pkg.( + (EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount, EvoTreeGaussian), + name = "EvoTrees", + uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5", + url = "https://github.com/Evovest/EvoTrees.jl", + julia = true, + license = "Apache", + is_wrapper = false, +) + +MMI.metadata_model( + EvoTreeRegressor, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeRegressor", + descr = EvoTreeRegressor_desc, +) + +MMI.metadata_model( + EvoTreeClassifier, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Finite}, + weights = false, + path = "EvoTrees.EvoTreeClassifier", + descr = EvoTreeClassifier_desc, +) + +MMI.metadata_model( + EvoTreeCount, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Count}, + weights = false, + path = "EvoTrees.EvoTreeCount", + descr = EvoTreeCount_desc, +) + +MMI.metadata_model( + EvoTreeGaussian, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeGaussian", + descr = EvoTreeGaussian_desc, +) + +MMI.metadata_model( + EvoTreeMLE, + input_scitype = Union{ + MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + AbstractMatrix{MMI.Continuous}, + }, + target_scitype = AbstractVector{<:MMI.Continuous}, + weights = false, + path = "EvoTrees.EvoTreeMLE", + descr = EvoTreeMLE_desc, +) """ EvoTreeRegressor(;kwargs...) 
@@ -173,6 +226,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ Interface From MLJ, the type can be imported using: @@ -280,6 +339,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -395,6 +460,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -475,7 +546,7 @@ EvoTreeCount EvoTreeGaussian(;kwargs...) A model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. -EvoTreeGaussian is used to perform Gaussain probabilistic regression, fitting ΞΌ and Οƒ parameters to maximize likelihood. +EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting ΞΌ and Οƒ parameters to maximize likelihood. # Hyper-parameters @@ -516,6 +587,12 @@ Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of siz EvoTrees.predict(model, X) ``` +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + # MLJ From MLJ, the type can be imported using: @@ -594,6 +671,140 @@ preds = predict_median(mach, X) """ EvoTreeGaussian + + +""" + EvoTreeMLE(;kwargs...) + +A model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. +EvoTreeMLE performs maximum likelihood estimation. The assumed distribution is specified through the `loss` kwarg. Both Normal/Gaussian and Logistic distributions are supported. + +# Hyper-parameters + +- `loss=:gaussian`: Loss to be minimized during training. One of: + - `:normal` / `:gaussian` + - `:logistic` +- `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. +- `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model. +- `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. +- `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. + A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. + Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range. +- `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. +- `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`. +- `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`. +- `nbins=32`: Number of bins into which each feature is quantized.
Buckets are defined based on quantiles, hence resulting in equal weight bins. +- `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). + !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically. +- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`). +- `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`, `:logistic`. +- `device="cpu"`: Hardware device to use for computations. Can be either `"cpu"` or `"gpu"`. + +# Internal API + +Do `params = EvoTreeMLE()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...). + +## Training model + +A model is built using [`fit_evotree`](@ref): + +```julia +fit_evotree(params, X_train, Y_train, W_train=nothing; kwargs...). +``` + +## Inference + +Predictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimension refers to `ΞΌ` & `Οƒ` for Normal/Gaussian and `ΞΌ` & `s` for Logistic. + +```julia +EvoTrees.predict(model, X) +``` + +Alternatively, models act as a functor, returning predictions when called as a function with features as argument: + +```julia +model(X) +``` + +# MLJ + +From MLJ, the type can be imported using: + +```julia +EvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees +``` + +Do `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`. + +## Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X, y) + +where + +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have one of the following element scitypes: `Continuous`, + `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)` + +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:Continuous`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + +## Operations + +- `predict(mach, Xnew)`: returns a vector of `Normal` or `Logistic` distributions (according to the specified `loss`) given features `Xnew` having the same scitype as `X` above. +Predictions are probabilistic. + +Specific metrics can also be predicted using: + + - `predict_mean(mach, Xnew)` + - `predict_mode(mach, Xnew)` + - `predict_median(mach, Xnew)` + +## Fitted parameters + +The fields of `fitted_params(mach)` are: + + - `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm. + +## Report + +The fields of `report(mach)` are: + - `:features`: The names of the features encountered in training. + +# Examples + +``` +# Internal API +using EvoTrees +params = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100) +nobs, nfeats = 1_000, 5 +X, y = randn(nobs, nfeats), rand(nobs) +model = fit_evotree(params, X, y) +preds = EvoTrees.predict(model, X) +``` + +``` +# MLJ Interface +using MLJ +EvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees +model = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100) +X, y = @load_boston +mach = machine(model, X, y) |> fit!
+preds = predict(mach, X) +preds = predict_mean(mach, X) +preds = predict_mode(mach, X) +preds = predict_median(mach, X) +``` +""" +EvoTreeMLE + # function MLJ.clean!(model::EvoTreeRegressor) # warning = "" # if model.nrounds < 1 diff --git a/src/eval.jl b/src/eval.jl index 87872292..9db5f99d 100644 --- a/src/eval.jl +++ b/src/eval.jl @@ -86,6 +86,15 @@ function eval_metric(::Val{:gaussian}, p::AbstractMatrix{T}, y::AbstractVector{T return eval end +function eval_metric(::Val{:logistic}, p::AbstractMatrix{T}, y::AbstractVector{T}, w::AbstractVector{T}, alpha=0.0) where {T<:AbstractFloat} + eval = zero(T) + @inbounds for i in eachindex(y) + eval += -w[i] * (log(1 / 4 * sech(exp(-p[2, i]) * (y[i] - p[1, i]))^2) - p[2, i]) + end + eval /= sum(w) + return eval +end + function eval_metric(::Val{:quantile}, p::AbstractMatrix{T}, y::AbstractVector{T}, w::AbstractVector{T}, alpha=0.0) where {T<:AbstractFloat} eval = zero(T) for i in eachindex(y) diff --git a/src/find_split.jl b/src/find_split.jl index 62302566..7e233a2c 100644 --- a/src/find_split.jl +++ b/src/find_split.jl @@ -30,7 +30,20 @@ end Multi-threaded split_set! Take a view into left and right placeholders. Right ids are assigned at the end of the length of the current node set. """ -function split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin, offset, chunk_size, lefts, rights) +function split_set_chunk!( + left, + right, + 𝑖, + bid, + nblocks, + X_bin, + feat, + cond_bin, + offset, + chunk_size, + lefts, + rights, +) left_count = 0 right_count = 0 @@ -53,7 +66,19 @@ function split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin return nothing end -function split_views_kernel!(out::Vector{S}, left::Vector{S}, right::Vector{S}, bid, offset, chunk_size, lefts, rights, sum_lefts, cumsum_lefts, cumsum_rights) where {S} +function split_views_kernel!( + out::Vector{S}, + left::Vector{S}, + right::Vector{S}, + bid, + offset, + chunk_size, + lefts, + rights, + sum_lefts, + cumsum_lefts, + cumsum_rights, +) where {S} iter = 1 i_max = lefts[bid] bid == 1 ? 
cumsum_left = 0 : cumsum_left = cumsum_lefts[bid-1] @@ -72,7 +97,16 @@ function split_views_kernel!(out::Vector{S}, left::Vector{S}, right::Vector{S}, return nothing end -function split_set_threads!(out, left, right, 𝑖, X_bin::Matrix{S}, feat, cond_bin, offset) where {S} +function split_set_threads!( + out, + left, + right, + 𝑖, + X_bin::Matrix{S}, + feat, + cond_bin, + offset, +) where {S} # iter = Iterators.partition(𝑖, chunk_size) nblocks = ceil(Int, min(length(𝑖) / 1024, Threads.nthreads())) @@ -82,17 +116,45 @@ function split_set_threads!(out, left, right, 𝑖, X_bin::Matrix{S}, feat, cond rights = zeros(Int, nblocks) @threads for bid = 1:nblocks - split_set_chunk!(left, right, 𝑖, bid, nblocks, X_bin, feat, cond_bin, offset, chunk_size, lefts, rights) + split_set_chunk!( + left, + right, + 𝑖, + bid, + nblocks, + X_bin, + feat, + cond_bin, + offset, + chunk_size, + lefts, + rights, + ) end sum_lefts = sum(lefts) cumsum_lefts = cumsum(lefts) cumsum_rights = cumsum(rights) @threads for bid = 1:nblocks - split_views_kernel!(out, left, right, bid, offset, chunk_size, lefts, rights, sum_lefts, cumsum_lefts, cumsum_rights) + split_views_kernel!( + out, + left, + right, + bid, + offset, + chunk_size, + lefts, + rights, + sum_lefts, + cumsum_lefts, + cumsum_rights, + ) end - return (view(out, offset+1:offset+sum_lefts), view(out, offset+sum_lefts+1:offset+length(𝑖))) + return ( + view(out, offset+1:offset+sum_lefts), + view(out, offset+sum_lefts+1:offset+length(𝑖)), + ) end @@ -106,8 +168,9 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L<:GradientRegression,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L<:GradientRegression,T,S} @threads for j in 𝑗 @inbounds @simd for i in 𝑖 hid = 3 * X_bin[i, j] - 2 @@ -121,7 +184,7 @@ end """ update_hist! - GaussianRegression + MLE2P """ function update_hist!( ::Type{L}, @@ -129,8 +192,9 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L<:GaussianRegression,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L<:MLE2P,T,S} @threads for j in 𝑗 @inbounds @simd for i in 𝑖 hid = 5 * X_bin[i, j] - 4 @@ -154,12 +218,13 @@ function update_hist!( δ𝑀::Matrix{T}, X_bin::Matrix{UInt8}, 𝑖::AbstractVector{S}, - 𝑗::AbstractVector{S}, K) where {L,T,S} - + 𝑗::AbstractVector{S}, + K, +) where {L,T,S} @threads for j in 𝑗 @inbounds for i in 𝑖 hid = (2 * K + 1) * (X_bin[i, j] - 1) - for k in 1:(2*K+1) + for k = 1:(2*K+1) hist[j][hid+k] += δ𝑀[k, i] end end @@ -180,24 +245,35 @@ Generic fallback function update_gains!( node::TrainNode, 𝑗::Vector, - params::EvoTypes{L,T,S}, K, monotone_constraints) where {L,T,S} + params::EvoTypes{L,T,S}, + K, + monotone_constraints, +) where {L,T,S} KK = 2 * K + 1 @inbounds @threads for j in 𝑗 - @inbounds for k in 1:KK + @inbounds for k = 1:KK node.hL[j][k] = node.h[j][k] node.hR[j][k] = node.βˆ‘[k] - node.h[j][k] end - @inbounds for bin in 2:params.nbins + @inbounds for bin = 2:params.nbins @inbounds for k = 1:KK binid = KK * (bin - 1) node.hL[j][binid+k] = node.hL[j][binid-KK+k] + node.h[j][binid+k] node.hR[j][binid+k] = node.hR[j][binid-KK+k] - node.h[j][binid+k] end end - hist_gains_cpu!(L, view(node.gains, :, j), node.hL[j], node.hR[j], params, K, monotone_constraints[j]) + hist_gains_cpu!( + L, + view(node.gains, :, j), + node.hL[j], + node.hR[j], + params, + K, + monotone_constraints[j], + ) end return nothing end @@ -207,20 +283,33 @@ end hist_gains_cpu! 
GradientRegression """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:GradientRegression,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:GradientRegression,T} + @inbounds for bin = 1:params.nbins i = 3 * bin - 2 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins gains[bin] = hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) / 2 elseif hL[i+2] > params.min_weight && hR[i+2] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+2], params, K) - predR = pred_scalar_cpu!(hR[i:i+2], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+2), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+2), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) - gains[bin] = (hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) + - hR[i]^2 / (hR[i+1] + params.lambda * hR[i+2])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+1] + params.lambda * hL[i+2]) + + hR[i]^2 / (hR[i+1] + params.lambda * hR[i+2]) + ) / 2 end end end @@ -231,8 +320,16 @@ end hist_gains_cpu! QuantileRegression/L1Regression """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:Union{QuantileRegression,L1Regression},T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:Union{QuantileRegression,L1Regression},T} + @inbounds for bin = 1:params.nbins i = 3 * bin - 2 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins @@ -246,25 +343,43 @@ end """ hist_gains_cpu! 
- GaussianRegression + MLE2P """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L<:GaussianRegression,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L<:MLE2P,T} + @inbounds for bin = 1:params.nbins i = 5 * bin - 4 # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins - gains[bin] = (hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + - hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + + hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + ) / 2 elseif hL[i+4] > params.min_weight && hR[i+4] > params.min_weight - predL = pred_scalar_cpu!(hL[i:i+4], params, K) - predR = pred_scalar_cpu!(hR[i:i+4], params, K) + if monotone_constraint != 0 + predL = pred_scalar_cpu!(view(hL, i:i+4), params, K) + predR = pred_scalar_cpu!(view(hR, i:i+4), params, K) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) - gains[bin] = (hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + - hR[i]^2 / (hR[i+2] + params.lambda * hR[i+4])) / 2 + - (hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + - hR[i+1]^2 / (hR[i+3] + params.lambda * hR[i+4])) / 2 + gains[bin] = + ( + hL[i]^2 / (hL[i+2] + params.lambda * hL[i+4]) + + hR[i]^2 / (hR[i+2] + params.lambda * hR[i+4]) + ) / 2 + + ( + hL[i+1]^2 / (hL[i+3] + params.lambda * hL[i+4]) + + hR[i+1]^2 / (hR[i+3] + params.lambda * hR[i+4]) + ) / 2 end end end @@ -275,8 +390,16 @@ end hist_gains_cpu! 
Generic """ -function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR::Vector{T}, params, K, monotone_constraint) where {L,T} - @inbounds for bin in 1:params.nbins +function hist_gains_cpu!( + ::Type{L}, + gains::AbstractVector{T}, + hL::Vector{T}, + hR::Vector{T}, + params, + K, + monotone_constraint, +) where {L,T} + @inbounds for bin = 1:params.nbins i = (2 * K + 1) * (bin - 1) # update gain only if there's non null weight on each of left and right side - except for nbins level, which is used as benchmark for split criteria (gain if no split) if bin == params.nbins @@ -290,11 +413,17 @@ function hist_gains_cpu!(::Type{L}, gains::AbstractVector{T}, hL::Vector{T}, hR: elseif hL[i+2*K+1] > params.min_weight && hR[i+2*K+1] > params.min_weight for k = 1:K if k == 1 - gains[bin] = (hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + - hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1])) / 2 + gains[bin] = + ( + hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + + hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1]) + ) / 2 else - gains[bin] += (hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + - hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1])) / 2 + gains[bin] += + ( + hL[i+k]^2 / (hL[i+k+K] + params.lambda * hL[i+2*K+1]) + + hR[i+k]^2 / (hR[i+k+K] + params.lambda * hR[i+2*K+1]) + ) / 2 end end end diff --git a/src/fit.jl b/src/fit.jl index 762ef05f..abbe9ee2 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -3,98 +3,119 @@ Initialise EvoTree """ -function init_evotree(params::EvoTypes{L,T,S}, X::AbstractMatrix, Y::AbstractVector, W=nothing, offset=nothing; fnames=nothing) where {L,T,S} +function init_evotree( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + fnames = nothing, +) where {L,T,S} K = 1 levels = nothing - X = convert(Matrix{T}, X) - + x = convert(Matrix{T}, x_train) + offset = !isnothing(offset_train) ? 
T.(offset_train) : nothing if L == Logistic - Y = T.(Y) - ΞΌ = [logit(mean(Y))] + y = T.(y_train) + ΞΌ = [logit(mean(y))] !isnothing(offset) && (offset .= logit.(offset)) - elseif L ∈ [Poisson, Gamma, Tweedie] - Y = T.(Y) - ΞΌ = fill(log(mean(Y)), 1) + elseif L in [Poisson, Gamma, Tweedie] + y = T.(y_train) + ΞΌ = fill(log(mean(y)), 1) !isnothing(offset) && (offset .= log.(offset)) elseif L == Softmax - if eltype(Y) <: CategoricalValue - levels = CategoricalArrays.levels(Y) + if eltype(y_train) <: CategoricalValue + levels = CategoricalArrays.levels(y_train) K = length(levels) ΞΌ = zeros(T, K) - Y = UInt32.(CategoricalArrays.levelcode.(Y)) + y = UInt32.(CategoricalArrays.levelcode.(y_train)) else - levels = sort(unique(Y)) - yc = CategoricalVector(Y, levels=levels) + levels = sort(unique(y_train)) + yc = CategoricalVector(y_train, levels = levels) K = length(levels) ΞΌ = zeros(T, K) - Y = UInt32.(CategoricalArrays.levelcode.(yc)) + y = UInt32.(CategoricalArrays.levelcode.(yc)) end !isnothing(offset) && (offset .= log.(offset)) - elseif L == Gaussian + elseif L == GaussianDist + K = 2 + y = T.(y_train) + ΞΌ = [mean(y), log(std(y))] + !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) + elseif L == LogisticDist K = 2 - Y = T.(Y) - ΞΌ = [mean(Y), log(std(Y))] + y = T.(y_train) + ΞΌ = [mean(y), log(std(y) * sqrt(3) / Ο€)] !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) else - Y = T.(Y) - ΞΌ = [mean(Y)] + y = T.(y_train) + ΞΌ = [mean(y)] end + ΞΌ = T.(ΞΌ) # force a neutral bias/initial tree when offset is specified !isnothing(offset) && (ΞΌ .= 0) # initialize preds - X_size = size(X) - pred = zeros(T, K, X_size[1]) - @inbounds for i = 1:X_size[1] + x_size = size(x) + pred = zeros(T, K, x_size[1]) + @inbounds for i = 1:x_size[1] pred[:, i] .= ΞΌ end !isnothing(offset) && (pred .+= offset') # init GBTree bias = [Tree{L,T}(ΞΌ)] - fnames = isnothing(fnames) ? ["feat_$i" for i in axes(X, 2)] : string.(fnames) - @assert length(fnames) == size(X, 2) + fnames = isnothing(fnames) ? ["feat_$i" for i in axes(x, 2)] : string.(fnames) + @assert length(fnames) == size(x, 2) info = Dict(:fnames => fnames, :levels => levels) evotree = GBTree{L,T,S}(bias, params, Metric(), K, info) # initialize gradients and weights - δ𝑀 = zeros(T, 2 * K + 1, X_size[1]) - W = isnothing(W) ? ones(T, size(Y)) : Vector{T}(W) - @assert (length(Y) == length(W) && minimum(W) > 0) - δ𝑀[end, :] .= W + δ𝑀 = zeros(T, 2 * K + 1, x_size[1]) + w = isnothing(w_train) ? 
ones(T, size(y)) : Vector{T}(w_train) + @assert (length(y) == length(w) && minimum(w) > 0) + δ𝑀[end, :] .= w # binarize data into quantiles - edges = get_edges(X, params.nbins) - X_bin = binarize(X, edges) + edges = get_edges(x, params.nbins) + x_bin = binarize(x, edges) - 𝑖_ = UInt32.(collect(1:X_size[1])) - 𝑗_ = UInt32.(collect(1:X_size[2])) - 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * X_size[2])) + 𝑖_ = UInt32.(collect(1:x_size[1])) + 𝑗_ = UInt32.(collect(1:x_size[2])) + 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * x_size[2])) # initialize histograms - nodes = [TrainNode(X_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] - nodes[1].𝑖 = zeros(eltype(𝑖_), ceil(Int, params.rowsample * X_size[1])) + nodes = [TrainNode(x_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] + nodes[1].𝑖 = zeros(eltype(𝑖_), ceil(Int, params.rowsample * x_size[1])) out = zeros(UInt32, length(nodes[1].𝑖)) left = zeros(UInt32, length(nodes[1].𝑖)) right = zeros(UInt32, length(nodes[1].𝑖)) # assign monotone contraints in constraints vector - monotone_constraints = zeros(Int32, X_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + monotone_constraints = zeros(Int32, x_size[2]) + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end - cache = (params=deepcopy(params), - X=X, Y=Y, K=K, - nodes=nodes, - pred=pred, - 𝑖_=𝑖_, 𝑗_=𝑗_, 𝑗=𝑗, - out=out, left=left, right=right, - δ𝑀=δ𝑀, - edges=edges, - X_bin=X_bin, - monotone_constraints=monotone_constraints) + cache = ( + params = deepcopy(params), + x = x, + y = y, + K = K, + nodes = nodes, + pred = pred, + 𝑖_ = 𝑖_, + 𝑗_ = 𝑗_, + 𝑗 = 𝑗, + out = out, + left = left, + right = right, + δ𝑀 = δ𝑀, + edges = edges, + x_bin = x_bin, + monotone_constraints = monotone_constraints, + ) cache.params.nrounds = 0 @@ -111,16 +132,29 @@ function grow_evotree!(evotree::GBTree{L,T,S}, cache) where {L,T,S} # loop over nrounds for i = 1:Ξ΄nrounds # select random rows and cols - sample!(params.rng, cache.𝑖_, cache.nodes[1].𝑖, replace=false, ordered=true) - sample!(params.rng, cache.𝑗_, cache.𝑗, replace=false, ordered=true) + sample!(params.rng, cache.𝑖_, cache.nodes[1].𝑖, replace = false, ordered = true) + sample!(params.rng, cache.𝑗_, cache.𝑗, replace = false, ordered = true) # build a new tree - update_grads!(L, cache.δ𝑀, cache.pred, cache.Y; alpha=params.alpha) + update_grads!(L, cache.δ𝑀, cache.pred, cache.y; alpha = params.alpha) # assign a root and grow tree tree = Tree{L,T}(params.max_depth, evotree.K, zero(T)) - grow_tree!(tree, cache.nodes, params, cache.δ𝑀, cache.edges, cache.𝑗, cache.out, cache.left, cache.right, cache.X_bin, cache.K, cache.monotone_constraints) + grow_tree!( + tree, + cache.nodes, + params, + cache.δ𝑀, + cache.edges, + cache.𝑗, + cache.out, + cache.left, + cache.right, + cache.x_bin, + cache.K, + cache.monotone_constraints, + ) push!(evotree.trees, tree) - predict!(cache.pred, tree, cache.X, cache.K) + predict!(cache.pred, tree, cache.x, cache.K) end # end of nrounds cache.params.nrounds = params.nrounds @@ -134,8 +168,14 @@ function grow_tree!( params::EvoTypes{L,T,S}, δ𝑀::Matrix{T}, edges, - 𝑗, out, left, right, - X_bin::AbstractMatrix, K, monotone_constraints) where {L,T,S} + 𝑗, + out, + left, + right, + x_bin::AbstractMatrix, + K, + monotone_constraints, +) where {L,T,S} # reset nodes @threads for n in eachindex(nodes) @@ -151,7 +191,7 @@ function grow_tree!( depth = 1 # initialize summary stats - nodes[1].βˆ‘ .= 
vec(sum(δ𝑀[:, nodes[1].𝑖], dims=2)) + nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims = 2)) nodes[1].gain = get_gain(L, nodes[1].βˆ‘, params.lambda, K) # grow while there are remaining active nodes while length(n_current) > 0 && depth <= params.max_depth @@ -167,7 +207,7 @@ function grow_tree!( nodes[n].h .= nodes[n>>1].h .- nodes[n-1].h end else - update_hist!(L, nodes[n].h, δ𝑀, X_bin, nodes[n].𝑖, 𝑗, K) + update_hist!(L, nodes[n].h, δ𝑀, x_bin, nodes[n].𝑖, 𝑗, K) end end end @@ -191,7 +231,16 @@ function grow_tree!( popfirst!(n_next) else # println("typeof(nodes[n].𝑖): ", typeof(nodes[n].𝑖)) - _left, _right = split_set_threads!(out, left, right, nodes[n].𝑖, X_bin, tree.feat[n], tree.cond_bin[n], offset) + _left, _right = split_set_threads!( + out, + left, + right, + nodes[n].𝑖, + x_bin, + tree.feat[n], + tree.cond_bin[n], + offset, + ) nodes[n<<1].𝑖, nodes[n<<1+1].𝑖 = _left, _right offset += length(nodes[n].𝑖) update_childs_βˆ‘!(L, nodes, n, best[2][1], best[2][2], K) @@ -229,7 +278,7 @@ Main training function. Performs model fitting given configuration `params`, `x_ # Arguments -- `params::EvoTypes`: configuration info providing hyper-paramters. `EvoTypes` comprises EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount or EvoTreeGaussian +- `params::EvoTypes`: configuration info providing hyper-paramters. `EvoTypes` comprises EvoTreeRegressor, EvoTreeClassifier, EvoTreeCount and EvoTreeMLE. # Keyword arguments @@ -241,44 +290,70 @@ Main training function. Performs model fitting given configuration `params`, `x_ - `y_eval::Vector`: vector of evaluation targets of length `#observations`. - `w_eval::Vector`: vector of evaluation weights of length `#observations`. Defaults to `nothing` (assumes a vector of 1s). - `offset_eval::VecOrMat`: evaluation data offset. Should match the size of the predictions. +- `metric`: The evaluation metric that wil be tracked on `x_eval`, `y_eval` and optionally `w_eval` / `offset_eval` data. + Supported metrics are: + + - `:mse`: mean-squared error. Adapted for general regression models. + - `:rmse`: root-mean-squared error (CPU only). Adapted for general regression models. + - `:mae`: mean absolute error. Adapted for general regression models. + - `:logloss`: Adapted for `:logistic` regression models. + - `:mlogloss`: Multi-class cross entropy. Adapted to `EvoTreeClassifier` classification models. + - `:poisson`: Poisson deviance. Adapted to `EvoTreeCount` count models. + - `:gamma`: Gamma deviance. Adapted to regression problem on Gamma like, positively distributed targets. + - `:tweedie`: Tweedie deviance. Adapted to regression problem on Tweedie like, positively distributed targets with probability mass at `y == 0`. - `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped. - `print_every_n`: sets at which frequency logging info should be printed. - `verbosity`: set to 1 to print logging info during training. +- `fnames`: the names of the `x_train` features. If provided, should be a vector of string with `length(fnames) = size(x_train, 2)`. 
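# Illustrative usage sketch for the keyword API documented above; the data,
# hyper-parameter values and metric choice are placeholders, not prescriptions.
config = EvoTreeRegressor(loss = :logistic, nrounds = 200, eta = 0.05)
m = fit_evotree(config; x_train, y_train, x_eval, y_eval,
    metric = :logloss, early_stopping_rounds = 50, print_every_n = 25)
p = m(x_eval)    # a fitted GBTree is callable; equivalent to predict(m, x_eval)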
""" -function fit_evotree(params::EvoTypes{L,T,S}; - x_train::AbstractMatrix, y_train::AbstractVector, w_train=nothing, offset_train=nothing, - x_eval=nothing, y_eval=nothing, w_eval=nothing, offset_eval=nothing, - metric=nothing, early_stopping_rounds=9999, print_every_n=9999, verbosity=1, fnames=nothing) where {L,T,S} +function fit_evotree( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + x_eval = nothing, + y_eval = nothing, + w_eval = nothing, + offset_eval = nothing, + metric = nothing, + early_stopping_rounds = 9999, + print_every_n = 9999, + verbosity = 1, + fnames = nothing, +) where {L,T,S} nrounds_max = params.nrounds params.nrounds = 0 iter_since_best = 0 if params.device == "gpu" - model, cache = init_evotree_gpu(params, x_train, y_train, w_train, offset_train; fnames) + model, cache = + init_evotree_gpu(params; x_train, y_train, w_train, offset_train, fnames) else - model, cache = init_evotree(params, x_train, y_train, w_train, offset_train; fnames) + model, cache = init_evotree(params; x_train, y_train, w_train, offset_train, fnames) end if !isnothing(offset_eval) L == Logistic && (offset_eval .= logit.(offset_eval)) L in [Poisson, Gamma, Tweedie] && (offset_eval .= log.(offset_eval)) L == Softmax && (offset_eval .= log.(offset_eval)) - L == Gaussian && (offset_eval[:, 2] .= log.(offset_eval[:, 2])) + L in [GaussianDist, LogisticDist] && (offset_eval[:, 2] .= log.(offset_eval[:, 2])) offset_eval = T.(offset_eval) end + !isnothing(metric) ? metric = Symbol(metric) : nothing if !isnothing(metric) && !isnothing(x_eval) && !isnothing(y_eval) if params.device == "gpu" x_eval = CuArray(T.(x_eval)) - y_eval = CuArray(eltype(cache.Y).(y_eval)) + y_eval = CuArray(eltype(cache.y).(y_eval)) w_eval = isnothing(w_eval) ? CUDA.ones(T, size(y_eval)) : CuArray(T.(w_eval)) p_eval = predict(model.trees[1], x_eval, model.K) !isnothing(offset_eval) && (p_eval .+= CuArray(offset_eval')) eval_vec = CUDA.zeros(T, size(y_eval, 1)) else # params.device == "cpu" x_eval = T.(x_eval) - y_eval = eltype(cache.Y).(y_eval) + y_eval = eltype(cache.y).(y_eval) w_eval = isnothing(w_eval) ? 
ones(T, size(y_eval)) : T.(w_eval) p_eval = predict(model.trees[1], x_eval, model.K) !isnothing(offset_eval) && (p_eval .+= offset_eval') @@ -286,9 +361,11 @@ function fit_evotree(params::EvoTypes{L,T,S}; # initialize metric metric_track = Metric() metric_best = Metric() - metric_track.metric = eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) - tracker = (iter=[0], metric=[metric_track.metric]) - @info "Initial tracking info" iter = model.params.nrounds metric = metric_track.metric + metric_track.metric = + eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) + tracker = (iter = [0], metric = [metric_track.metric]) + @info "Initial tracking info" iter = model.params.nrounds metric = + metric_track.metric end while model.params.nrounds < nrounds_max && iter_since_best < early_stopping_rounds @@ -297,7 +374,8 @@ function fit_evotree(params::EvoTypes{L,T,S}; # callback function if !isnothing(metric) && !isnothing(x_eval) && !isnothing(y_eval) predict!(p_eval, model.trees[model.params.nrounds+1], x_eval, model.K) - metric_track.metric = eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) + metric_track.metric = + eval_metric(Val{metric}(), p_eval, y_eval, w_eval, params.alpha) if metric_track.metric < metric_best.metric metric_best.metric = metric_track.metric metric_best.iter = model.params.nrounds @@ -306,7 +384,8 @@ function fit_evotree(params::EvoTypes{L,T,S}; iter_since_best += 1 end if model.params.nrounds % print_every_n == 0 && verbosity > 0 - @info "Tracking info" iter = model.params.nrounds metric = metric_track.metric + @info "Tracking info" iter = model.params.nrounds metric = + metric_track.metric end end # end of callback end diff --git a/src/gpu/find_split_gpu.jl b/src/gpu/find_split_gpu.jl index 58278ffb..833d8749 100644 --- a/src/gpu/find_split_gpu.jl +++ b/src/gpu/find_split_gpu.jl @@ -111,7 +111,7 @@ function update_hist_gpu!( X_bin::CuMatrix{UInt8}, 𝑖::CuVector{S}, 𝑗::CuVector{S}, K; - MAX_THREADS=128) where {L<:GaussianRegression,T,S} + MAX_THREADS=128) where {L<:MLE2P,T,S} nbins = size(h, 2) thread_i = max(nbins, min(MAX_THREADS, length(𝑖))) @@ -237,8 +237,10 @@ function hist_gains_gpu_kernel!(gains::CuDeviceMatrix{T}, hL::CuDeviceArray{T,3} if i == nbins gains[i, j] = hL[1, i, j]^2 / (hL[2, i, j] + lambda * hL[3, i, j]) / 2 elseif hL[3, i, j] > min_weight && hR[3, i, j] > min_weight - predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) - predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[2, i, j] + lambda * hL[3, i, j]) + predR = -hR[1, i, j] / (hR[2, i, j] + lambda * hR[3, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) @@ -253,13 +255,13 @@ end """ update_gains! 
- GaussianRegression + MLE2P """ function update_gains_gpu!( node::TrainNodeGPU, 𝑗::AbstractVector, params::EvoTypes{L,T,S}, K, monotone_constraints; - MAX_THREADS=512) where {L<:GaussianRegression,T,S} + MAX_THREADS=512) where {L<:MLE2P,T,S} cumsum!(node.hL, node.h, dims=2) node.hR .= view(node.hL, :, params.nbins:params.nbins, :) .- node.hL @@ -281,8 +283,10 @@ function hist_gains_gpu_kernel_gauss!(gains::CuDeviceMatrix{T}, hL::CuDeviceArra if i == nbins gains[i, j] = (hL[1, i, j]^2 / (hL[3, i, j] + lambda * hL[5, i, j]) + hL[2, i, j]^2 / (hL[4, i, j] + lambda * hL[5, i, j])) / 2 elseif hL[5, i, j] > min_weight && hR[5, i, j] > min_weight - predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) - predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + if monotone_constraint != 0 + predL = -hL[1, i, j] / (hL[3, i, j] + lambda * hL[5, i, j]) + predR = -hR[1, i, j] / (hR[3, i, j] + lambda * hR[5, i, j]) + end if (monotone_constraint == 0) || (monotone_constraint == -1 && predL > predR) || (monotone_constraint == 1 && predL < predR) diff --git a/src/gpu/fit_gpu.jl b/src/gpu/fit_gpu.jl index da6c33d4..d26cb3ed 100644 --- a/src/gpu/fit_gpu.jl +++ b/src/gpu/fit_gpu.jl @@ -1,80 +1,97 @@ -function init_evotree_gpu(params::EvoTypes{L,T,S}, - X::AbstractMatrix, Y::AbstractVector, W=nothing, offset=nothing; fnames) where {L,T,S} +function init_evotree_gpu( + params::EvoTypes{L,T,S}; + x_train::AbstractMatrix, + y_train::AbstractVector, + w_train = nothing, + offset_train = nothing, + fnames = nothing, +) where {L,T,S} K = 1 levels = nothing - X = convert(Matrix{T}, X) + x = convert(Matrix{T}, x_train) + offset = !isnothing(offset_train) ? T.(offset_train) : nothing if L == Logistic - Y = CuArray(T.(Y)) - ΞΌ = [logit(mean(Y))] + y = CuArray(T.(y_train)) + ΞΌ = [logit(mean(y))] !isnothing(offset) && (offset .= logit.(offset)) elseif L ∈ [Poisson, Gamma, Tweedie] - Y = CuArray(T.(Y)) - ΞΌ = fill(log(mean(Y)), 1) + y = CuArray(T.(y_train)) + ΞΌ = fill(log(mean(y)), 1) !isnothing(offset) && (offset .= log.(offset)) - elseif L == Gaussian + elseif L == GaussianDist K = 2 - Y = CuArray(T.(Y)) - ΞΌ = [mean(Y), log(std(Y))] + y = CuArray(T.(y_train)) + ΞΌ = [mean(y), log(std(y))] !isnothing(offset) && (offset[:, 2] .= log.(offset[:, 2])) else - Y = CuArray(T.(Y)) - ΞΌ = [mean(Y)] + y = CuArray(T.(y_train)) + ΞΌ = [mean(y)] end # force a neutral bias/initial tree when offset is specified !isnothing(offset) && (ΞΌ .= 0) # initialize preds - X_size = size(X) - pred = CUDA.zeros(T, K, X_size[1]) + x_size = size(x) + pred = CUDA.zeros(T, K, x_size[1]) pred .= CuArray(ΞΌ) !isnothing(offset) && (pred .+= CuArray(offset')) # init GBTree bias = [TreeGPU{L,T}(CuArray(ΞΌ))] - fnames = isnothing(fnames) ? ["feat_$i" for i in axes(X, 2)] : string.(fnames) - @assert length(fnames) == size(X, 2) + fnames = isnothing(fnames) ? ["feat_$i" for i in axes(x, 2)] : string.(fnames) + @assert length(fnames) == size(x, 2) info = Dict(:fnames => fnames, :levels => levels) evotree = GBTreeGPU{L,T,S}(bias, params, Metric(), K, info) # initialize gradients and weights - δ𝑀 = CUDA.zeros(T, 2 * K + 1, X_size[1]) - W = isnothing(W) ? CUDA.ones(T, size(Y)) : CuVector{T}(W) - @assert (length(Y) == length(W) && minimum(W) > 0) - δ𝑀[end, :] .= W + δ𝑀 = CUDA.zeros(T, 2 * K + 1, x_size[1]) + w = isnothing(w_train) ? 
CUDA.ones(T, size(y)) : CuVector{T}(w_train) + @assert (length(y) == length(w) && minimum(w) > 0) + δ𝑀[end, :] .= w # binarize data into quantiles - edges = get_edges(X, params.nbins) - X_bin = CuArray(binarize(X, edges)) + edges = get_edges(x, params.nbins) + x_bin = CuArray(binarize(x, edges)) - 𝑖_ = UInt32.(collect(1:X_size[1])) - 𝑗_ = UInt32.(collect(1:X_size[2])) - 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * X_size[2])) + 𝑖_ = UInt32.(collect(1:x_size[1])) + 𝑗_ = UInt32.(collect(1:x_size[2])) + 𝑗 = zeros(eltype(𝑗_), ceil(Int, params.colsample * x_size[2])) # initializde histograms - nodes = [TrainNodeGPU(X_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] - nodes[1].𝑖 = CUDA.zeros(eltype(𝑖_), ceil(Int, params.rowsample * X_size[1])) + nodes = [TrainNodeGPU(x_size[2], params.nbins, K, T) for n = 1:2^params.max_depth-1] + nodes[1].𝑖 = CUDA.zeros(eltype(𝑖_), ceil(Int, params.rowsample * x_size[1])) out = CUDA.zeros(UInt32, length(nodes[1].𝑖)) left = CUDA.zeros(UInt32, length(nodes[1].𝑖)) right = CUDA.zeros(UInt32, length(nodes[1].𝑖)) # assign monotone contraints in constraints vector - monotone_constraints = zeros(Int32, X_size[2]) - hasproperty(params, :monotone_constraint) && for (k, v) in params.monotone_constraints + monotone_constraints = zeros(Int32, x_size[2]) + hasproperty(params, :monotone_constraints) && for (k, v) in params.monotone_constraints monotone_constraints[k] = v end # store cache - cache = (params=deepcopy(params), - X=CuArray(X), X_bin=X_bin, Y=Y, K=K, - nodes=nodes, - pred=pred, - 𝑖_=𝑖_, 𝑗_=𝑗_, 𝑗=𝑗, 𝑖=Array(nodes[1].𝑖), - out=out, left=left, right=right, - δ𝑀=δ𝑀, - edges=edges, - monotone_constraints=CuArray(monotone_constraints)) + cache = ( + params = deepcopy(params), + x = CuArray(x), + x_bin = x_bin, + y = y, + K = K, + nodes = nodes, + pred = pred, + 𝑖_ = 𝑖_, + 𝑗_ = 𝑗_, + 𝑗 = 𝑗, + 𝑖 = Array(nodes[1].𝑖), + out = out, + left = left, + right = right, + δ𝑀 = δ𝑀, + edges = edges, + monotone_constraints = CuArray(monotone_constraints), + ) cache.params.nrounds = 0 @@ -86,27 +103,39 @@ function grow_evotree!(evotree::GBTreeGPU{L,T,S}, cache) where {L,T,S} # initialize from cache params = evotree.params - X_size = size(cache.X_bin) Ξ΄nrounds = params.nrounds - cache.params.nrounds # loop over nrounds for i = 1:Ξ΄nrounds # select random rows and cols - sample!(params.rng, cache.𝑖_, cache.𝑖, replace=false, ordered=true) - sample!(params.rng, cache.𝑗_, cache.𝑗, replace=false, ordered=true) + sample!(params.rng, cache.𝑖_, cache.𝑖, replace = false, ordered = true) + sample!(params.rng, cache.𝑗_, cache.𝑗, replace = false, ordered = true) cache.nodes[1].𝑖 .= CuArray(cache.𝑖) # build a new tree - update_grads_gpu!(L, cache.δ𝑀, cache.pred, cache.Y) + update_grads_gpu!(L, cache.δ𝑀, cache.pred, cache.y) # # assign a root and grow tree tree = TreeGPU{L,T}(params.max_depth, evotree.K, zero(T)) - grow_tree_gpu!(tree, cache.nodes, params, cache.δ𝑀, cache.edges, CuVector(cache.𝑗), cache.out, cache.left, cache.right, cache.X_bin, cache.K, cache.monotone_constraints) + grow_tree_gpu!( + tree, + cache.nodes, + params, + cache.δ𝑀, + cache.edges, + CuVector(cache.𝑗), + cache.out, + cache.left, + cache.right, + cache.x_bin, + cache.K, + cache.monotone_constraints, + ) push!(evotree.trees, tree) # update predctions - predict!(cache.pred, tree, cache.X, cache.K) + predict!(cache.pred, tree, cache.x, cache.K) end # end of nrounds cache.params.nrounds = params.nrounds - # return model, cache + CUDA.reclaim() return evotree end @@ -117,8 +146,14 @@ function grow_tree_gpu!( 
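# Illustrative sketch of the monotone_constraints option handled above: it is a
# Dict{Int,Int} mapping a feature column index to a constraint flag that gets
# copied into the per-feature constraints vector. Following the gain kernels,
# 1 only keeps splits whose right-child prediction exceeds the left one
# (non-decreasing effect), -1 the reverse; unlisted features default to 0.
config = EvoTreeRegressor(
    loss = :linear,
    monotone_constraints = Dict(1 => 1, 3 => -1),  # feature 1 increasing, feature 3 decreasing
)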
params::EvoTypes{L,T,S}, δ𝑀::AbstractMatrix, edges, - 𝑗, out, left, right, - X_bin::AbstractMatrix, K, monotone_constraints) where {L,T,S} + 𝑗, + out, + left, + right, + x_bin::AbstractMatrix, + K, + monotone_constraints, +) where {L,T,S} n_next = [1] n_current = copy(n_next) @@ -133,7 +168,7 @@ function grow_tree_gpu!( end # initialize summary stats - nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims=2)) + nodes[1].βˆ‘ .= vec(sum(δ𝑀[:, nodes[1].𝑖], dims = 2)) nodes[1].gain = get_gain(L, Array(nodes[1].βˆ‘), params.lambda, K) # should use a GPU version? # grow while there are remaining active nodes - TO DO histogram substraction hits issue on GPU @@ -151,14 +186,15 @@ function grow_tree_gpu!( CUDA.synchronize() end else - update_hist_gpu!(L, nodes[n].h, δ𝑀, X_bin, nodes[n].𝑖, 𝑗, K) + update_hist_gpu!(L, nodes[n].h, δ𝑀, x_bin, nodes[n].𝑖, 𝑗, K) end end end # grow while there are remaining active nodes for n ∈ sort(n_current) - if depth == params.max_depth || @allowscalar(nodes[n].βˆ‘[end] <= params.min_weight) + if depth == params.max_depth || + @allowscalar(nodes[n].βˆ‘[end] <= params.min_weight) pred_leaf_gpu!(tree.pred, n, Array(nodes[n].βˆ‘), params) else update_gains_gpu!(nodes[n], 𝑗, params, K, monotone_constraints) @@ -177,14 +213,24 @@ function grow_tree_gpu!( pred_leaf_gpu!(tree.pred, n, Array(nodes[n].βˆ‘), params) popfirst!(n_next) else - _left, _right = split_set_threads_gpu!(out, left, right, @allowscalar(nodes[n]).𝑖, X_bin, @allowscalar(tree.feat[n]), @allowscalar(tree.cond_bin[n]), offset) + _left, _right = split_set_threads_gpu!( + out, + left, + right, + @allowscalar(nodes[n]).𝑖, + x_bin, + @allowscalar(tree.feat[n]), + @allowscalar(tree.cond_bin[n]), + offset, + ) nodes[n<<1].𝑖, nodes[n<<1+1].𝑖 = _left, _right offset += length(nodes[n].𝑖) # println("length(_left): ", length(_left), " | length(_right): ", length(_right)) # println("best: ", best) update_childs_βˆ‘_gpu!(L, nodes, n, best[2][1], best[2][2]) nodes[n<<1].gain = get_gain(L, Array(nodes[n<<1].βˆ‘), params.lambda, K) - nodes[n<<1+1].gain = get_gain(L, Array(nodes[n<<1+1].βˆ‘), params.lambda, K) + nodes[n<<1+1].gain = + get_gain(L, Array(nodes[n<<1+1].βˆ‘), params.lambda, K) if length(_right) >= length(_left) push!(n_next, n << 1) diff --git a/src/gpu/loss_gpu.jl b/src/gpu/loss_gpu.jl index 6712bdfd..e6de3f64 100644 --- a/src/gpu/loss_gpu.jl +++ b/src/gpu/loss_gpu.jl @@ -4,8 +4,8 @@ function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:Gr return gain end -# Gaussian regression -function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:GaussianRegression,T<:AbstractFloat} +# MLE regression +function get_gain_gpu(::Type{L}, βˆ‘::AbstractVector{T}, lambda::T) where {L<:MLE2P,T<:AbstractFloat} gain = βˆ‘[1]^2 / (βˆ‘[3] + lambda * βˆ‘[5]) / 2 + βˆ‘[2]^2 / (βˆ‘[4] + lambda * βˆ‘[5]) / 2 return gain end @@ -129,7 +129,7 @@ function kernel_gauss_δ𝑀!(δ𝑀::CuDeviceMatrix, p::CuDeviceMatrix, y::CuDe return end -function update_grads_gpu!(::Type{Gaussian}, δ𝑀::CuMatrix, p::CuMatrix, y::CuVector; MAX_THREADS=1024) +function update_grads_gpu!(::Type{GaussianDist}, δ𝑀::CuMatrix, p::CuMatrix, y::CuVector; MAX_THREADS=1024) threads = min(MAX_THREADS, length(y)) blocks = ceil(Int, (length(y)) / threads) @cuda blocks = blocks threads = threads kernel_gauss_δ𝑀!(δ𝑀, p, y) diff --git a/src/gpu/predict_gpu.jl b/src/gpu/predict_gpu.jl index e7557661..ec0672d1 100644 --- a/src/gpu/predict_gpu.jl +++ b/src/gpu/predict_gpu.jl @@ -52,9 +52,9 @@ end """ predict_kernel! 
- GaussianRegression + MLE2P """ -function predict_kernel!(::Type{L}, pred::AbstractMatrix{T}, split, feat, cond_float, leaf_pred::AbstractMatrix{T}, X::CuDeviceMatrix, K) where {L<:GaussianRegression,T} +function predict_kernel!(::Type{L}, pred::AbstractMatrix{T}, split, feat, cond_float, leaf_pred::AbstractMatrix{T}, X::CuDeviceMatrix, K) where {L<:MLE2P,T} idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x nid = 1 @inbounds if idx <= size(pred, 2) @@ -97,7 +97,7 @@ function predict(model::GBTreeGPU{L,T,S}, X::AbstractMatrix) where {L,T,S} pred .= sigmoid.(pred) elseif L ∈ [Poisson, Gamma, Tweedie] pred .= exp.(pred) - elseif L == Gaussian + elseif L == GaussianDist pred[2, :] .= exp.(pred[2, :]) elseif L == Softmax pred = transpose(reshape(pred, model.K, :)) @@ -119,12 +119,12 @@ function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, lambda) where {L<:GradientRegr @allowscalar(-βˆ‘[1] / (βˆ‘[2] + lambda * βˆ‘[3])) end -# prediction in Leaf - Gaussian -function pred_leaf_gpu!(p::AbstractMatrix{T}, n, βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:GaussianRegression,T,S} +# prediction in Leaf - MLE2P +function pred_leaf_gpu!(p::AbstractMatrix{T}, n, βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:MLE2P,T,S} @allowscalar(p[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5])) @allowscalar(p[2, n] = -params.eta * βˆ‘[2] / (βˆ‘[4] + params.lambda * βˆ‘[5])) return nothing end -function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:GradientRegression,T,S} +function pred_scalar_gpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}) where {L<:MLE2P,T,S} @allowscalar(-params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5])) end \ No newline at end of file diff --git a/src/importance.jl b/src/importance.jl index 4d47152e..0e2ee95a 100644 --- a/src/importance.jl +++ b/src/importance.jl @@ -10,6 +10,7 @@ end importance(model::GBTree) Sorted normalized feature importance based on loss function gain. +Feature names associated to the model are stored in `model.info[:fnames]` as a string `Vector` and can be updated at any time. Eg: `model.info[:fnames] = new_fnames_vec`. """ function importance(model::Union{GBTree,GBTreeGPU}) fnames = model.info[:fnames] @@ -22,7 +23,7 @@ function importance(model::Union{GBTree,GBTreeGPU}) gain .= gain ./ sum(gain) pairs = collect(Dict(zip(string.(fnames), gain))) - sort!(pairs, by=x -> -x[2]) + sort!(pairs, by = x -> -x[2]) return pairs end diff --git a/src/loss.jl b/src/loss.jl index f195aba4..a211262c 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -39,21 +39,24 @@ function update_grads!(::Type{Tweedie}, δ𝑀::Matrix, p::Matrix, y::Vector; kw @inbounds for i in eachindex(y) pred = exp(p[1, i]) δ𝑀[1, i] = 2 * (pred^(2 - rho) - y[i] * pred^(1 - rho)) * δ𝑀[3, i] - δ𝑀[2, i] = 2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑀[3, i] + δ𝑀[2, i] = + 2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑀[3, i] end end # L1 function update_grads!(::Type{L1}, δ𝑀::Matrix, p::Matrix, y::Vector; alpha, kwargs...) @inbounds for i in eachindex(y) - δ𝑀[1, i] = (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) * δ𝑀[3, i] + δ𝑀[1, i] = + (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) * + δ𝑀[3, i] end end # Softmax function update_grads!(::Type{Softmax}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) 
- p .= p .- maximum(p, dims=1) - sums = sum(exp.(p), dims=1) + p .= p .- maximum(p, dims = 1) + sums = sum(exp.(p), dims = 1) K = (size(δ𝑀, 1) - 1) Γ· 2 for i in eachindex(y) for k = 1:K @@ -79,14 +82,42 @@ end # Gaussian - http://jrmeyer.github.io/machinelearning/2017/08/18/mle.html # pred[i][1] = ΞΌ # pred[i][2] = log(Οƒ) -function update_grads!(::Type{Gaussian}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) +function update_grads!(::Type{GaussianDist}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) @inbounds @simd for i in eachindex(y) # first order δ𝑀[1, i] = (p[1, i] - y[i]) / exp(2 * p[2, i]) * δ𝑀[5, i] δ𝑀[2, i] = (1 - (p[1, i] - y[i])^2 / exp(2 * p[2, i])) * δ𝑀[5, i] # second order δ𝑀[3, i] = δ𝑀[5, i] / exp(2 * p[2, i]) - δ𝑀[4, i] = 2 * δ𝑀[5, i] / exp(2 * p[2, i]) * (p[1, i] - y[i])^2 + δ𝑀[4, i] = δ𝑀[5, i] * 2 / exp(2 * p[2, i]) * (p[1, i] - y[i])^2 + end +end + +# LogisticProb - https://en.wikipedia.org/wiki/Logistic_distribution +# pdf = +# pred[i][1] = ΞΌ +# pred[i][2] = log(s) +function update_grads!(::Type{LogisticDist}, δ𝑀::Matrix, p::Matrix, y::Vector; kwargs...) + Ο΅ = eltype(p)(2e-7) + @inbounds @simd for i in eachindex(y) + # first order + δ𝑀[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑀[5, i] + δ𝑀[2, i] = + -( + exp(-p[2, i]) * + (y[i] - p[1, i]) * + tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) - 1 + ) * δ𝑀[5, i] + # second order + δ𝑀[3, i] = + sech((y[i] - p[1, i]) / (2 * exp(p[2, i])))^2 / (2 * exp(2 * p[2, i])) * + δ𝑀[5, i] + δ𝑀[4, i] = + ( + exp(-2 * p[2, i]) * + (p[1, i] - y[i]) * + (p[1, i] - y[i] + exp(p[2, i]) * sinh(exp(-p[2, i]) * (p[1, i] - y[i]))) + ) / (1 + cosh(exp(-p[2, i]) * (p[1, i] - y[i]))) * δ𝑀[5, i] end end @@ -116,17 +147,27 @@ end # get the gain metric ############################## # GradientRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:GradientRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:GradientRegression,T<:AbstractFloat} βˆ‘[1]^2 / (βˆ‘[2] + Ξ» * βˆ‘[3]) / 2 end # GaussianRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:GaussianRegression,T<:AbstractFloat} +function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MLE2P,T<:AbstractFloat} (βˆ‘[1]^2 / (βˆ‘[3] + Ξ» * βˆ‘[5]) + βˆ‘[2]^2 / (βˆ‘[4] + Ξ» * βˆ‘[5])) / 2 end # MultiClassRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MultiClassRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:MultiClassRegression,T<:AbstractFloat} gain = zero(T) @inbounds for k = 1:K gain += βˆ‘[k]^2 / (βˆ‘[k+K] + Ξ» * βˆ‘[2*K+1]) / 2 @@ -135,7 +176,12 @@ function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:MultiClassRegre end # QuantileRegression -function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:QuantileRegression,T<:AbstractFloat} +function get_gain( + ::Type{L}, + βˆ‘::Vector{T}, + Ξ»::T, + K, +) where {L<:QuantileRegression,T<:AbstractFloat} abs(βˆ‘[1]) end @@ -145,13 +191,20 @@ function get_gain(::Type{L}, βˆ‘::Vector{T}, Ξ»::T, K) where {L<:L1Regression,T< end -function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:Union{GradientRegression,QuantileRegression,L1Regression}} +function update_childs_βˆ‘!( + ::Type{L}, + nodes, + n, + bin, + feat, + K, +) where {L<:Union{GradientRegression,QuantileRegression,L1Regression}} nodes[n<<1].βˆ‘ .= nodes[n].hL[feat][(3*bin-2):(3*bin)] nodes[n<<1+1].βˆ‘ .= nodes[n].hR[feat][(3*bin-2):(3*bin)] 
return nothing end -function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:GaussianRegression} +function update_childs_βˆ‘!(::Type{L}, nodes, n, bin, feat, K) where {L<:MLE2P} nodes[n<<1].βˆ‘ .= nodes[n].hL[feat][(5*bin-4):(5*bin)] nodes[n<<1+1].βˆ‘ .= nodes[n].hR[feat][(5*bin-4):(5*bin)] return nothing diff --git a/src/models.jl b/src/models.jl index acb244cc..90959401 100644 --- a/src/models.jl +++ b/src/models.jl @@ -3,7 +3,7 @@ abstract type GradientRegression <: ModelType end abstract type L1Regression <: ModelType end abstract type QuantileRegression <: ModelType end abstract type MultiClassRegression <: ModelType end -abstract type GaussianRegression <: ModelType end +abstract type MLE2P <: ModelType end # 2-parameters max-likelihood struct Linear <: GradientRegression end struct Logistic <: GradientRegression end struct Poisson <: GradientRegression end @@ -12,7 +12,8 @@ struct Tweedie <: GradientRegression end struct L1 <: L1Regression end struct Quantile <: QuantileRegression end struct Softmax <: MultiClassRegression end -struct Gaussian <: GaussianRegression end +struct GaussianDist <: MLE2P end +struct LogisticDist <: MLE2P end # make a Random Number Generator object mk_rng(rng::Random.AbstractRNG) = rng @@ -29,9 +30,9 @@ mutable struct EvoTreeRegressor{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Det colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any end function EvoTreeRegressor(; kwargs...) @@ -52,16 +53,18 @@ function EvoTreeRegressor(; kwargs...) :alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -85,7 +88,9 @@ function EvoTreeRegressor(; kwargs...) elseif args[:loss] == :quantile L = Quantile else - error("Invalid loss: $(args[:loss]). Only [`:linear`, `:logistic`, `:L1`, `:quantile`] are supported at the moment by EvoTreeRegressor.") + error( + "Invalid loss: $(args[:loss]). Only [`:linear`, `:logistic`, `:L1`, `:quantile`] are supported at the moment by EvoTreeRegressor.", + ) end model = EvoTreeRegressor{L,T,Int}( @@ -101,7 +106,8 @@ function EvoTreeRegressor(; kwargs...) T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end @@ -118,9 +124,9 @@ mutable struct EvoTreeCount{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabi colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any end function EvoTreeCount(; kwargs...) @@ -140,16 +146,18 @@ function EvoTreeCount(; kwargs...) 
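# Illustrative sketch of the keyword handling shared by these constructors:
# unspecified arguments fall back to the defaults dict and unrecognised ones are
# reported via @info as ignored; `foo` below is a deliberately bogus keyword.
config = EvoTreeCount(nrounds = 50, max_depth = 4, foo = 1)
# logs: "Following 1 provided arguments will be ignored: foo." and lists the
# remaining arguments left at their defaults.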
:alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -173,7 +181,8 @@ function EvoTreeCount(; kwargs...) T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end @@ -189,8 +198,8 @@ mutable struct EvoTreeClassifier{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Pr colsample::T nbins::S alpha::T - rng - device + rng::Any + device::Any end function EvoTreeClassifier(; kwargs...) @@ -209,16 +218,18 @@ function EvoTreeClassifier(; kwargs...) :nbins => 32, :alpha => 0.5, :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -241,12 +252,13 @@ function EvoTreeClassifier(; kwargs...) args[:nbins], T(args[:alpha]), args[:rng], - args[:device]) + args[:device], + ) return model end -mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic +mutable struct EvoTreeMLE{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic nrounds::S lambda::T gamma::T @@ -257,11 +269,96 @@ mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Prob colsample::T nbins::S alpha::T - monotone_constraints - rng - device + monotone_constraints::Any + rng::Any + device::Any +end + +function EvoTreeMLE(; kwargs...) 
+ + # defaults arguments + args = Dict{Symbol,Any}( + :T => Float64, + :loss => :gaussian, + :nrounds => 10, + :lambda => 0.0, + :gamma => 0.0, # min gain to split + :eta => 0.1, # learning rate + :max_depth => 5, + :min_weight => 1.0, # minimal weight, different from xgboost (but same for linear) + :rowsample => 1.0, + :colsample => 1.0, + :nbins => 32, + :alpha => 0.5, + :monotone_constraints => Dict{Int,Int}(), + :rng => 123, + :device => "cpu", + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + args[:rng] = mk_rng(args[:rng])::Random.AbstractRNG + args[:loss] = Symbol(args[:loss]) + T = args[:T] + + if args[:loss] in [:gaussian, :normal] + L = GaussianDist + elseif args[:loss] == :logistic + L = LogisticDist + else + error( + "Invalid loss: $(args[:loss]). Only `:normal`, `:gaussian` and `:logistic` are supported at the moment by EvoTreeMLE.", + ) + end + + model = EvoTreeMLE{L,T,Int}( + args[:nrounds], + T(args[:lambda]), + T(args[:gamma]), + T(args[:eta]), + args[:max_depth], + T(args[:min_weight]), + T(args[:rowsample]), + T(args[:colsample]), + args[:nbins], + T(args[:alpha]), + args[:monotone_constraints], + args[:rng], + args[:device], + ) + + return model end + +mutable struct EvoTreeGaussian{L<:ModelType,T<:AbstractFloat,S<:Int} <: MMI.Probabilistic + nrounds::S + lambda::T + gamma::T + eta::T + max_depth::S + min_weight::T # real minimum number of observations, different from xgboost (but same for linear) + rowsample::T # subsample + colsample::T + nbins::S + alpha::T + monotone_constraints::Any + rng::Any + device::Any +end function EvoTreeGaussian(; kwargs...) # defaults arguments @@ -279,16 +376,18 @@ function EvoTreeGaussian(; kwargs...) :alpha => 0.5, :monotone_constraints => Dict{Int,Int}(), :rng => 123, - :device => "cpu" + :device => "cpu", ) args_ignored = setdiff(keys(kwargs), keys(args)) args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." args_default = setdiff(keys(args), keys(kwargs)) args_default_str = join(args_default, ", ") - length(args_default) > 0 && @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." args_override = intersect(keys(args), keys(kwargs)) for arg in args_override @@ -296,7 +395,7 @@ function EvoTreeGaussian(; kwargs...) end args[:rng] = mk_rng(args[:rng])::Random.AbstractRNG - L = Gaussian + L = GaussianDist T = args[:T] model = EvoTreeGaussian{L,T,Int}( @@ -312,10 +411,17 @@ function EvoTreeGaussian(; kwargs...) 
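# Illustrative sketch of the new EvoTreeMLE constructor defined above: it fits a
# two-parameter maximum-likelihood model (location and log-scale per observation)
# with loss = :gaussian / :normal or :logistic; the values below are placeholders.
config = EvoTreeMLE(loss = :logistic, nrounds = 100, eta = 0.05, nbins = 64)
# Any other loss (e.g. :poisson) raises the error message shown above.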
T(args[:alpha]), args[:monotone_constraints], args[:rng], - args[:device]) + args[:device], + ) return model end # const EvoTypes = Union{EvoTreeRegressor,EvoTreeCount,EvoTreeClassifier,EvoTreeGaussian} -const EvoTypes{L,T,S} = Union{EvoTreeRegressor{L,T,S},EvoTreeCount{L,T,S},EvoTreeClassifier{L,T,S},EvoTreeGaussian{L,T,S}} +const EvoTypes{L,T,S} = Union{ + EvoTreeRegressor{L,T,S}, + EvoTreeCount{L,T,S}, + EvoTreeClassifier{L,T,S}, + EvoTreeGaussian{L,T,S}, + EvoTreeMLE{L,T,S}, +} diff --git a/src/plot.jl b/src/plot.jl index e99cefd4..0f04dcaf 100644 --- a/src/plot.jl +++ b/src/plot.jl @@ -20,16 +20,21 @@ function get_adj_list(tree::EvoTrees.Tree) push!(adj, []) end end - return (map=map, adj=adj) + return (map = map, adj = adj) end function get_shapes(tree_layout) shapes = Vector(undef, length(tree_layout)) - for i = 1:length(tree_layout) + for i = eachindex(tree_layout) x, y = tree_layout[i][1], tree_layout[i][2] # center point x_buff = 0.45 y_buff = 0.45 - shapes[i] = [(x - x_buff, y + y_buff), (x + x_buff, y + y_buff), (x + x_buff, y - y_buff), (x - x_buff, y - y_buff)] + shapes[i] = [ + (x - x_buff, y + y_buff), + (x + x_buff, y + y_buff), + (x + x_buff, y - y_buff), + (x - x_buff, y - y_buff), + ] end return shapes end @@ -37,13 +42,15 @@ end function get_annotations(tree_layout, map, tree, var_names) # annotations = Vector{Tuple{Float64, Float64, String, Tuple}}(undef, length(tree_layout)) annotations = [] - for i = 1:length(tree_layout) + for i = eachindex(tree_layout) x, y = tree_layout[i][1], tree_layout[i][2] # center point if tree.split[map[i]] - feat = isnothing(var_names) ? "feat: " * string(tree.feat[map[i]]) : var_names[tree.feat[map[i]]] - txt = "$feat\n" * string(round(tree.cond_float[map[i]], sigdigits=3)) + feat = + isnothing(var_names) ? "feat: " * string(tree.feat[map[i]]) : + var_names[tree.feat[map[i]]] + txt = "$feat\n" * string(round(tree.cond_float[map[i]], sigdigits = 3)) else - txt = "pred:\n" * string(round(tree.pred[1, map[i]], sigdigits=3)) + txt = "pred:\n" * string(round(tree.pred[1, map[i]], sigdigits = 3)) end # annotations[i] = (x, y, txt, (9, :white, "helvetica")) push!(annotations, (x, y, txt, 10)) @@ -54,16 +61,22 @@ end function get_curves(adj, tree_layout, shapes) curves = [] num_curves = sum(length.(adj)) - for i = 1:length(adj) - for j = 1:length(adj[i]) + for i = eachindex(adj) + for j = eachindex(adj[i]) # curves is a length 2 tuple: (vector Xs, vector Ys) - push!(curves, ([tree_layout[i][1], tree_layout[adj[i][j]][1]], [shapes[i][3][2], shapes[adj[i][j]][1][2]])) + push!( + curves, + ( + [tree_layout[i][1], tree_layout[adj[i][j]][1]], + [shapes[i][3][2], shapes[adj[i][j]][1][2]], + ), + ) end end return curves end -@recipe function plot(tree::EvoTrees.Tree, var_names=nothing) +@recipe function plot(tree::EvoTrees.Tree, var_names = nothing) map, adj = EvoTrees.get_adj_list(tree) tree_layout = length(adj) == 1 ? [[0.0, 0.0]] : NetworkLayout.buchheim(adj) @@ -82,7 +95,7 @@ end size --> size annotations --> annotations - for i = 1:length(shapes) + for i = eachindex(shapes) @series begin fillcolor = length(adj[i]) == 0 ? 
"#84DCC6" : "#C8D3D5" fillcolor --> fillcolor @@ -91,7 +104,7 @@ end end end - for i = 1:length(curves) + for i = eachindex(curves) @series begin seriestype --> :curves return curves[i] @@ -99,7 +112,7 @@ end end end -@recipe function plot(model::EvoTrees.GBTree, n=1, var_names=nothing) +@recipe function plot(model::EvoTrees.GBTree, n = 1, var_names = nothing) isnothing(var_names) @@ -121,7 +134,7 @@ end size --> size annotations --> annotations - for i = 1:length(shapes) + for i = eachindex(shapes) @series begin fillcolor = length(adj[i]) == 0 ? "#84DCC6" : "#C8D3D5" fillcolor --> fillcolor @@ -130,7 +143,7 @@ end end end - for i = 1:length(curves) + for i = eachindex(curves) @series begin seriestype --> :curves return curves[i] diff --git a/src/predict.jl b/src/predict.jl index 8160a429..8f384799 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -2,7 +2,8 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:GradientRegress @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] += tree.pred[1, nid] end @@ -13,18 +14,20 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:Logistic,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] = clamp(pred[1, i] + tree.pred[1, nid], -15, 15) end return nothing end -function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:GaussianRegression,T} +function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L<:MLE2P,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds pred[1, i] += tree.pred[1, nid] @inbounds pred[2, i] = max(-15, pred[2, i] + tree.pred[2, nid]) @@ -41,7 +44,8 @@ function predict!(pred::Matrix, tree::Tree{L,T}, X, K) where {L,T} @inbounds @threads for i in axes(X, 1) nid = 1 @inbounds while tree.split[nid] - X[i, tree.feat[nid]] < tree.cond_float[nid] ? nid = nid << 1 : nid = nid << 1 + 1 + X[i, tree.feat[nid]] < tree.cond_float[nid] ? 
nid = nid << 1 : + nid = nid << 1 + 1 end @inbounds for k = 1:K pred[k, i] += tree.pred[k, nid] @@ -75,7 +79,7 @@ function predict(model::GBTree{L,T,S}, X::AbstractMatrix) where {L,T,S} pred .= sigmoid.(pred) elseif L ∈ [Poisson, Gamma, Tweedie] pred .= exp.(pred) - elseif L == Gaussian + elseif L in [GaussianDist, LogisticDist] pred[2, :] .= exp.(pred[2, :]) elseif L == Softmax @inbounds for i in axes(pred, 2) @@ -87,31 +91,67 @@ function predict(model::GBTree{L,T,S}, X::AbstractMatrix) where {L,T,S} end -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:GradientRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:GradientRegression,T,S} pred[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[2] + params.lambda * βˆ‘[3]) end -function pred_scalar_cpu!(βˆ‘::Vector{T}, params::EvoTypes, K) where {L<:GradientRegression,T,S} +function pred_scalar_cpu!( + βˆ‘::AbstractVector{T}, + params::EvoTypes, + K, +) where {L<:GradientRegression,T,S} -params.eta * βˆ‘[1] / (βˆ‘[2] + params.lambda * βˆ‘[3]) end -# prediction in Leaf - GaussianRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:GaussianRegression,T,S} +# prediction in Leaf - MLE2P +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:MLE2P,T,S} pred[1, n] = -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5]) pred[2, n] = -params.eta * βˆ‘[2] / (βˆ‘[4] + params.lambda * βˆ‘[5]) end -function pred_scalar_cpu!(βˆ‘::Vector{T}, params::EvoTypes{L,T,S}, K) where {L<:GaussianRegression,T,S} +function pred_scalar_cpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:MLE2P,T,S} -params.eta * βˆ‘[1] / (βˆ‘[3] + params.lambda * βˆ‘[5]) end # prediction in Leaf - MultiClassRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:MultiClassRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:MultiClassRegression,T,S} @inbounds for k = 1:K pred[k, n] = -params.eta * βˆ‘[k] / (βˆ‘[k+K] + params.lambda * βˆ‘[2*K+1]) end end # prediction in Leaf - QuantileRegression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:QuantileRegression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:QuantileRegression,T,S} pred[1, n] = params.eta * quantile(δ𝑀[2, 𝑖], params.alpha) / (1 + params.lambda) # pred[1,n] = params.eta * quantile(view(δ𝑀, 2, 𝑖), params.alpha) / (1 + params.lambda) end @@ -120,9 +160,17 @@ end # end # prediction in Leaf - L1Regression -function pred_leaf_cpu!(pred, n, βˆ‘::Vector, params::EvoTypes{L,T,S}, K, δ𝑀, 𝑖) where {L<:L1Regression,T,S} +function pred_leaf_cpu!( + pred, + n, + βˆ‘::Vector, + params::EvoTypes{L,T,S}, + K, + δ𝑀, + 𝑖, +) where {L<:L1Regression,T,S} pred[1, n] = params.eta * βˆ‘[1] / (βˆ‘[3] * (1 + params.lambda)) end -function pred_scalar_cpu!(βˆ‘::Vector, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} +function pred_scalar_cpu!(βˆ‘::AbstractVector{T}, params::EvoTypes{L,T,S}, K) where {L<:L1Regression,T,S} params.eta * βˆ‘[1] / (βˆ‘[3] * (1 + params.lambda)) end \ No newline at end of file diff --git a/src/structs.jl b/src/structs.jl index 64a22055..17752dd2 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -19,7 +19,8 @@ function TrainNode(nvars, nbins, K, T) [zeros(T, (2 * K 
+ 1) * nbins) for j = 1:nvars], [zeros(T, (2 * K + 1) * nbins) for j = 1:nvars], [zeros(T, (2 * K + 1) * nbins) for j = 1:nvars], - zeros(T, nbins, nvars)) + zeros(T, nbins, nvars), + ) return node end @@ -40,7 +41,8 @@ function Tree{L,T}(x::Vector{T}) where {L,T} zeros(T, 1), zeros(T, 1), reshape(x, :, 1), - zeros(Bool, 1)) + zeros(Bool, 1), + ) end function Tree{L,T}(depth, K, ::T) where {L,T} @@ -50,7 +52,7 @@ function Tree{L,T}(depth, K, ::T) where {L,T} zeros(T, 2^depth - 1), zeros(T, 2^depth - 1), zeros(T, K, 2^depth - 1), - zeros(Bool, 2^depth - 1) + zeros(Bool, 2^depth - 1), ) end @@ -67,6 +69,6 @@ struct GBTree{L,T,S} params::EvoTypes metric::Metric K::Int - info + info::Any end -(m::GBTree)(x::AbstractMatrix) = predict(m, x) \ No newline at end of file +(m::GBTree)(x::AbstractMatrix) = predict(m, x) diff --git a/test/MLJ.jl b/test/MLJ.jl index a19dc22b..3d28a3a8 100644 --- a/test/MLJ.jl +++ b/test/MLJ.jl @@ -15,18 +15,18 @@ X = MLJBase.table(X) # @load EvoTreeRegressor # linear regression -tree_model = EvoTreeRegressor(max_depth=5, eta=0.05, nrounds=10) +tree_model = EvoTreeRegressor(max_depth = 5, eta = 0.05, nrounds = 10) # logistic regression -tree_model = EvoTreeRegressor(loss=:logistic, max_depth=5, eta=0.05, nrounds=10) +tree_model = EvoTreeRegressor(loss = :logistic, max_depth = 5, eta = 0.05, nrounds = 10) # quantile regression # tree_model = EvoTreeRegressor(loss=:quantile, alpha=0.75, max_depth=5, eta=0.05, nrounds=10) mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) # predict on train data pred_train = predict(mach, selectrows(X, train)) @@ -64,15 +64,16 @@ mean(abs.(pred_test - selectrows(Y, test))) ################################################## X, y = @load_crabs -tree_model = EvoTreeClassifier(max_depth=4, eta=0.05, lambda=0.0, gamma=0.0, nrounds=10) +tree_model = + EvoTreeClassifier(max_depth = 4, eta = 0.05, lambda = 0.0, gamma = 0.0, nrounds = 10) # @load EvoTreeRegressor mach = machine(tree_model, X, y) -train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1) +train, test = partition(eachindex(y), 0.7, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1) mach.model.nrounds += 50 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) pred_train = predict(mach, selectrows(X, train)) pred_train_mode = predict_mode(mach, selectrows(X, train)) @@ -95,7 +96,7 @@ Y = rand(UInt8, size(X, 1)) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] @@ -105,22 +106,29 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeCount( - loss=:poisson, metric=:poisson, - nrounds=10, - lambda=0.0, gamma=0.0, eta=0.1, - max_depth=6, min_weight=1.0, - rowsample=0.5, colsample=0.5, nbins=32) + loss = :poisson, + metric = :poisson, + nrounds = 10, + lambda = 0.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 1.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) X = MLJBase.table(X) X = MLJBase.matrix(X) # 
typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) pred = predict(mach, selectrows(X, train)) pred_mean = predict_mean(mach, selectrows(X, train)) @@ -136,7 +144,7 @@ Y = rand(size(X, 1)) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] @@ -146,21 +154,78 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval] # @load EvoTreeRegressor tree_model = EvoTreeGaussian( - loss=:gaussian, metric=:gaussian, - nrounds=10, - lambda=0.0, gamma=0.0, eta=0.1, - max_depth=6, min_weight=1.0, - rowsample=0.5, colsample=0.5, nbins=32) + nrounds = 10, + lambda = 0.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 1.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) X = MLJBase.table(X) # typeof(X) mach = machine(tree_model, X, Y) -train, test = partition(eachindex(Y), 0.8, shuffle=true); # 70:30 split -fit!(mach, rows=train, verbosity=1, force=true) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) mach.model.nrounds += 10 -fit!(mach, rows=train, verbosity=1) +fit!(mach, rows = train, verbosity = 1) + +pred = predict(mach, selectrows(X, train)) +pred_mean = predict_mean(mach, selectrows(X, train)) +pred_mode = predict_mode(mach, selectrows(X, train)) +# pred_mode = predict_median(mach, selectrows(X,train)) +mean(abs.(pred_mean - selectrows(Y, train))) + +q_20 = quantile.(pred, 0.20) +q_20 = quantile.(pred, 0.80) + +report(mach) + +################################################## +### Logistic - Larger data +################################################## +features = rand(10_000, 10) +X = features +Y = rand(size(X, 1)) +𝑖 = collect(1:size(X, 1)) + +# train-eval split +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) +train_size = 0.8 +𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))] +𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end] + +x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :] +y_train, y_eval = Y[𝑖_train], Y[𝑖_eval] + +# @load EvoTreeRegressor +tree_model = EvoTreeMLE( + loss = :logistic, + nrounds = 10, + lambda = 1.0, + gamma = 0.0, + eta = 0.1, + max_depth = 6, + min_weight = 32.0, + rowsample = 0.5, + colsample = 0.5, + nbins = 32, +) + +X = MLJBase.table(X) + +# typeof(X) +mach = machine(tree_model, X, Y) +train, test = partition(eachindex(Y), 0.8, shuffle = true); # 70:30 split +fit!(mach, rows = train, verbosity = 1, force = true) + +mach.model.nrounds += 10 +fit!(mach, rows = train, verbosity = 1) pred = predict(mach, selectrows(X, train)) pred_mean = predict_mean(mach, selectrows(X, train)) @@ -198,6 +263,7 @@ for model ∈ [ EvoTreeClassifier(), EvoTreeCount(), EvoTreeRegressor(), + EvoTreeMLE(), EvoTreeGaussian(), ] diff --git a/test/core.jl b/test/core.jl index 32e9c522..5bb2c65b 100644 --- a/test/core.jl +++ b/test/core.jl @@ -13,7 +13,7 @@ Y = sigmoid(Y) 𝑖 = collect(1:size(X, 1)) # train-eval split -𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false) +𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false) train_size = 0.8 
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
@@ -24,16 +24,31 @@ y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
 @testset "EvoTreeRegressor - Linear" begin
     # linear
     params1 = EvoTreeRegressor(
-        loss=:linear, metric=:mse,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :linear,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :mse,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -43,16 +58,30 @@ end
 @testset "EvoTreeRegressor - Logistic" begin
     params1 = EvoTreeRegressor(
-        loss=:logistic, metric=:logloss,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :logistic,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :logloss,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -62,16 +91,30 @@ end
 @testset "EvoTreeRegressor - Gamma" begin
     params1 = EvoTreeRegressor(
-        loss=:gamma, metric=:gamma,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :gamma,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :gamma,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -81,16 +124,30 @@ end
 @testset "EvoTreeRegressor - Tweedie" begin
     params1 = EvoTreeRegressor(
-        loss=:tweedie, metric=:tweedie,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :tweedie,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :tweedie,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -100,16 +157,32 @@ end
 @testset "EvoTreeRegressor - L1" begin
     params1 = EvoTreeRegressor(
-        loss=:L1, alpha=0.5, metric=:mae,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :L1,
+        alpha = 0.5,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :mae,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -119,16 +192,32 @@ end
 @testset "EvoTreeRegressor - Quantile" begin
     params1 = EvoTreeRegressor(
-        loss=:quantile, alpha=0.5, metric=:quantile,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :quantile,
+        alpha = 0.5,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :quantile,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -138,16 +227,30 @@ end
 @testset "EvoTreeCount - Count" begin
     params1 = EvoTreeCount(
-        loss=:poisson, metric=:poisson,
-        nrounds=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :poisson,
+        nrounds = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :poisson,
+        print_every_n = 25,
+    )
     preds = EvoTrees.predict(model, x_eval)
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -155,18 +258,92 @@ end
     @test mse_gain_pct < -0.75
 end
+@testset "EvoTreeMLE - Gaussian" begin
+    params1 = EvoTreeMLE(
+        loss = :gaussian,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
+
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
+    preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :gaussian,
+        print_every_n = 25,
+    )
+
+    preds = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error = mean(abs.(preds .- y_eval) .^ 2)
+    mse_gain_pct = mse_error / mse_error_ini - 1
+    @test mse_gain_pct < -0.75
+end
+
+@testset "EvoTreeMLE - Logistic" begin
+    params1 = EvoTreeMLE(
+        loss = :logistic,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
+
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
+    preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
+    model = fit_evotree(
+        params1;
+        x_train,
+        y_train,
+        x_eval,
+        y_eval,
+        metric = :logistic,
+        print_every_n = 25,
+    )
+
+    preds = EvoTrees.predict(model, x_eval)[:, 1]
+    mse_error = mean(abs.(preds .- y_eval) .^ 2)
+    mse_gain_pct = mse_error / mse_error_ini - 1
+    @test mse_gain_pct < -0.75
+end
+
 @testset "EvoTreeGaussian - Gaussian" begin
     params1 = EvoTreeGaussian(
-        loss=:gaussian, metric=:gaussian,
-        nrounds=100, nbins=100,
-        lambda=0.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=10.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.0,
+        gamma = 0.0,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 10.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
-    model, cache = EvoTrees.init_evotree(params1, x_train, y_train)
+    model, cache = EvoTrees.init_evotree(params1; x_train, y_train)
     preds_ini = EvoTrees.predict(model, x_eval)[:, 1]
     mse_error_ini = mean(abs.(preds_ini .- y_eval) .^ 2)
-    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25)
     preds = EvoTrees.predict(model, x_eval)[:, 1]
     mse_error = mean(abs.(preds .- y_eval) .^ 2)
@@ -176,11 +353,18 @@ end
 @testset "EvoTrees - Feature Importance" begin
     params1 = EvoTreeRegressor(
-        loss=:linear, metric=:mae,
-        nrounds=100, nbins=100,
-        lambda=0.5, gamma=0.1, eta=0.05,
-        max_depth=6, min_weight=1.0,
-        rowsample=0.5, colsample=1.0, rng=123)
+        loss = :linear,
+        nrounds = 100,
+        nbins = 100,
+        lambda = 0.5,
+        gamma = 0.1,
+        eta = 0.05,
+        max_depth = 6,
+        min_weight = 1.0,
+        rowsample = 0.5,
+        colsample = 1.0,
+        rng = 123,
+    )
     model = fit_evotree(params1; x_train, y_train)
     features_gain = importance(model)
diff --git a/test/gpu_base.jl b/test/gpu_base.jl
index 22b98d5c..78c7c282 100644
--- a/test/gpu_base.jl
+++ b/test/gpu_base.jl
@@ -18,7 +18,7 @@ Y = sigmoid(Y)
 𝑖 = collect(1:size(X, 1))
 # train-eval split
-𝑖_sample = sample(𝑖, size(𝑖, 1), replace=false)
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
 train_size = 0.8
 𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
 𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
@@ -29,43 +29,85 @@ y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
 ################################
 # linear
 ################################
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:linear, metric=:none,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0)
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :linear,
+    metric = :none,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+)
 @time model = fit_evotree_gpu(params1; x_train, y_train);
 @time pred_train_linear = predict_gpu(model, X_train)
 x_perm = sortperm(X_train[:, 1])
-plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-plot!(X_train[:, 1][x_perm], pred_train_linear[x_perm], color="navy", linewidth=1.5, label="Linear")
+plot(
+    X_train,
+    Y_train,
+    msize = 1,
+    mcolor = "gray",
+    mswidth = 0,
+    background_color = RGB(1, 1, 1),
+    seriestype = :scatter,
+    xaxis = ("feature"),
+    yaxis = ("target"),
+    legend = true,
+    label = "",
+)
+plot!(
+    X_train[:, 1][x_perm],
+    pred_train_linear[x_perm],
+    color = "navy",
+    linewidth = 1.5,
+    label = "Linear",
+)
 # savefig("figures/regression_sinus_gpu.png")
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:linear, metric=:mse,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0,
-    device="gpu")
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :linear,
+    metric = :mse,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    device = "gpu",
+)
-@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n=25);
-@time model = fit_evotree_gpu(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n = 25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25);
 @time pred_train_linear = predict_gpu(model, x_train)
 ################################
 # Logistic
 ################################
-params1 = EvoTreeRegressor(T=Float32,
-    loss=:logistic, metric=:logloss,
-    nrounds=200, nbins=64,
-    lambda=0.5, gamma=0.1, eta=0.1,
-    max_depth=6, min_weight=1.0,
-    rowsample=0.5, colsample=1.0,
-    device="gpu")
+params1 = EvoTreeRegressor(
+    T = Float32,
+    loss = :logistic,
+    metric = :logloss,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.5,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    device = "gpu",
+)
 @time model = fit_evotree_gpu(params1; x_train, y_train);
 @time pred_train_linear = predict_gpu(model, X_train)
@@ -73,18 +115,30 @@ params1 = EvoTreeRegressor(T=Float32,
 ################################
 # Gaussian
 ################################
-params1 = EvoTreeGaussian(T=Float64,
-    loss=:gaussian, metric=:gaussian,
-    nrounds=200, nbins=64,
-    lambda=1.0, gamma=0.1, eta=0.1,
-    max_depth=5, min_weight=100.0,
-    rowsample=0.5, colsample=1.0, rng=123,
-    device="gpu")
+params1 = EvoTreeGaussian(
+    T = Float64,
+    loss = :gaussian,
+    metric = :gaussian,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 1.0,
+    gamma = 0.1,
+    eta = 0.1,
+    max_depth = 5,
+    min_weight = 100.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    rng = 123,
+    device = "gpu",
+)
-@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n=25);
+@time model = fit_evotree_gpu(params1; x_train, y_train, print_every_n = 25);
 @time pred_train_gauss = predict_gpu(model, x_train)
-pred_gauss = [Distributions.Normal(pred_train_gauss[i, 1], pred_train_gauss[i, 2]) for i in axes(pred_train_gauss, 1)]
+pred_gauss = [
+    Distributions.Normal(pred_train_gauss[i, 1], pred_train_gauss[i, 2]) for
+    i in axes(pred_train_gauss, 1)
+]
 pred_q20 = quantile.(pred_gauss, 0.2)
 pred_q80 = quantile.(pred_gauss, 0.8)
diff --git a/test/monotonic.jl b/test/monotonic.jl
index 6233afb4..873cd4f8 100644
--- a/test/monotonic.jl
+++ b/test/monotonic.jl
@@ -29,11 +29,18 @@
     # benchmark
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:linear, metric=:mse,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        loss=:linear,
+        nrounds=20,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -41,22 +48,28 @@
     # monotonic constraint
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:linear, metric=:mse,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.5,
-        max_depth=6, min_weight=0.0,
+        loss=:linear,
+        nrounds=20,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.5,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
     # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
@@ -101,11 +114,19 @@
     # benchmark
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:logistic, metric=:logloss,
-        nrounds=200, nbins=32,
-        lambda=0.05, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        loss=:logistic,
+        metric=:logloss,
+        nrounds=200,
+        nbins=32,
+        lambda=0.05,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -113,22 +134,28 @@
     # monotonic constraint
     params1 = EvoTreeRegressor(
         device="cpu",
-        loss=:logistic, metric=:logloss,
-        nrounds=200, nbins=32,
-        lambda=0.05, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
+        loss=:logistic,
+        metric=:logloss,
+        nrounds=200,
+        nbins=32,
+        lambda=0.05,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
@@ -170,14 +197,21 @@
     ######################################
     ### Gaussian - CPU
     ######################################
-    # linear - benchmark
+    # benchmark
     params1 = EvoTreeGaussian(
         device="cpu",
         metric=:gaussian,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.05,
-        max_depth=6, min_weight=0.0,
-        rowsample=0.5, colsample=1.0, rng=seed)
+        nrounds=200,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.05,
+        max_depth=6,
+        min_weight=0.0,
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_ref = predict(model, x_train)
@@ -186,27 +220,32 @@
     params1 = EvoTreeGaussian(
         device="cpu",
         metric=:gaussian,
-        nrounds=200, nbins=32,
-        lambda=1.0, gamma=0.0, eta=0.5,
-        max_depth=6, min_weight=0.0,
+        nrounds=200,
+        nbins=32,
+        lambda=1.0,
+        gamma=0.0,
+        eta=0.5,
+        max_depth=6,
+        min_weight=0.0,
         monotone_constraints=Dict(1 => 1),
-        rowsample=0.5, colsample=1.0, rng=seed)
+        rowsample=0.5,
+        colsample=1.0,
+        rng=seed,
+    )
     model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
     preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
     ######################################
     ### Gaussian - GPU
     ######################################
-    # linear - benchmark
+    # benchmark
     # params1 = EvoTreeGaussian(
     #     device="gpu",
     #     metric=:gaussian,
@@ -215,8 +254,8 @@
     #     max_depth=6, min_weight=0.0,
     #     rowsample=0.5, colsample=1.0, rng=seed)
-    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
-    # preds_ref = predict(model, x_train);
+    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    # preds_ref = predict(model, x_train)
     # # monotonic constraint
     # params1 = EvoTreeGaussian(
@@ -228,14 +267,12 @@
     #     monotone_constraints=Dict(1 => 1),
     #     rowsample=0.5, colsample=1.0, rng=seed)
-    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
-    # preds_mono = predict(model, x_train);
+    # model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
+    # preds_mono = predict(model, x_train)
-    # using Plots
-    # using Colors
-    # x_perm = sortperm(X_train[:, 1])
-    # plot(X_train, Y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="")
-    # plot!(X_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
-    # plot!(X_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
+    # x_perm = sortperm(x_train[:, 1])
+    # plot(x_train, y_train, msize=1, mcolor="gray", mswidth=0, background_color=RGB(1, 1, 1), seriestype=:scatter, xaxis=("feature"), yaxis=("target"), legend=true, label="GPU Gauss")
+    # plot!(x_train[:, 1][x_perm], preds_ref[x_perm], color="navy", linewidth=1.5, label="Reference")
+    # plot!(x_train[:, 1][x_perm], preds_mono[x_perm], color="red", linewidth=1.5, label="Monotonic")
 end
\ No newline at end of file
diff --git a/test/plot.jl b/test/plot.jl
index 2e8a598f..c182bd54 100644
--- a/test/plot.jl
+++ b/test/plot.jl
@@ -8,7 +8,7 @@ using EvoTrees
 # @load "data/model_gaussian_5.bson" model
 model = EvoTrees.load("data/model_linear_4.bson");
-var_names = ["var_$i" for i in 1:100]
+var_names = ["var_$i" for i = 1:100]
 plot(model)
 plot(model, 2)
 plot(model, 3, var_names)
@@ -17,15 +17,15 @@ plot(model.trees[2], var_names)
 typeof(tree_layout[1])
 BezierCurve(tree_layout[1])
-mutable struct BCurve{T <: GeometryBasics.Point}
+mutable struct BCurve{T<:GeometryBasics.Point}
     control_points::Vector{T}
 end
 function (bc::BCurve)(t::Real)
     p = zero(P2)
     n = length(bc.control_points) - 1
-    for i in 0:n
-        p += bc.control_points[i + 1] * binomial(n, i) * (1 - t)^(n - i) * t^i
+    for i = 0:n
+        p += bc.control_points[i+1] * binomial(n, i) * (1 - t)^(n - i) * t^i
     end
     p
 end
diff --git a/test/save_load.jl b/test/save_load.jl
new file mode 100644
index 00000000..c6d8ca11
--- /dev/null
+++ b/test/save_load.jl
@@ -0,0 +1,69 @@
+using Statistics
+using StatsBase: sample, quantile
+using Distributions
+using Random
+using EvoTrees
+using EvoTrees: sigmoid, logit
+using Serialization
+
+# prepare a dataset
+Random.seed!(12)
+features = rand(10_000) .* 5
+X = reshape(features, (size(features)[1], 1))
+Y = sin.(features) .* 0.5 .+ 0.5
+Y = logit(Y) + randn(size(Y))
+Y = sigmoid(Y)
+𝑖 = collect(1:size(X, 1))
+
+# train-eval split
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
+train_size = 0.8
+𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
+
+x_train, x_eval = X[𝑖_train, :], X[𝑖_eval, :]
+y_train, y_eval = Y[𝑖_train], Y[𝑖_eval]
+
+# linear
+params1 = EvoTreeRegressor(
+    T = Float64,
+    loss = :linear,
+    metric = :mse,
+    nrounds = 200,
+    nbins = 64,
+    lambda = 0.1,
+    gamma = 0.1,
+    eta = 0.05,
+    max_depth = 6,
+    min_weight = 1.0,
+    rowsample = 0.5,
+    colsample = 1.0,
+    rng = 123,
+)
+
+m = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n = 25);
+p = m(x_eval)
+
+# serialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v182.dat"), m);
+# serialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v182.dat"), p);
+
+# m_172 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v172.dat"));
+# p_172 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v172.dat"));
+# pm_172 = m_172(x_eval)
+
+# m_180 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v180.dat"));
+# p_180 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v180.dat"));
+# pm_180 = m_180(x_eval)
+
+# m_182 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-m-v182.dat"));
+# p_182 = deserialize(joinpath(@__DIR__, "..", "data", "save-load-test-p-v182.dat"));
+# pm_182 = m_182(x_eval)
+
+# @assert all(p .== p_172)
+# @assert all(p .== pm_172)
+# @assert all(p .== p_180)
+# @assert all(p .== pm_180)
+# @assert all(p .== p_182)
+# @assert all(p .== pm_182)
+
+# @info "test successful! πŸš€"
\ No newline at end of file