diff --git a/Project.toml b/Project.toml
index ba4801ba..56933c60 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "EvoTrees"
 uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 authors = ["jeremiedb "]
-version = "0.4.8"
+version = "0.4.9"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
diff --git a/README.md b/README.md
index af4d34ed..9ac8644a 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,53 @@ julia> Pkg.add("EvoTrees")
 - α: float \[0,1\], set the quantile or bias in L1 default=0.5
 - metric: {:mse, :rmse, :mae, :logloss, :quantile}, default=:none
 
-## Getting started
+
+## MLJ Integration
+
+See the [official project page](https://github.com/alan-turing-institute/MLJ.jl) for more info.
+
+```julia
+using StatsBase: sample
+using EvoTrees
+using EvoTrees: sigmoid, logit
+using MLJBase
+
+features = rand(10_000) .* 5 .- 2
+X = reshape(features, (size(features)[1], 1))
+Y = sin.(features) .* 0.5 .+ 0.5
+Y = logit(Y) + randn(size(Y))
+Y = sigmoid(Y)
+y = Y
+X = MLJBase.table(X)
+
+# @load EvoTreeRegressor
+# linear regression
+tree_model = EvoTreeRegressor(loss=:linear, max_depth=5, η=0.05, nrounds=10)
+
+# set machine
+mach = machine(tree_model, X, y)
+
+# partition data
+train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split
+
+# fit data
+fit!(mach, rows=train, verbosity=1)
+
+# continue training
+mach.model.nrounds += 10
+fit!(mach, rows=train, verbosity=1)
+
+# predict on train data
+pred_train = predict(mach, selectrows(X,train))
+mean(abs.(pred_train - selectrows(Y,train)))
+
+# predict on test data
+pred_test = predict(mach, selectrows(X,test))
+mean(abs.(pred_test - selectrows(Y,test)))
+```
+
+
+## Getting started using the internal API
 
 Minimal example to fit a noisy sine wave.
 
@@ -167,7 +213,7 @@ pred_train_q80 = predict(model, X_train)
 
 ## Gaussian Max Likelihood
 
-![](gaussian_likelihood.png)
+![](gaussian_sinus.png)
 
 ```julia
 params1 = EvoTreeGaussian(
diff --git a/experiments/benchmarks.jl b/experiments/benchmarks.jl
index 913b0d6c..ca00ff01 100644
--- a/experiments/benchmarks.jl
+++ b/experiments/benchmarks.jl
@@ -6,7 +6,7 @@ using EvoTrees
 using BenchmarkTools
 
 # prepare a dataset
-features = rand(Int(2.25e6), 100)
+features = rand(Int(1.25e6), 100)
 # features = rand(100, 10)
 X = features
 Y = rand(size(X, 1))
diff --git a/experiments/gaussian.jl b/experiments/gaussian.jl
index 9305907b..f05e55bb 100644
--- a/experiments/gaussian.jl
+++ b/experiments/gaussian.jl
@@ -1,8 +1,9 @@
+using Plots
 using Statistics
 using StatsBase: sample
-using EvoTrees
 using Distributions
-using Plots
+using Revise
+using EvoTrees
 
 features = rand(Int(1.25e4), 5) # prepare a dataset
@@ -14,6 +15,8 @@ Y[(X[:,1] .>= 0.4) .& (X[:,1] .< 0.6)] .*= 5
 Y[(X[:,1] .>= 0.9)] .*= 5
 𝑖 = collect(1:size(X,1))
 
+Y .*= 0.01
+
 # train-eval split
 𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
 train_size = 0.8
@@ -26,17 +29,17 @@ Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
 
 # train model
 params1 = EvoTreeGaussian(
     loss=:gaussian, metric=:gaussian,
-    nrounds=400,
-    λ = 0.5, γ=1.0, η=0.05,
-    max_depth = 4, min_weight = 200.0,
-    rowsample=0.9, colsample=0.99, nbins=255)
+    nrounds=40,
+    λ = 0.0, γ=0.0, η=0.05,
+    max_depth = 5, min_weight = 50.0,
+    rowsample=0.5, colsample=1.0, nbins=200)
 
 @time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n = 10);
 # @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10);
 @time pred_train = EvoTrees.predict(model, X_train)
 @time pred_train_gauss = EvoTrees.predict(params1, model, X_train)
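Review note on the gaussian.jl changes that continue below: as of this release the Gaussian loss keeps log(σ) rather than log(σ²) in the second prediction column, which is why the `sqrt` wrappers around `pred_train[i,2]` are dropped in the following hunk and σ can be passed straight to `Distributions.Normal` (this assumes, as the code relies on, that `predict` maps the internal log(σ) back to the natural scale). A minimal sketch of why the q10/q90 bands stay valid — normal quantiles are affine in σ, so `Normal` must receive σ itself, not σ²:

```julia
# Sketch only: quantile(Normal(μ, σ), p) = μ + σ * Φ⁻¹(p)
using Distributions

μ, σ = 0.3, 1.7
quantile(Normal(μ, σ), 0.9) ≈ μ + σ * quantile(Normal(0, 1), 0.9)  # true
```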
-pred_gauss = [Distributions.Normal(pred_train[i,1], sqrt(pred_train[i,2])) for i in 1:size(pred_train,1)]
+pred_gauss = [Distributions.Normal(pred_train[i,1], pred_train[i,2]) for i in 1:size(pred_train,1)]
 pred_q90 = quantile.(pred_gauss, 0.9)
 pred_q10 = quantile.(pred_gauss, 0.1)
 
@@ -46,7 +49,40 @@ mean(Y_train .< pred_q10)
 x_perm = sortperm(X_train[:,1])
 plot(X_train[:, 1], Y_train, ms = 1, mcolor = "gray", mscolor = "lightgray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "")
 plot!(X_train[:,1][x_perm], pred_train[x_perm, 1], color = "navy", linewidth = 1.5, label = "mu")
-plot!(X_train[:,1][x_perm], sqrt.(pred_train[x_perm, 2]), color = "blue", linewidth = 1.5, label = "sigma")
+plot!(X_train[:,1][x_perm], pred_train[x_perm, 2], color = "blue", linewidth = 1.5, label = "sigma")
 plot!(X_train[:,1][x_perm], pred_q10[x_perm, 1], color = "red", linewidth = 1.5, label = "q10")
 plot!(X_train[:,1][x_perm], pred_q90[x_perm, 1], color = "green", linewidth = 1.5, label = "q90")
-# savefig("regression_gaussian.png")
+savefig("regression_gaussian_v1.png")
+
+
+# compare with Zygote
+using Zygote
+
+pred = [0.0, log(1.0)]
+target = 0.1
+
+δ1 = (target - pred[1]) / max(1e-8, exp(2*pred[2]))
+δ2 = (1 - (pred[1] - target)^2 / max(1e-8, exp(2*pred[2])))
+
+δ²1 = 1 / max(1e-8, exp(2*pred[2]))
+δ²2 = 2 / max(1e-8, exp(2*pred[2])) * (pred[1] - target)^2
+
+
+lpdf(x,μ,σ) = -log(σ) - log(2π)/2 - 1/2*((x-μ)/σ)^2
+lpdf(0, pred[1], pred[2])
+
+lpdf2(x,μ,lσ) = -log(exp(lσ)) - log(2π)/2 - 1/2*((x-μ)/exp(lσ))^2
+lpdf2(0, pred[1], pred[2])
+
+
+n1 = Normal(0, 1)
+Distributions.logpdf(n1, 0)
+
+# gradient(lpdf, target, pred[1], pred[2])[2:end]
+gradient(lpdf2, target, pred[1], pred[2])[2:end]
+Zygote.hessian(p -> lpdf2(target, p[1], p[2]), pred) # hessian takes a single-argument function
+
+gradient_lpdf(x,pred) = gradient(lpdf2, x, pred[1], pred[2])[3]
+hessian_lpdf(x,pred) = gradient(gradient_lpdf, x, pred)[2][2] # ∂²lpdf2/∂lσ² via nested AD
+gradient_lpdf(target, pred)
+hessian_lpdf(target, pred)
diff --git a/experiments/parametric_type.jl b/experiments/parametric_type.jl
new file mode 100644
index 00000000..07f17250
--- /dev/null
+++ b/experiments/parametric_type.jl
@@ -0,0 +1,102 @@
+using Statistics
+using StatsBase: sample
+# using XGBoost
+using Revise
+using EvoTrees
+using BenchmarkTools
+
+# prepare a dataset
+features = rand(Int(2.25e6), 100)
+# features = rand(100, 10)
+X = features
+Y = rand(size(X, 1))
+𝑖 = collect(1:size(X,1))
+
+# train-eval split
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
+train_size = 0.8
+𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
+
+X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
+Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
+
+config = EvoTrees.EvoTreeRegressor3(T=Float32,
+    loss=:linear, metric=:none,
+    nrounds=100, α = 0.5,
+    λ = 0.0, γ=0.0, η=0.05,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=0.5, nbins=32)
+
+
+# for 1.25e5 init_evotree: 2.009 s 0.322925 seconds (2.53 k allocations: 167.345 MiB)
+# for 1.25e5 no eval iter 100: 2.009 s (628514 allocations: 720.62 MiB)
+# for 1.25e6 no eval iter 10: 6.200 s (44330 allocations: 2.19 GiB)
+# for 1.25e6 no eval iter 100: 19.481940 seconds (635.33 k allocations: 6.679 GiB, 3.11% gc time)
+# for 1.25e6 mse with eval data: 6.321 s (45077 allocations: 2.19 GiB)
+@time model, cache = init_evotree(config, X_train, Y_train);
+@time grow_evotree!(model, cache);
+@time model = fit_evotree(config, X_train, Y_train);
+@btime model = fit_evotree(config, X_train, Y_train);
+@time pred_train = EvoTrees.predict(model, X_train)
+
+@time model = fit_evotree(config, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=9999, early_stopping_rounds=9999);
+@btime model = fit_evotree(config, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n=9999, early_stopping_rounds=9999);
+
+@time model = fit_evotree(config, X_train, Y_train, early_stopping_rounds=10);
+@time model = fit_evotree(config, X_train, Y_train, print_every_n=2);
+
+# @time model = grow_gbtree(X_train, Y_train, params1, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 5);
+# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval);
+@time pred_train = predict(model, X_train)
+
+
+#############################
+# agaricus
+#############################
+function readlibsvm(fname::String, shape)
+    dmx = zeros(Float32, shape)
+    label = Float32[]
+    fi = open(fname, "r")
+    cnt = 1
+    for line in eachline(fi)
+        line = split(line, " ")
+        push!(label, parse(Float32, line[1]))
+        line = line[2:end]
+        for itm in line
+            itm = split(itm, ":")
+            dmx[cnt, parse(Int, itm[1]) + 1] = parse(Int, itm[2])
+        end
+        cnt += 1
+    end
+    close(fi)
+    return (dmx, label)
+end
+
+# we use an auxiliary function to read the LIBSVM format into a Julia matrix
+train_X, train_Y = readlibsvm("data/agaricus.txt.train", (6513, 126))
+test_X, test_Y = readlibsvm("data/agaricus.txt.test", (1611, 126))
+
+#-------------Basic Training using XGBoost-----------------
+# note: xgboost naturally handles sparse input
+# use a sparse matrix when your features are sparse (e.g. when using one-hot encoded vectors)
+# model parameters can be set as parameters for the ```xgboost``` function, or use a Vector{String} / Dict()
+num_round = 100
+# you can directly pass Julia's matrix or sparse matrix as data,
+# by calling xgboost(data, num_round, label=label, training-parameters)
+metrics = ["logloss"]
+@time bst = xgboost(train_X, num_round, label = train_Y, eta = 0.1, max_depth = 3, metrics = metrics, silent=0, objective = "binary:logistic")
+features_xgb = XGBoost.importance(bst)
+
+params1 = EvoTreeRegressor(
+    loss=:logistic, metric=:logloss,
+    nrounds=100,
+    λ = 0.0, γ=0.0, η=0.1,
+    max_depth = 4, min_weight = 1.0,
+    rowsample=1.0, colsample=1.0, nbins=250)
+
+@time model = fit_evotree(params1, train_X, train_Y, print_every_n=20);
+@time model = fit_evotree(params1, train_X, train_Y, X_eval=test_X, Y_eval=test_Y, print_every_n=20);
+@time pred_train = EvoTrees.predict(model, train_X)
+features_evo = importance(model, 1:size(train_X,2))
+sort(collect(values(features_evo)))
diff --git a/experiments/readme_plots.jl b/experiments/readme_plots.jl
index 4c38ef8e..5436c88e 100644
--- a/experiments/readme_plots.jl
+++ b/experiments/readme_plots.jl
@@ -87,7 +87,7 @@ params1 = EvoTreeRegressor(
 sqrt(mean((pred_train_L1 .- Y_train) .^ 2))
 
 x_perm = sortperm(X_train[:,1])
-plot(X_train, Y_train, ms = 1, mcolor = "gray", mscolor = "lightgray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "")
+plot(X_train, Y_train, ms = 1, mcolor = "gray", mscolor = "gray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "")
 plot!(X_train[:,1][x_perm], pred_train_linear[x_perm], color = "navy", linewidth = 1.5, label = "Linear")
 plot!(X_train[:,1][x_perm], pred_train_logistic[x_perm], color = "darkred", linewidth = 1.5, label = "Logistic")
plot!(X_train[:,1][x_perm], pred_train_poisson[x_perm], color = "green", linewidth = 1.5, label = "Poisson") @@ -135,8 +135,40 @@ params1 = EvoTreeRegressor( sum(pred_train_q80 .< Y_train) / length(Y_train) x_perm = sortperm(X_train[:,1]) -plot(X_train, Y_train, ms = 1, mcolor = "gray", mscolor = "lightgray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "") +plot(X_train, Y_train, ms = 1, mcolor = "gray", mscolor = "gray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "") plot!(X_train[:,1][x_perm], pred_train_q50[x_perm], color = "navy", linewidth = 1.5, label = "Median") plot!(X_train[:,1][x_perm], pred_train_q20[x_perm], color = "darkred", linewidth = 1.5, label = "Q20") plot!(X_train[:,1][x_perm], pred_train_q80[x_perm], color = "green", linewidth = 1.5, label = "Q80") savefig("quantiles_sinus.png") + + + +############################### +## gaussian +############################### +params1 = EvoTreeGaussian( + loss=:gaussian, metric=:gaussian, + nrounds=200, nbins=100, + λ = 0.0, γ=0.0, η=0.05, + max_depth = 5, min_weight = 1.0, + rowsample=0.8, colsample=1.0, seed=123) + +@time model = fit_evotree(params1, X_train, Y_train, X_eval=X_eval, Y_eval=Y_eval, print_every_n = 10); +# @time model = fit_evotree(params1, X_train, Y_train, print_every_n = 10); +@time pred_train = EvoTrees.predict(model, X_train) +@time pred_train_gauss = EvoTrees.predict(params1, model, X_train) + +pred_gauss = [Distributions.Normal(pred_train[i,1], pred_train[i,2]) for i in 1:size(pred_train,1)] +pred_q80 = quantile.(pred_gauss, 0.8) +pred_q20 = quantile.(pred_gauss, 0.2) + +mean(Y_train .< pred_q80) +mean(Y_train .< pred_q20) + +x_perm = sortperm(X_train[:,1]) +plot(X_train[:, 1], Y_train, ms = 1, mcolor = "gray", mscolor = "gray", background_color = RGB(1, 1, 1), seriestype=:scatter, xaxis = ("feature"), yaxis = ("target"), legend = true, label = "") +plot!(X_train[:,1][x_perm], pred_train[x_perm, 1], color = "navy", linewidth = 1.5, label = "mu") +plot!(X_train[:,1][x_perm], pred_train[x_perm, 2], color = "red", linewidth = 1.5, label = "sigma") +plot!(X_train[:,1][x_perm], pred_q20[x_perm, 1], color = "green", linewidth = 1.5, label = "q20") +plot!(X_train[:,1][x_perm], pred_q80[x_perm, 1], color = "green", linewidth = 1.5, label = "q80") +savefig("gaussian_sinus.png") diff --git a/gaussian_sinus.png b/gaussian_sinus.png new file mode 100644 index 00000000..35644cb4 Binary files /dev/null and b/gaussian_sinus.png differ diff --git a/quantiles_sinus.png b/quantiles_sinus.png index b0a3be6f..2e925679 100644 Binary files a/quantiles_sinus.png and b/quantiles_sinus.png differ diff --git a/regression_gaussian_v1.png b/regression_gaussian_v1.png new file mode 100644 index 00000000..6fd68c5f Binary files /dev/null and b/regression_gaussian_v1.png differ diff --git a/regression_sinus.png b/regression_sinus.png index bdf4875e..d6162807 100644 Binary files a/regression_sinus.png and b/regression_sinus.png differ diff --git a/src/MLJ.jl b/src/MLJ.jl index 23adc13e..475da6a1 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -55,7 +55,7 @@ end function predict(model::EvoTreeGaussian, fitresult, Xnew) Xnew = MLJModelInterface.matrix(Xnew) pred = predict(fitresult, Xnew) - return [Distributions.Normal(pred[i,1], sqrt(pred[i,2])) for i in 1:size(pred,1)] + return [Distributions.Normal(pred[i,1], pred[i,2]) for i in 1:size(pred,1)] end # Metadata diff --git a/src/eval.jl b/src/eval.jl 
index 6eb83639..9137885d 100644
--- a/src/eval.jl
+++ b/src/eval.jl
@@ -61,11 +61,11 @@ end
 
 # gaussian
 # pred[i][1] = μ
-# pred[i][2] = log(σ²)
+# pred[i][2] = log(σ)
 function eval_metric(::Val{:gaussian}, pred::Vector{SVector{L,T}}, Y::AbstractVector{T}, α=0.0) where {L, T <: AbstractFloat}
     eval = zero(T)
     @inbounds for i in 1:length(pred)
-        eval += pred[i][2]/2 + (Y[i] - pred[i][1])^2 / (2*max(1e-8, exp(pred[i][2])))
+        eval += pred[i][2] + (Y[i] - pred[i][1])^2 / (2*max(1e-8, exp(2*pred[i][2])))
     end
     eval /= length(Y)
     return eval
diff --git a/src/find_split.jl b/src/find_split.jl
index d7b7242e..5263b23d 100644
--- a/src/find_split.jl
+++ b/src/find_split.jl
@@ -1,8 +1,8 @@
 #############################################
 # Get the breaking points
 #############################################
-function get_edges(X::Matrix{T}, nbins=250) where {T}
-    edges = Vector{Vector{Float32}}(undef, size(X,2))
+function get_edges(X::AbstractMatrix{T}, nbins=250) where {T}
+    edges = Vector{Vector{T}}(undef, size(X,2))
     @threads for i in 1:size(X, 2)
         edges[i] = quantile(view(X, :,i), (1:nbins)/nbins)
         if length(edges[i]) == 0
diff --git a/src/fit.jl b/src/fit.jl
index 52321e63..d103541b 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -1,45 +1,58 @@
+function allo2(params::EvoTypes)
+    T = typeof(params)
+    # test = T(params.η)
+    return T
+end
+
+function allo3(params::EvoTrees.EvoTypes{T,U,S}) where {T,U,S}
+    # T = typeof(params)
+    test = T(params.η)
+    return test
+end
+
 # initialise evotree
-function init_evotree(params::EvoTypes,
-    X::AbstractMatrix{R}, Y::AbstractVector{S}; verbosity=1) where {R,S}
+function init_evotree(params::EvoTypes{T,U,S},
+    X::AbstractMatrix, Y::AbstractVector; verbosity=1) where {T,U,S}
 
     seed!(params.seed)
 
     K = 1
     levels = ""
+    X = convert(Matrix{T}, X)
     if typeof(params.loss) == Logistic
-        Y = Float32.(Y)
+        Y = T.(Y)
         μ = fill(logit(mean(Y)), 1)
     elseif typeof(params.loss) == Poisson
-        Y = Float32.(Y)
+        Y = T.(Y)
         μ = fill(log(mean(Y)), 1)
     elseif typeof(params.loss) == Softmax
         if typeof(Y) <: AbstractCategoricalVector
             levels = CategoricalArray(CategoricalArrays.levels(Y))
             K = length(levels)
-            μ = zeros(Float32, K)
+            μ = zeros(T, K)
             Y = MLJModelInterface.int.(Y)
         else
             levels = CategoricalArray(sort(unique(Y)))
             K = length(levels)
-            μ = zeros(Float32, K)
+            μ = zeros(T, K)
             Y = UInt32.(Y)
         end
     elseif typeof(params.loss) == Gaussian
         K = 2
-        Y = Float32.(Y)
-        μ = SVector{2}([mean(Y), log(var(Y))])
+        Y = T.(Y)
+        μ = SVector{2}([mean(Y), log(std(Y))])
     else
-        Y = Float32.(Y)
+        Y = T.(Y)
         μ = fill(mean(Y), 1)
     end
 
     # initialize preds
-    pred = zeros(SVector{K,Float32}, size(X,1))
+    pred = zeros(SVector{K,T}, size(X,1))
     for i in eachindex(pred)
         pred[i] += μ
     end
 
-    bias = Tree([TreeNode(SVector{K,Float32}(μ))])
+    bias = Tree([TreeNode(SVector{K,T}(μ))])
     evotree = GBTree([bias], params, Metric(), K, levels)
 
     X_size = size(X)
@@ -47,9 +60,9 @@ function init_evotree(params::EvoTypes,
     𝑗_ = collect(1:X_size[2])
 
     # initialize gradients and weights
-    δ, δ² = zeros(SVector{evotree.K, Float32}, X_size[1]), zeros(SVector{evotree.K, Float32}, X_size[1])
-    𝑤 = zeros(SVector{1, Float32}, X_size[1])
-    𝑤_ini = SVector{1, Float32}(1)
+    δ, δ² = zeros(SVector{evotree.K, T}, X_size[1]), zeros(SVector{evotree.K, T}, X_size[1])
+    𝑤 = zeros(SVector{1, T}, X_size[1])
+    𝑤_ini = SVector{1, T}(1)
     for i in 1:length(𝑤)
         𝑤[i] += 𝑤_ini
     end
@@ -60,24 +73,24 @@ function init_evotree(params::EvoTypes,
 
     # initialize histograms
-    hist_δ = Vector{Matrix{SVector{evotree.K, Float32}}}(undef, 2^params.max_depth-1)
-    hist_δ² =
Vector{Matrix{SVector{evotree.K, Float32}}}(undef, 2^params.max_depth-1) - hist_𝑤 = Vector{Matrix{SVector{1, Float32}}}(undef, 2^params.max_depth-1) + hist_δ = Vector{Matrix{SVector{evotree.K, T}}}(undef, 2^params.max_depth-1) + hist_δ² = Vector{Matrix{SVector{evotree.K, T}}}(undef, 2^params.max_depth-1) + hist_𝑤 = Vector{Matrix{SVector{1, T}}}(undef, 2^params.max_depth-1) # initialize train nodes - train_nodes = Vector{TrainNode{evotree.K, Float32, Int64}}(undef, 2^params.max_depth-1) + train_nodes = Vector{TrainNode{evotree.K, T, Int64}}(undef, 2^params.max_depth-1) for node in 1:2^params.max_depth-1 - train_nodes[node] = TrainNode(0, 0, SVector{evotree.K, Float32}(fill(Float32(-Inf), evotree.K)), SVector{evotree.K, Float32}(fill(Float32(-Inf), evotree.K)), SVector{1, Float32}(fill(Float32(-Inf), 1)), Float32(-Inf), [0], [0]) + train_nodes[node] = TrainNode(0, 0, SVector{evotree.K, T}(fill(T(-Inf), evotree.K)), SVector{evotree.K, T}(fill(T(-Inf), evotree.K)), SVector{1, T}(fill(T(-Inf), 1)), T(-Inf), [0], [0]) - hist_δ[node] = zeros(SVector{evotree.K, Float32}, params.nbins, X_size[2]) - hist_δ²[node] = zeros(SVector{evotree.K, Float32}, params.nbins, X_size[2]) - hist_𝑤[node] = zeros(SVector{1, Float32}, params.nbins, X_size[2]) + hist_δ[node] = zeros(SVector{evotree.K, T}, params.nbins, X_size[2]) + hist_δ²[node] = zeros(SVector{evotree.K, T}, params.nbins, X_size[2]) + hist_𝑤[node] = zeros(SVector{1, T}, params.nbins, X_size[2]) end - splits = Vector{SplitInfo{evotree.K, Float32, Int64}}(undef, X_size[2]) + splits = Vector{SplitInfo{evotree.K, T, Int64}}(undef, X_size[2]) for feat in 𝑗_ - splits[feat] = SplitInfo{evotree.K, Float32, Int}(Float32(-Inf), SVector{evotree.K, Float32}(zeros(evotree.K)), SVector{evotree.K, Float32}(zeros(evotree.K)), SVector{1, Float32}(zeros(1)), SVector{evotree.K, Float32}(zeros(evotree.K)), SVector{evotree.K, Float32}(zeros(evotree.K)), SVector{1, Float32}(zeros(1)), Float32(-Inf), Float32(-Inf), 0, feat, 0.0) + splits[feat] = SplitInfo{evotree.K, T, Int}(T(-Inf), SVector{evotree.K, T}(zeros(evotree.K)), SVector{evotree.K, T}(zeros(evotree.K)), SVector{1, T}(zeros(1)), SVector{evotree.K, T}(zeros(evotree.K)), SVector{evotree.K, T}(zeros(evotree.K)), SVector{1, T}(zeros(1)), T(-Inf), T(-Inf), 0, feat, 0.0) end cache = (params=deepcopy(params), @@ -93,7 +106,7 @@ function init_evotree(params::EvoTypes, end -function grow_evotree!(evotree::GBTree, cache; verbosity=1) +function grow_evotree!(evotree::GBTree{L,T,S}, cache; verbosity=1) where {L,T,S} # initialize from cache params = evotree.params @@ -110,7 +123,7 @@ function grow_evotree!(evotree::GBTree, cache; verbosity=1) 𝑗 = cache.𝑗_[sample(cache.𝑗_, ceil(Int, params.colsample * X_size[2]), replace=false, ordered=true)] # reset gain to -Inf for feat in cache.𝑗_ - splits[feat].gain = Float32(-Inf) + splits[feat].gain = T(-Inf) end # build a new tree @@ -134,10 +147,10 @@ end # grow a single tree function grow_tree(δ, δ², 𝑤, hist_δ, hist_δ², hist_𝑤, - params::EvoTypes, + params::EvoTypes{T,U,S}, train_nodes::Vector{TrainNode{L,T,S}}, splits::Vector{SplitInfo{L,T,Int}}, - edges, X_bin) where {R<:Real, T<:AbstractFloat, S<:Int, L} + edges, X_bin) where {T<:AbstractFloat, U, S, L} active_id = ones(Int, 1) leaf_count = one(Int) @@ -150,7 +163,7 @@ function grow_tree(δ, δ², 𝑤, # grow nodes for id in active_id node = train_nodes[id] - if tree_depth == params.max_depth || node.∑𝑤[1] <= params.min_weight + 1e-12 + if tree_depth == params.max_depth || node.∑𝑤[1] <= params.min_weight + 1e-8 push!(tree.nodes, 
TreeNode(pred_leaf(params.loss, node, params, δ²))) else if id > 1 && id == tree.nodes[node.parent].right diff --git a/src/loss.jl b/src/loss.jl index 17ea4c50..cf55b5e2 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -67,11 +67,11 @@ end # Gaussian - http://jrmeyer.github.io/machinelearning/2017/08/18/mle.html # pred[i][1] = μ -# pred[i][2] = log(σ²) +# pred[i][2] = log(σ) function update_grads!(loss::Gaussian, α, pred::Vector{SVector{L,T}}, target::AbstractArray{T, 1}, δ::Vector{SVector{L,T}}, δ²::Vector{SVector{L,T}}, 𝑤::Vector{SVector{1,T}}) where {T <: AbstractFloat, L} @inbounds @threads for i in eachindex(δ) - δ[i] = SVector((pred[i][1] - target[i]) / max(1e-8, exp(pred[i][2])) * 𝑤[i][1], 𝑤[i][1] / 2 * (1 - (pred[i][1] - target[i])^2 / max(1e-8, exp(pred[i][2])))) - δ²[i] = SVector(𝑤[i][1] / max(1e-8, exp(pred[i][2])), 𝑤[i][1] / max(1e-8, exp(pred[i][2])) * (pred[i][1] - target[i])^2) + δ[i] = SVector((pred[i][1] - target[i]) / max(1e-8, exp(2*pred[i][2])) * 𝑤[i][1], (1 - (pred[i][1] - target[i])^2 / max(1e-8, exp(2*pred[i][2]))) * 𝑤[i][1]) + δ²[i] = SVector(𝑤[i][1] / max(1e-8, exp(2*pred[i][2])), 2*𝑤[i][1] / max(1e-8, exp(2*pred[i][2])) * (pred[i][1] - target[i])^2) end end diff --git a/src/models.jl b/src/models.jl index cfdcb52b..ea8b7b99 100644 --- a/src/models.jl +++ b/src/models.jl @@ -29,6 +29,7 @@ mutable struct EvoTreeRegressor{T<:AbstractFloat, U<:ModelType, S<:Int} <: MLJMo end function EvoTreeRegressor(; + T::Type=Float32, loss=:linear, nrounds=10, λ=0.0, # @@ -49,7 +50,7 @@ function EvoTreeRegressor(; elseif loss == :quantile model_type = Quantile() end - model = EvoTreeRegressor(model_type, nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) + model = EvoTreeRegressor(model_type, nrounds, T(λ), T(γ), T(η), max_depth, T(min_weight), T(rowsample), T(colsample), nbins, T(α), metric, seed) return model end @@ -72,6 +73,7 @@ mutable struct EvoTreeCount{T<:AbstractFloat, U<:ModelType, S<:Int} <: MLJModelI end function EvoTreeCount(; + T::Type=Float32, loss=:poisson, nrounds=10, λ=0.0, # @@ -87,7 +89,7 @@ function EvoTreeCount(; seed=444) if loss == :poisson model_type = Poisson() end - model = EvoTreeCount(Poisson(), nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) + model = EvoTreeCount(Poisson(), nrounds, T(λ), T(γ), T(η), max_depth, T(min_weight), T(rowsample), T(colsample), nbins, T(α), metric, seed) return model end @@ -110,6 +112,7 @@ mutable struct EvoTreeClassifier{T<:AbstractFloat, U<:ModelType, S<:Int} <: MLJM end function EvoTreeClassifier(; + T::Type=Float32, loss=:softmax, nrounds=10, λ=0.0, # @@ -125,7 +128,7 @@ function EvoTreeClassifier(; seed=444) if loss == :softmax model_type = Softmax() end - model = EvoTreeClassifier(Softmax(), nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) + model = EvoTreeClassifier(Softmax(), nrounds, T(λ), T(γ), T(η), max_depth, T(min_weight), T(rowsample), T(colsample), nbins, T(α), metric, seed) return model end @@ -148,6 +151,7 @@ mutable struct EvoTreeGaussian{T<:AbstractFloat, U<:ModelType, S<:Int} <: MLJMod end function EvoTreeGaussian(; + T::Type=Float64, loss=:gaussian, nrounds=10, λ=0.0, # @@ -163,7 +167,7 @@ function EvoTreeGaussian(; seed=444) if loss == :gaussian model_type = Gaussian() end - model = EvoTreeGaussian(Gaussian(), 
nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) + model = EvoTreeGaussian(Gaussian(), nrounds, T(λ), T(γ), T(η), max_depth, T(min_weight), T(rowsample), T(colsample), nbins, T(α), metric, seed) return model end @@ -197,7 +201,7 @@ function EvoTreeRModels( elseif loss == :softmax model = EvoTreeClassifier(Softmax(), nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) elseif loss == :gaussian - model = EvoTreeGaussian(Gaussian(), nrounds, Float32(λ), Float32(γ), Float32(η), max_depth, Float32(min_weight), Float32(rowsample), Float32(colsample), nbins, Float32(α), metric, seed) + model = EvoTreeGaussian(Gaussian(), nrounds, Float64(λ), Float64(γ), Float64(η), max_depth, Float64(min_weight), Float64(rowsample), Float64(colsample), nbins, Float64(α), metric, seed) else throw("invalid loss") end @@ -205,4 +209,5 @@ function EvoTreeRModels( return model end -const EvoTypes = Union{EvoTreeRegressor,EvoTreeCount,EvoTreeClassifier,EvoTreeGaussian} +# const EvoTypes = Union{EvoTreeRegressor,EvoTreeCount,EvoTreeClassifier,EvoTreeGaussian} +const EvoTypes{T,U,S} = Union{EvoTreeRegressor{T,U,S},EvoTreeCount{T,U,S},EvoTreeClassifier{T,U,S},EvoTreeGaussian{T,U,S}} diff --git a/src/predict.jl b/src/predict.jl index 426260a2..ba273759 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -1,5 +1,5 @@ # prediction from single tree - assign each observation to its final leaf -function predict!(pred, tree::Tree, X::AbstractMatrix{T}) where {T<:Real} +function predict!(pred, tree::Tree{L,T,S}, X::AbstractMatrix) where {L,T,S} @inbounds @threads for i in 1:size(X,1) id = 1 x = view(X, i, :) @@ -15,19 +15,19 @@ function predict!(pred, tree::Tree, X::AbstractMatrix{T}) where {T<:Real} end # prediction from single tree - assign each observation to its final leaf -function predict(tree::Tree, X::AbstractMatrix{T}, K) where T<:Real - pred = zeros(SVector{K,Float32}, size(X, 1)) +function predict(tree::Tree{L,T,S}, X::AbstractMatrix, K) where {L,T,S} + pred = zeros(SVector{K,T}, size(X, 1)) predict!(pred, tree, X) return pred end # prediction from single tree - assign each observation to its final leaf -function predict(model::GBTree, X::AbstractMatrix{T}) where T<:Real - pred = zeros(SVector{model.K,Float32}, size(X, 1)) +function predict(model::GBTree{L,T,S}, X::AbstractMatrix) where {L,T,S} + pred = zeros(SVector{model.K,T}, size(X, 1)) for tree in model.trees predict!(pred, tree, X) end - pred = reinterpret(Float32, pred) + pred = reinterpret(T, pred) if typeof(model.params.loss) == Logistic @. pred = sigmoid(pred) elseif typeof(model.params.loss) == Poisson diff --git a/test/MLJ.jl b/test/MLJ.jl index 9c05bc5e..9e07a1e6 100644 --- a/test/MLJ.jl +++ b/test/MLJ.jl @@ -50,7 +50,7 @@ mach = machine(tree_model, X, y) train, test = partition(eachindex(y), 0.7, shuffle=true); # 70:30 split fit!(mach, rows=train, verbosity=1) -mach.model.nrounds += 10 +mach.model.nrounds += 50 fit!(mach, rows=train, verbosity=1) pred_train = predict(mach, selectrows(X,train))
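Two closing review notes. First, the core of this release is the `log(σ²)` → `log(σ)` reparameterization in `src/loss.jl`: writing ρ = log(σ), the per-observation negative log-likelihood is, up to a constant, nll = ρ + (y − μ)² / (2e^(2ρ)), and the new `update_grads!` terms are exactly its first- and second-order derivatives. A standalone finite-difference sketch of those formulas (not part of the diff):

```julia
# Gaussian NLL under ρ = log(σ), dropping the constant log(2π)/2 term.
nll(y, μ, ρ) = ρ + (y - μ)^2 / (2 * exp(2ρ))

y, μ, ρ = 0.1, 0.0, 0.0
h = 1e-5

# first-order terms, as coded in update_grads!
g_μ(μ, ρ) = (μ - y) / exp(2ρ)           # ∂nll/∂μ
g_ρ(μ, ρ) = 1 - (y - μ)^2 / exp(2ρ)     # ∂nll/∂ρ
g_μ(μ, ρ) ≈ (nll(y, μ + h, ρ) - nll(y, μ - h, ρ)) / 2h  # true
g_ρ(μ, ρ) ≈ (nll(y, μ, ρ + h) - nll(y, μ, ρ - h)) / 2h  # true

# second-order terms are derivatives of the first-order ones
1 / exp(2ρ)              ≈ (g_μ(μ + h, ρ) - g_μ(μ - h, ρ)) / 2h  # ∂²nll/∂μ², true
2 * (y - μ)^2 / exp(2ρ)  ≈ (g_ρ(μ, ρ + h) - g_ρ(μ, ρ - h)) / 2h  # ∂²nll/∂ρ², true
```

Second, the new `const EvoTypes{T,U,S} = Union{...}` alias in `src/models.jl` is what lets `init_evotree` recover the float precision `T` from the config object alone (the `allo3` scratch function in `src/fit.jl` exercises the same trick). A sketch of the dispatch pattern — `float_type` is a hypothetical helper, not part of this diff:

```julia
using EvoTrees

# Hypothetical helper: recover the precision parameter T from any config.
float_type(::EvoTrees.EvoTypes{T,U,S}) where {T,U,S} = T

config = EvoTreeRegressor(T=Float32, loss=:linear, η=0.05, nrounds=10)
float_type(config)        # Float32
float_type(config)(0.5)   # 0.5f0 — e.g. casting hyperparameters to T
```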