Skip to content

Commit

Permalink
Merge pull request #182 from Evovest/gradients
Browse files Browse the repository at this point in the history
threaded gradients
  • Loading branch information
jeremiedb authored Oct 30, 2022
2 parents c398f45 + d6ba36a commit 3cbf6be
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 38 deletions.
34 changes: 34 additions & 0 deletions experiments/gradients.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using Revise
using EvoTrees
using Base.Threads

# Benchmark fixture: compare a serial vs a threaded gradient-update kernel
# on 1M observations.
L = EvoTrees.Logistic
T = Float64
nobs = 1_000_000
y = rand(T, nobs)
# predictions are stored as a (K, nobs) matrix; here K = 1 output dimension
pred = rand(T, 1, nobs)
K = 1
# gradient buffer with 2K + 1 rows; the last row holds the observation
# weights (set below), and the kernels read it via δ𝑤[3, i] when K = 1
δ𝑤 = zeros(T, 2 * K + 1, nobs)
w = ones(T, nobs)
δ𝑤[end, :] .= w

# nthreads: 12
Threads.nthreads()

"""
    update_grads_v1!(::Type{EvoTrees.Linear}, δ𝑤, p, y; kwargs...)

Serial baseline: write the weighted first-order term into row 1 of `δ𝑤`
and the (constant) second-order term into row 2, reading the observation
weight from row 3. Mutates `δ𝑤` in place.
"""
function update_grads_v1!(::Type{EvoTrees.Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    for obs in eachindex(y)
        @inbounds δ𝑤[1, obs] = 2 * (p[1, obs] - y[obs]) * δ𝑤[3, obs]
        @inbounds δ𝑤[2, obs] = 2 * δ𝑤[3, obs]
    end
end
# Recorded timing for the serial kernel:
# 958.670 μs (0 allocations: 0 bytes)
# NOTE(review): `@btime` comes from BenchmarkTools, which is not loaded in
# this file — presumably loaded in the session; confirm before running.
@btime update_grads_v1!(L, δ𝑤, pred, y)

"""
    update_grads_v2!(::Type{EvoTrees.Linear}, δ𝑤, p, y; kwargs...)

Threaded variant of `update_grads_v1!`: identical per-observation updates,
with iterations distributed across threads via `@threads` (each iteration
touches only its own column of `δ𝑤`, so no synchronization is needed).
"""
function update_grads_v2!(::Type{EvoTrees.Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for obs in eachindex(y)
        @inbounds begin
            δ𝑤[1, obs] = 2 * (p[1, obs] - y[obs]) * δ𝑤[3, obs]
            δ𝑤[2, obs] = 2 * δ𝑤[3, obs]
        end
    end
end
# NOTE(review): this timing is byte-identical to the v1 figure above —
# it looks copy-pasted; re-run to record the actual threaded timing.
# 958.670 μs (0 allocations: 0 bytes)
@btime update_grads_v2!(L, δ𝑤, pred, y)
76 changes: 38 additions & 38 deletions src/loss.jl
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
# linear
"""
    update_grads!(::Type{Linear}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the linear (squared-error) loss.
Writes the weighted first-order term into row 1 of `δ𝑤` and the constant
second-order term into row 2; row 3 holds the observation weight and is
only read. Each iteration writes a distinct column, so `@threads` is safe.
(The scraped diff interleaved the old serial and new threaded lines; this
is the post-merge threaded version.)
"""
function update_grads!(::Type{Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] = 2 * (p[1, i] - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = 2 * δ𝑤[3, i]
    end
end

# logistic - on linear predictor
"""
    update_grads!(::Type{Logistic}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the logistic loss on the linear
predictor: `pred = sigmoid(p[1, i])`, gradient `(pred - y)` and hessian
`pred * (1 - pred)`, both scaled by the weight in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Logistic}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = sigmoid(p[1, i])
        @inbounds δ𝑤[1, i] = (pred - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = pred * (1 - pred) * δ𝑤[3, i]
    end
end

# Poisson
"""
    update_grads!(::Type{Poisson}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Poisson deviance loss:
`pred = exp(p[1, i])`, gradient `(pred - y)` and hessian `pred`, both
scaled by the observation weight stored in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Poisson}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = (pred - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = pred * δ𝑤[3, i]
    end
end

# Gamma
"""
    update_grads!(::Type{Gamma}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Gamma deviance loss with a log
link: `pred = exp(p[1, i])`, gradient `2 * (1 - y / pred)` and hessian
`2 * y / pred`, both scaled by the weight in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Gamma}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = 2 * (1 - y[i] / pred) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = 2 * y[i] / pred * δ𝑤[3, i]
    end
end

# Tweedie
"""
    update_grads!(::Type{Tweedie}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Tweedie deviance loss with a log
link and fixed power `rho = 1.5` (converted to the element type of `p` so
the loop stays type-stable). Both terms are scaled by the weight in row 3
of `δ𝑤`.
"""
function update_grads!(::Type{Tweedie}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    rho = eltype(p)(1.5)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = 2 * (pred^(2 - rho) - y[i] * pred^(1 - rho)) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] =
            2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑤[3, i]
    end
end

# L1
"""
    update_grads!(::Type{L1}, δ𝑤, p, y; alpha, kwargs...)

Threaded first-order update for the (quantile-weighted) L1 loss: positive
residuals are weighted by `alpha`, negative ones by `1 - alpha`, scaled by
the observation weight in row 3 of `δ𝑤`. No second-order term is written.
"""
function update_grads!(::Type{L1}, δ𝑤::Matrix, p::Matrix, y::Vector; alpha, kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] =
            (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) *
            δ𝑤[3, i]
    end
end

# Softmax
function update_grads!(::Type{Softmax}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
p .= p .- maximum(p, dims = 1)
sums = sum(exp.(p), dims = 1)
p .= p .- maximum(p, dims=1)
sums = sum(exp.(p), dims=1)
K = (size(δ𝑤, 1) - 1) ÷ 2
for i in eachindex(y)
for k = 1:K
@threads for i in eachindex(y)
@inbounds for k = 1:K
# δ𝑤[k, i] = (exp(p[k, i]) / sums[i] - (onehot(y[i], 1:K))) * δ𝑤[2 * K + 1, i]
if k == y[i]
δ𝑤[k, i] = (exp(p[k, i]) / sums[i] - 1) * δ𝑤[2*K+1, i]
Expand All @@ -73,23 +73,23 @@ end

# Quantile
"""
    update_grads!(::Type{Quantile}, δ𝑤, p, y; alpha, kwargs...)

Threaded update for the quantile loss at level `alpha`: row 1 gets the
weighted pinball gradient (`alpha` above the prediction, `alpha - 1`
below); row 2 stores the raw residual `y - p`, which downstream code uses
to compute the quantile value — hence no weighting on it.
"""
function update_grads!(::Type{Quantile}, δ𝑤::Matrix, p::Matrix, y::Vector; alpha, kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] = y[i] > p[1, i] ? alpha * δ𝑤[3, i] : (alpha - 1) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = y[i] - p[1, i] # δ² serves to calculate the quantile value - hence no weighting on δ²
    end
end

# Gaussian - http://jrmeyer.github.io/machinelearning/2017/08/18/mle.html
# pred[i][1] = μ
# pred[i][2] = log(σ)
"""
    update_grads!(::Type{GaussianDist}, δ𝑤, p, y; kwargs...)

Threaded MLE gradient/hessian update for a Gaussian distributional model
parameterized as `p[1, i] = μ` and `p[2, i] = log(σ)`. Rows 1-2 of `δ𝑤`
receive the first-order terms for μ and log(σ), rows 3-4 the second-order
terms, all scaled by the observation weight in row 5.
"""
function update_grads!(::Type{GaussianDist}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        # first order
        @inbounds δ𝑤[1, i] = (p[1, i] - y[i]) / exp(2 * p[2, i]) * δ𝑤[5, i]
        @inbounds δ𝑤[2, i] = (1 - (p[1, i] - y[i])^2 / exp(2 * p[2, i])) * δ𝑤[5, i]
        # second order
        @inbounds δ𝑤[3, i] = δ𝑤[5, i] / exp(2 * p[2, i])
        @inbounds δ𝑤[4, i] = δ𝑤[5, i] * 2 / exp(2 * p[2, i]) * (p[1, i] - y[i])^2
    end
end

Expand All @@ -99,20 +99,20 @@ end
# pred[i][2] = log(s)
function update_grads!(::Type{LogisticDist}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
ϵ = eltype(p)(2e-7)
@inbounds @simd for i in eachindex(y)
@threads for i in eachindex(y)
# first order
δ𝑤[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑤[5, i]
δ𝑤[2, i] =
@inbounds δ𝑤[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑤[5, i]
@inbounds δ𝑤[2, i] =
-(
exp(-p[2, i]) *
(y[i] - p[1, i]) *
tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) - 1
) * δ𝑤[5, i]
# second order
δ𝑤[3, i] =
@inbounds δ𝑤[3, i] =
sech((y[i] - p[1, i]) / (2 * exp(p[2, i])))^2 / (2 * exp(2 * p[2, i])) *
δ𝑤[5, i]
δ𝑤[4, i] =
@inbounds δ𝑤[4, i] =
(
exp(-2 * p[2, i]) *
(p[1, i] - y[i]) *
Expand Down

2 comments on commit 3cbf6be

@jeremiedb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/71298

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.12.5 -m "<description of version>" 3cbf6be58d52aad84cf94bd9d97e2a0cff3274c8
git push origin v0.12.5

Please sign in to comment.