Skip to content

Commit

Permalink
Merge pull request #182 from Evovest/gradients
Browse files Browse the repository at this point in the history
threaded gradients
  • Loading branch information
jeremiedb authored Oct 30, 2022
2 parents c398f45 + d6ba36a commit 3cbf6be
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 38 deletions.
34 changes: 34 additions & 0 deletions experiments/gradients.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using Revise
using EvoTrees
using Base.Threads

# Benchmark fixture: compare a serial vs a threaded gradient-update kernel
# on 1M observations.
L = EvoTrees.Logistic
T = Float64
nobs = 1_000_000
y = rand(T, nobs)
# predictions are stored as a (K, nobs) matrix; here K = 1 output dimension
pred = rand(T, 1, nobs)
K = 1
# gradient buffer with 2K + 1 rows; the last row holds the observation
# weights (set below), and the kernels read it via δ𝑤[3, i] when K = 1
δ𝑤 = zeros(T, 2 * K + 1, nobs)
w = ones(T, nobs)
δ𝑤[end, :] .= w

# nthreads: 12
Threads.nthreads()

"""
    update_grads_v1!(::Type{EvoTrees.Linear}, δ𝑤, p, y; kwargs...)

Serial baseline: write the weighted first-order term into row 1 of `δ𝑤`
and the (constant) second-order term into row 2, reading the observation
weight from row 3. Mutates `δ𝑤` in place.
"""
function update_grads_v1!(::Type{EvoTrees.Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    for obs in eachindex(y)
        @inbounds δ𝑤[1, obs] = 2 * (p[1, obs] - y[obs]) * δ𝑤[3, obs]
        @inbounds δ𝑤[2, obs] = 2 * δ𝑤[3, obs]
    end
end
# Recorded timing for the serial kernel:
# 958.670 μs (0 allocations: 0 bytes)
# NOTE(review): `@btime` comes from BenchmarkTools, which is not loaded in
# this file — presumably loaded in the session; confirm before running.
@btime update_grads_v1!(L, δ𝑤, pred, y)

"""
    update_grads_v2!(::Type{EvoTrees.Linear}, δ𝑤, p, y; kwargs...)

Threaded variant of `update_grads_v1!`: identical per-observation updates,
with iterations distributed across threads via `@threads` (each iteration
touches only its own column of `δ𝑤`, so no synchronization is needed).
"""
function update_grads_v2!(::Type{EvoTrees.Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for obs in eachindex(y)
        @inbounds begin
            δ𝑤[1, obs] = 2 * (p[1, obs] - y[obs]) * δ𝑤[3, obs]
            δ𝑤[2, obs] = 2 * δ𝑤[3, obs]
        end
    end
end
# NOTE(review): this timing is byte-identical to the v1 figure above —
# it looks copy-pasted; re-run to record the actual threaded timing.
# 958.670 μs (0 allocations: 0 bytes)
@btime update_grads_v2!(L, δ𝑤, pred, y)
76 changes: 38 additions & 38 deletions src/loss.jl
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
# linear
"""
    update_grads!(::Type{Linear}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the linear (squared-error) loss.
Writes the weighted first-order term into row 1 of `δ𝑤` and the constant
second-order term into row 2; row 3 holds the observation weight and is
only read. Each iteration writes a distinct column, so `@threads` is safe.
(The scraped diff interleaved the old serial and new threaded lines; this
is the post-merge threaded version.)
"""
function update_grads!(::Type{Linear}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] = 2 * (p[1, i] - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = 2 * δ𝑤[3, i]
    end
end

# logistic - on linear predictor
"""
    update_grads!(::Type{Logistic}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the logistic loss on the linear
predictor: `pred = sigmoid(p[1, i])`, gradient `(pred - y)` and hessian
`pred * (1 - pred)`, both scaled by the weight in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Logistic}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = sigmoid(p[1, i])
        @inbounds δ𝑤[1, i] = (pred - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = pred * (1 - pred) * δ𝑤[3, i]
    end
end

# Poisson
"""
    update_grads!(::Type{Poisson}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Poisson deviance loss:
`pred = exp(p[1, i])`, gradient `(pred - y)` and hessian `pred`, both
scaled by the observation weight stored in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Poisson}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = (pred - y[i]) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = pred * δ𝑤[3, i]
    end
end

# Gamma
"""
    update_grads!(::Type{Gamma}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Gamma deviance loss with a log
link: `pred = exp(p[1, i])`, gradient `2 * (1 - y / pred)` and hessian
`2 * y / pred`, both scaled by the weight in row 3 of `δ𝑤`.
"""
function update_grads!(::Type{Gamma}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = 2 * (1 - y[i] / pred) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = 2 * y[i] / pred * δ𝑤[3, i]
    end
end

# Tweedie
"""
    update_grads!(::Type{Tweedie}, δ𝑤, p, y; kwargs...)

Threaded gradient/hessian update for the Tweedie deviance loss with a log
link and fixed power `rho = 1.5` (converted to the element type of `p` so
the loop stays type-stable). Both terms are scaled by the weight in row 3
of `δ𝑤`.
"""
function update_grads!(::Type{Tweedie}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    rho = eltype(p)(1.5)
    @threads for i in eachindex(y)
        @inbounds pred = exp(p[1, i])
        @inbounds δ𝑤[1, i] = 2 * (pred^(2 - rho) - y[i] * pred^(1 - rho)) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] =
            2 * ((2 - rho) * pred^(2 - rho) - (1 - rho) * y[i] * pred^(1 - rho)) * δ𝑤[3, i]
    end
end

# L1
"""
    update_grads!(::Type{L1}, δ𝑤, p, y; alpha, kwargs...)

Threaded first-order update for the (quantile-weighted) L1 loss: positive
residuals are weighted by `alpha`, negative ones by `1 - alpha`, scaled by
the observation weight in row 3 of `δ𝑤`. No second-order term is written.
"""
function update_grads!(::Type{L1}, δ𝑤::Matrix, p::Matrix, y::Vector; alpha, kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] =
            (alpha * max(y[i] - p[1, i], 0) - (1 - alpha) * max(p[1, i] - y[i], 0)) *
            δ𝑤[3, i]
    end
end

# Softmax
function update_grads!(::Type{Softmax}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
p .= p .- maximum(p, dims = 1)
sums = sum(exp.(p), dims = 1)
p .= p .- maximum(p, dims=1)
sums = sum(exp.(p), dims=1)
K = (size(δ𝑤, 1) - 1) ÷ 2
for i in eachindex(y)
for k = 1:K
@threads for i in eachindex(y)
@inbounds for k = 1:K
# δ𝑤[k, i] = (exp(p[k, i]) / sums[i] - (onehot(y[i], 1:K))) * δ𝑤[2 * K + 1, i]
if k == y[i]
δ𝑤[k, i] = (exp(p[k, i]) / sums[i] - 1) * δ𝑤[2*K+1, i]
Expand All @@ -73,23 +73,23 @@ end

# Quantile
"""
    update_grads!(::Type{Quantile}, δ𝑤, p, y; alpha, kwargs...)

Threaded update for the quantile loss at level `alpha`: row 1 gets the
weighted pinball gradient (`alpha` above the prediction, `alpha - 1`
below); row 2 stores the raw residual `y - p`, which downstream code uses
to compute the quantile value — hence no weighting on it.
"""
function update_grads!(::Type{Quantile}, δ𝑤::Matrix, p::Matrix, y::Vector; alpha, kwargs...)
    @threads for i in eachindex(y)
        @inbounds δ𝑤[1, i] = y[i] > p[1, i] ? alpha * δ𝑤[3, i] : (alpha - 1) * δ𝑤[3, i]
        @inbounds δ𝑤[2, i] = y[i] - p[1, i] # δ² serves to calculate the quantile value - hence no weighting on δ²
    end
end

# Gaussian - http://jrmeyer.github.io/machinelearning/2017/08/18/mle.html
# pred[i][1] = μ
# pred[i][2] = log(σ)
"""
    update_grads!(::Type{GaussianDist}, δ𝑤, p, y; kwargs...)

Threaded MLE gradient/hessian update for a Gaussian distributional model
parameterized as `p[1, i] = μ` and `p[2, i] = log(σ)`. Rows 1-2 of `δ𝑤`
receive the first-order terms for μ and log(σ), rows 3-4 the second-order
terms, all scaled by the observation weight in row 5.
"""
function update_grads!(::Type{GaussianDist}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
    @threads for i in eachindex(y)
        # first order
        @inbounds δ𝑤[1, i] = (p[1, i] - y[i]) / exp(2 * p[2, i]) * δ𝑤[5, i]
        @inbounds δ𝑤[2, i] = (1 - (p[1, i] - y[i])^2 / exp(2 * p[2, i])) * δ𝑤[5, i]
        # second order
        @inbounds δ𝑤[3, i] = δ𝑤[5, i] / exp(2 * p[2, i])
        @inbounds δ𝑤[4, i] = δ𝑤[5, i] * 2 / exp(2 * p[2, i]) * (p[1, i] - y[i])^2
    end
end

Expand All @@ -99,20 +99,20 @@ end
# pred[i][2] = log(s)
function update_grads!(::Type{LogisticDist}, δ𝑤::Matrix, p::Matrix, y::Vector; kwargs...)
ϵ = eltype(p)(2e-7)
@inbounds @simd for i in eachindex(y)
@threads for i in eachindex(y)
# first order
δ𝑤[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑤[5, i]
δ𝑤[2, i] =
@inbounds δ𝑤[1, i] = -tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) * exp(-p[2, i]) * δ𝑤[5, i]
@inbounds δ𝑤[2, i] =
-(
exp(-p[2, i]) *
(y[i] - p[1, i]) *
tanh((y[i] - p[1, i]) / (2 * exp(p[2, i]))) - 1
) * δ𝑤[5, i]
# second order
δ𝑤[3, i] =
@inbounds δ𝑤[3, i] =
sech((y[i] - p[1, i]) / (2 * exp(p[2, i])))^2 / (2 * exp(2 * p[2, i])) *
δ𝑤[5, i]
δ𝑤[4, i] =
@inbounds δ𝑤[4, i] =
(
exp(-2 * p[2, i]) *
(p[1, i] - y[i]) *
Expand Down

2 comments on commit 3cbf6be

@jeremiedb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/71298

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.12.5 -m "<description of version>" 3cbf6be58d52aad84cf94bd9d97e2a0cff3274c8
git push origin v0.12.5

Please sign in to comment.