diff --git a/src/Polar/derivatives.jl b/src/Polar/derivatives.jl index 9d207e26..c5d8c132 100644 --- a/src/Polar/derivatives.jl +++ b/src/Polar/derivatives.jl @@ -98,7 +98,7 @@ function AutoDiff.jacobian!(polar::PolarForm, jac::AutoDiff.Jacobian, buffer) jac.t1sF .= 0.0 end - AutoDiff.seed!(jac.t1sseeds, jac.varx, jac.t1svarx, nbus) + AutoDiff.seed!(jac.t1sseeds, jac.varx, jac.t1svarx) if isa(type, State) jac.func( @@ -120,7 +120,7 @@ function AutoDiff.jacobian!(polar::PolarForm, jac::AutoDiff.Jacobian, buffer) ) end - AutoDiff.getpartials_kernel!(jac.compressedJ, jac.t1sF, nbus) + AutoDiff.getpartials_kernel!(jac.compressedJ, jac.t1sF) AutoDiff.uncompress_kernel!(jac.J, jac.compressedJ, jac.coloring) return jac.J end @@ -176,15 +176,17 @@ function AutoDiff.Hessian(polar::PolarForm{T, VI, VT, MT}, func) where {T, VI, V t1s{N} = ForwardDiff.Dual{Nothing,Float64, N} where N t1sx = A{t1s{1}}(x) t1sF = A{t1s{1}}(zeros(Float64, n_cons)) + host_t1sseeds = Vector{ForwardDiff.Partials{1,Float64}}(undef, nmap) t1sseeds = A{ForwardDiff.Partials{1,Float64}}(undef, nmap) varx = view(x, map) t1svarx = view(t1sx, map) + VHP = typeof(host_t1sseeds) VP = typeof(t1sseeds) VD = typeof(t1sx) adj_t1sx = similar(t1sx) adj_t1sF = similar(t1sF) - return AutoDiff.Hessian{typeof(func), VI, VT, VP, VD, typeof(varx), typeof(t1svarx)}( - func, t1sseeds, t1sF, adj_t1sF, x, t1sx, adj_t1sx, map, varx, t1svarx + return AutoDiff.Hessian{typeof(func), VI, VT, VHP, VP, VD, typeof(varx), typeof(t1svarx)}( + func, host_t1sseeds, t1sseeds, t1sF, adj_t1sF, x, t1sx, adj_t1sx, map, varx, t1svarx ) end @@ -214,10 +216,12 @@ function AutoDiff.adj_hessian_prod!( nmap = length(H.map) # Init seed - @inbounds for i in 1:nmap - H.t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(v[i])) + hostv = Array(v) + @inbounds Threads.@threads for i in 1:nmap + H.host_t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(hostv[i])) end - AutoDiff.seed!(H.t1sseeds, H.varx, H.t1svarx, nbus) + copyto!(H.t1sseeds, H.host_t1sseeds) + AutoDiff.seed!(H.t1sseeds, H.varx, H.t1svarx) adjoint!( polar, H.func, @@ -228,9 +232,7 @@ function AutoDiff.adj_hessian_prod!( view(t1sx, 3*nbus+1:4*nbus), view(adj_t1sx, 3*nbus+1:4*nbus), # qinj ) - @inbounds for i in 1:length(hv) - hv[i] = ForwardDiff.partials(adj_t1sx[H.map[i]]).values[1] - end + AutoDiff.getpartials_kernel!(hv, adj_t1sx, H.map) return nothing end diff --git a/src/autodiff.jl b/src/autodiff.jl index 3e7128a0..4433b949 100644 --- a/src/autodiff.jl +++ b/src/autodiff.jl @@ -89,8 +89,9 @@ Creates an object for computing Hessian adjoint tangent projections. * `varx::SubT`: View of `map` on `x` * `t1svarx::SubD`: Active (AD) view of `map` on `x` """ -struct Hessian{Func, VI, VT, VP, VD, SubT, SubD} <: AbstractHessian +struct Hessian{Func, VI, VT, VHP, VP, VD, SubT, SubD} <: AbstractHessian func::Func + host_t1sseeds::VHP # Needed because seeds have to be created on the host t1sseeds::VP t1sF::VD ∂t1sF::VD @@ -139,7 +140,7 @@ Calling the seeding kernel. Seeding is parallelized over the `ncolor` number of duals. """ -function seed!(t1sseeds, varx, t1svarx, nbus) where {N, V} +function seed!(t1sseeds, varx, t1svarx) if isa(t1sseeds, Vector) kernel! = seed_kernel!(CPU()) else @@ -163,15 +164,31 @@ end end end +# Get partials for Hessian projection +@kernel function getpartials_hv_kernel!(hv, adj_t1sx, map) + i = @index(Global, Linear) + hv[i] = ForwardDiff.partials(adj_t1sx[map[i]]).values[1] +end + """ - getpartials_kernel!(compressedJ, t1sF, nbus) + getpartials_kernel!(compressedJ, t1sF) Calling the partial extraction kernel. Extract the partials from the AutoDiff dual type on the target device and put it in the compressed Jacobian `compressedJ`. """ -function getpartials_kernel!(compressedJ, t1sF, nbus) +function getpartials_kernel!(hv::AbstractVector, adj_t1sx, map) + if isa(hv, Array) + kernel! = getpartials_hv_kernel!(CPU()) + else + kernel! = getpartials_hv_kernel!(CUDADevice()) + end + ev = kernel!(hv, adj_t1sx, map, ndrange=length(hv)) + wait(ev) +end + +function getpartials_kernel!(compressedJ::AbstractMatrix, t1sF) if isa(compressedJ, Array) kernel! = getpartials_kernel_cpu!(CPU()) else @@ -215,68 +232,6 @@ function uncompress_kernel!(J, compressedJ, coloring) wait(ev) end -""" - residual_hessian_adj_tgt!(H::Hessian, - residual_adj_polar!, - lambda, tgt, - vm, va, ybus_re, ybus_im, pinj, qinj, pv, pq, ref, - nbus) - -Update the sparse Jacobian entries using AutoDiff. No allocations are taking place in this function. - -* `H::Hessian`: Factory created Jacobian object to update -* `residual_adj_polar`: Adjoint function of residual -* `lambda`: Input adjoint, usually lambda from a Langrangian -* `tgt`: A tangent direction or vector that the Hessian should be multiplied with -* `vm, va, ybus_re, ybus_im, pinj, qinj, pv, pq, ref, nbus`: Inputs both - active and passive parameters. Active inputs are mapped to `x` via the preallocated views. -""" -function tgt_adj_residual_hessian!(H::Hessian, - adj_residual_polar!, - lambda, tgt, - v_m, v_a, ybus_re, ybus_im, pinj, qinj, pv, pq, ref, nbus) - @warn("Function `tgt_adj_residual_hessian!` is deprecated.") - x = H.x - ntgt = length(tgt) - nvbus = length(v_m) - ninj = length(pinj) - t1sx = H.t1sx - adj_t1sx = similar(t1sx) - t1sF = H.t1sF - adj_t1sF = similar(t1sF) - x[1:nvbus] .= v_m - x[nvbus+1:2*nvbus] .= v_a - x[2*nvbus+1:2*nvbus+ninj] .= pinj - t1sx .= H.x - adj_t1sx .= 0.0 - t1sF .= 0.0 - adj_t1sF .= lambda - # Seeding - nmap = length(H.map) - t1sseedvec = zeros(Float64, length(x)) - for i in 1:nmap - H.t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(tgt[i])) - end - seed!(H.t1sseeds, H.varx, H.t1svarx, nbus) - adj_residual_polar!( - t1sF, adj_t1sF, - view(t1sx, 1:nvbus), view(adj_t1sx, 1:nvbus), - view(t1sx, nvbus+1:2*nvbus), view(adj_t1sx, nvbus+1:2*nvbus), - ybus_re, ybus_im, - view(t1sx, 2*nvbus+1:2*nvbus+ninj), view(adj_t1sx, 2*nvbus+1:2*nvbus+ninj), - qinj, - pv, pq, nbus - ) - # TODO, this is redundant - ps = ForwardDiff.partials.(adj_t1sx[H.map]) - res = similar(tgt) - res .= 0.0 - for i in 1:length(ps) - res[i] = ps[i].values[1] - end - return res -end - function Base.show(io::IO, jacobian::Jacobian) println(io, "A AutoDiff Jacobian for $(jacobian.func) (w.r.t. $(jacobian.var))") ncolor = size(jacobian.compressedJ, 1) diff --git a/test/Polar/hessian.jl b/test/Polar/hessian.jl index fb1fe75b..c0e19910 100644 --- a/test/Polar/hessian.jl +++ b/test/Polar/hessian.jl @@ -132,11 +132,6 @@ const PS = PowerSystem dev_λ = T(λ) tgt[nx+1:end] .= 0.0 dev_tgt = T(tgt) - dev_projxx = ExaPF.AutoDiff.tgt_adj_residual_hessian!( - HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va, - ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus) - projxx = Array(dev_projxx) - @test isapprox(projxx[1:nx], ∇²gλ.xx * tgt[1:nx]) AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt) projp = Array(dev_projp) @test isapprox(projp[1:nx], ∇²gλ.xx * tgt[1:nx]) @@ -145,14 +140,9 @@ const PS = PowerSystem # set tangents only for u direction tgt[1:nx] .= 0.0 dev_tgt = T(tgt) - dev_projuu = AutoDiff.tgt_adj_residual_hessian!( - HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va, - ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus) - projuu = Array(dev_projuu) AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt) projp = Array(dev_projp) # (we use absolute tolerance as Huu is equal to 0 for case9) - @test isapprox(projuu[nx+1:end], ∇²gλ.uu * tgt[nx+1:end], atol=ATOL) @test isapprox(projp[nx+1:end], ∇²gλ.uu * tgt[nx+1:end], atol=ATOL) # check cross terms ux @@ -164,11 +154,6 @@ const PS = PowerSystem ∇²gλ.xu ∇²gλ.uu ] dev_tgt = T(tgt) - dev_projxu = ExaPF.AutoDiff.tgt_adj_residual_hessian!( - HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va, - ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus) - projxu = Array(dev_projxu) - @test isapprox(projxu, H * tgt) AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt) projp = Array(dev_projp) @test isapprox(projp, H * tgt)