diff --git a/src/Polar/derivatives.jl b/src/Polar/derivatives.jl
index 9d207e26..c5d8c132 100644
--- a/src/Polar/derivatives.jl
+++ b/src/Polar/derivatives.jl
@@ -98,7 +98,7 @@ function AutoDiff.jacobian!(polar::PolarForm, jac::AutoDiff.Jacobian, buffer)
         jac.t1sF .= 0.0
     end
 
-    AutoDiff.seed!(jac.t1sseeds, jac.varx, jac.t1svarx, nbus)
+    AutoDiff.seed!(jac.t1sseeds, jac.varx, jac.t1svarx)
 
     if isa(type, State)
         jac.func(
@@ -120,7 +120,7 @@ function AutoDiff.jacobian!(polar::PolarForm, jac::AutoDiff.Jacobian, buffer)
         )
     end
 
-    AutoDiff.getpartials_kernel!(jac.compressedJ, jac.t1sF, nbus)
+    AutoDiff.getpartials_kernel!(jac.compressedJ, jac.t1sF)
     AutoDiff.uncompress_kernel!(jac.J, jac.compressedJ, jac.coloring)
     return jac.J
 end
@@ -176,15 +176,17 @@ function AutoDiff.Hessian(polar::PolarForm{T, VI, VT, MT}, func) where {T, VI, V
     t1s{N} = ForwardDiff.Dual{Nothing,Float64, N} where N
     t1sx = A{t1s{1}}(x)
     t1sF = A{t1s{1}}(zeros(Float64, n_cons))
+    host_t1sseeds = Vector{ForwardDiff.Partials{1,Float64}}(undef, nmap)
     t1sseeds = A{ForwardDiff.Partials{1,Float64}}(undef, nmap)
     varx = view(x, map)
     t1svarx = view(t1sx, map)
+    VHP = typeof(host_t1sseeds)
     VP = typeof(t1sseeds)
     VD = typeof(t1sx)
     adj_t1sx = similar(t1sx)
     adj_t1sF = similar(t1sF)
-    return AutoDiff.Hessian{typeof(func), VI, VT, VP, VD, typeof(varx), typeof(t1svarx)}(
-        func, t1sseeds, t1sF, adj_t1sF, x, t1sx, adj_t1sx, map, varx, t1svarx
+    return AutoDiff.Hessian{typeof(func), VI, VT, VHP, VP, VD, typeof(varx), typeof(t1svarx)}(
+        func, host_t1sseeds, t1sseeds, t1sF, adj_t1sF, x, t1sx, adj_t1sx, map, varx, t1svarx
     )
 end
 
@@ -214,10 +216,12 @@ function AutoDiff.adj_hessian_prod!(
     nmap = length(H.map)
 
     # Init seed
-    @inbounds for i in 1:nmap
-        H.t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(v[i]))
+    hostv = Array(v)
+    @inbounds Threads.@threads for i in 1:nmap
+        H.host_t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(hostv[i]))
     end
-    AutoDiff.seed!(H.t1sseeds, H.varx, H.t1svarx, nbus)
+    copyto!(H.t1sseeds, H.host_t1sseeds)
+    AutoDiff.seed!(H.t1sseeds, H.varx, H.t1svarx)
 
     adjoint!(
         polar, H.func,
@@ -228,9 +232,7 @@ function AutoDiff.adj_hessian_prod!(
         view(t1sx, 3*nbus+1:4*nbus), view(adj_t1sx, 3*nbus+1:4*nbus), # qinj
     )
 
-    @inbounds for i in 1:length(hv)
-        hv[i] = ForwardDiff.partials(adj_t1sx[H.map[i]]).values[1]
-    end
+    AutoDiff.getpartials_kernel!(hv, adj_t1sx, H.map)
     return nothing
 end
 
diff --git a/src/autodiff.jl b/src/autodiff.jl
index 3e7128a0..4433b949 100644
--- a/src/autodiff.jl
+++ b/src/autodiff.jl
@@ -89,8 +89,9 @@ Creates an object for computing Hessian adjoint tangent projections.
 * `varx::SubT`: View of `map` on `x`
 * `t1svarx::SubD`: Active (AD) view of `map` on `x`
 """
-struct Hessian{Func, VI, VT, VP, VD, SubT, SubD} <: AbstractHessian
+struct Hessian{Func, VI, VT, VHP, VP, VD, SubT, SubD} <: AbstractHessian
     func::Func
+    host_t1sseeds::VHP # Needed because seeds have to be created on the host
     t1sseeds::VP
     t1sF::VD
     ∂t1sF::VD
@@ -139,7 +140,7 @@ Calling the seeding kernel.
 Seeding is parallelized over the `ncolor` number of duals.
 
 """
-function seed!(t1sseeds, varx, t1svarx, nbus) where {N, V}
+function seed!(t1sseeds, varx, t1svarx)
     if isa(t1sseeds, Vector)
         kernel! = seed_kernel!(CPU())
     else
@@ -163,15 +164,31 @@ end
     end
 end
 
+# Get partials for Hessian projection
+@kernel function getpartials_hv_kernel!(hv, adj_t1sx, map)
+    i = @index(Global, Linear)
+    hv[i] = ForwardDiff.partials(adj_t1sx[map[i]]).values[1]
+end
+
 """
-    getpartials_kernel!(compressedJ, t1sF, nbus)
+    getpartials_kernel!(compressedJ, t1sF)
 
 Calling the partial extraction kernel.
 Extract the partials from the AutoDiff dual type on the target
 device and put it in the compressed Jacobian `compressedJ`.
 
 """
-function getpartials_kernel!(compressedJ, t1sF, nbus)
+function getpartials_kernel!(hv::AbstractVector, adj_t1sx, map)
+    if isa(hv, Array)
+        kernel! = getpartials_hv_kernel!(CPU())
+    else
+        kernel! = getpartials_hv_kernel!(CUDADevice())
+    end
+    ev = kernel!(hv, adj_t1sx, map, ndrange=length(hv))
+    wait(ev)
+end
+
+function getpartials_kernel!(compressedJ::AbstractMatrix, t1sF)
     if isa(compressedJ, Array)
         kernel! = getpartials_kernel_cpu!(CPU())
     else
@@ -215,68 +232,6 @@ function uncompress_kernel!(J, compressedJ, coloring)
     wait(ev)
 end
 
-"""
-    residual_hessian_adj_tgt!(H::Hessian,
-                        residual_adj_polar!,
-                        lambda, tgt,
-                        vm, va, ybus_re, ybus_im, pinj, qinj, pv, pq, ref,
-                        nbus)
-
-Update the sparse Jacobian entries using AutoDiff. No allocations are taking place in this function.
-
-* `H::Hessian`: Factory created Jacobian object to update
-* `residual_adj_polar`: Adjoint function of residual
-* `lambda`: Input adjoint, usually lambda from a Langrangian
-* `tgt`: A tangent direction or vector that the Hessian should be multiplied with
-* `vm, va, ybus_re, ybus_im, pinj, qinj, pv, pq, ref, nbus`: Inputs both
-  active and passive parameters. Active inputs are mapped to `x` via the preallocated views.
-"""
-function tgt_adj_residual_hessian!(H::Hessian,
-                             adj_residual_polar!,
-                             lambda, tgt,
-                             v_m, v_a, ybus_re, ybus_im, pinj, qinj, pv, pq, ref, nbus)
-    @warn("Function `tgt_adj_residual_hessian!` is deprecated.")
-    x = H.x
-    ntgt = length(tgt)
-    nvbus = length(v_m)
-    ninj = length(pinj)
-    t1sx = H.t1sx
-    adj_t1sx = similar(t1sx)
-    t1sF = H.t1sF
-    adj_t1sF = similar(t1sF)
-    x[1:nvbus] .= v_m
-    x[nvbus+1:2*nvbus] .= v_a
-    x[2*nvbus+1:2*nvbus+ninj] .= pinj
-    t1sx .= H.x
-    adj_t1sx .= 0.0
-    t1sF .= 0.0
-    adj_t1sF .= lambda
-    # Seeding
-    nmap = length(H.map)
-    t1sseedvec = zeros(Float64, length(x))
-    for i in 1:nmap
-        H.t1sseeds[i] = ForwardDiff.Partials{1, Float64}(NTuple{1, Float64}(tgt[i]))
-    end
-    seed!(H.t1sseeds, H.varx, H.t1svarx, nbus)
-    adj_residual_polar!(
-        t1sF, adj_t1sF,
-        view(t1sx, 1:nvbus), view(adj_t1sx, 1:nvbus),
-        view(t1sx, nvbus+1:2*nvbus), view(adj_t1sx, nvbus+1:2*nvbus),
-        ybus_re, ybus_im,
-        view(t1sx, 2*nvbus+1:2*nvbus+ninj), view(adj_t1sx, 2*nvbus+1:2*nvbus+ninj),
-        qinj,
-        pv, pq, nbus
-    )
-    # TODO, this is redundant
-    ps = ForwardDiff.partials.(adj_t1sx[H.map])
-    res = similar(tgt)
-    res .= 0.0
-    for i in 1:length(ps)
-        res[i] = ps[i].values[1]
-    end
-    return res
-end
-
 function Base.show(io::IO, jacobian::Jacobian)
     println(io, "A AutoDiff Jacobian for $(jacobian.func) (w.r.t. $(jacobian.var))")
     ncolor = size(jacobian.compressedJ, 1)
diff --git a/test/Polar/hessian.jl b/test/Polar/hessian.jl
index fb1fe75b..c0e19910 100644
--- a/test/Polar/hessian.jl
+++ b/test/Polar/hessian.jl
@@ -132,11 +132,6 @@ const PS = PowerSystem
             dev_λ = T(λ)
             tgt[nx+1:end] .= 0.0
             dev_tgt = T(tgt)
-            dev_projxx = ExaPF.AutoDiff.tgt_adj_residual_hessian!(
-                HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va,
-                ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus)
-            projxx = Array(dev_projxx)
-            @test isapprox(projxx[1:nx], ∇²gλ.xx * tgt[1:nx])
             AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt)
             projp = Array(dev_projp)
             @test isapprox(projp[1:nx], ∇²gλ.xx * tgt[1:nx])
@@ -145,14 +140,9 @@ const PS = PowerSystem
             # set tangents only for u direction
             tgt[1:nx] .= 0.0
             dev_tgt = T(tgt)
-            dev_projuu = AutoDiff.tgt_adj_residual_hessian!(
-                HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va,
-                ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus)
-            projuu = Array(dev_projuu)
             AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt)
             projp = Array(dev_projp)
             # (we use absolute tolerance as Huu is equal to 0 for case9)
-            @test isapprox(projuu[nx+1:end], ∇²gλ.uu * tgt[nx+1:end], atol=ATOL)
             @test isapprox(projp[nx+1:end], ∇²gλ.uu * tgt[nx+1:end], atol=ATOL)
 
             # check cross terms ux
@@ -164,11 +154,6 @@ const PS = PowerSystem
                 ∇²gλ.xu ∇²gλ.uu
             ]
             dev_tgt = T(tgt)
-            dev_projxu = ExaPF.AutoDiff.tgt_adj_residual_hessian!(
-                HessianAD, ExaPF.adj_residual_polar!, dev_λ, dev_tgt, vm, va,
-                ybus_re, ybus_im, pbus, qbus, dev_pv, dev_pq, dev_ref, nbus)
-            projxu = Array(dev_projxu)
-            @test isapprox(projxu, H * tgt)
             AutoDiff.adj_hessian_prod!(polar, HessianAD, dev_projp, cache, dev_λ, dev_tgt)
             projp = Array(dev_projp)
             @test isapprox(projp, H * tgt)