From 83b6b6bb349e7b8e511986bbf32a7b369686047e Mon Sep 17 00:00:00 2001
From: fpacaud
Date: Wed, 10 Mar 2021 16:16:57 +0100
Subject: [PATCH] evaluator: add reduced Hessian in ProxALEvaluator

* add a dedicated function in `src/Polar/objective.jl` for the Hessian of
  ProxAL's objective
* add two functions, `_second_order_adjoint_z!` and `_second_order_adjoint_ψ!`,
  for the second-order adjoints z and ψ
* compatible with GPU
---
 src/Evaluators/proxal_evaluators.jl |  94 +++++++++++++++++++------
 src/Evaluators/reduced_evaluator.jl |  91 ++++++++++++++-----------
 src/Polar/objective.jl              | 102 +++++++++++++++++++++++++-
 test/Evaluators/proxal_evaluator.jl |  14 +++-
 4 files changed, 238 insertions(+), 63 deletions(-)

diff --git a/src/Evaluators/proxal_evaluators.jl b/src/Evaluators/proxal_evaluators.jl
index 54067e8c..ad7a7600 100644
--- a/src/Evaluators/proxal_evaluators.jl
+++ b/src/Evaluators/proxal_evaluators.jl
@@ -175,8 +175,25 @@ function objective(nlp::ProxALEvaluator, w)
     return cost
 end
 
+function _adjoint_objective!(nlp::ProxALEvaluator, ∂f, pg)
+    # Import model
+    model = nlp.inner.model
+    ## Seed left-hand side vector
+    adjoint_cost!(model, ∂f, pg)
+    if nlp.time != Origin
+        ∂f .-= nlp.λf
+        ∂f .-= nlp.ρf .* nlp.ramp_link_prev
+    end
+    if nlp.time != Final
+        ∂f .+= nlp.λt
+        ∂f .+= nlp.ρt .* nlp.ramp_link_next
+    end
+    ∂f .+= nlp.τ .* (pg .- nlp.pg_ref)
+    return
+end
+
 ## Gradient
-function full_gradient!(nlp::ProxALEvaluator, jvx, jvu, w)
+function full_gradient!(nlp::ProxALEvaluator, jx, ju, w)
     # Import model
     model = nlp.inner.model
     # Import buffer (has been updated previously in update!)
@@ -190,29 +207,15 @@ function full_gradient!(nlp::ProxALEvaluator, jvx, jvu, w)
     u = @view w[1:nlp.nu]
 
-    ## Objective's coefficients
-    coefs = model.costs_coefficients
-    c3 = @view coefs[:, 3]
-    c4 = @view coefs[:, 4]
-    ## Seed left-hand side vector
-    ∂obj.∂pg .= scale_obj .* (c3 .+ 2.0 .* c4 .* pg)
-    if nlp.time != Origin
-        ∂obj.∂pg .-= nlp.λf
-        ∂obj.∂pg .-= nlp.ρf .* nlp.ramp_link_prev
-    end
-    if nlp.time != Final
-        ∂obj.∂pg .+= nlp.λt
-        ∂obj.∂pg .+= nlp.ρt .* nlp.ramp_link_next
-    end
-    ∂obj.∂pg .+= nlp.τ .* (pg .- nlp.pg_ref)
+    ## Compute adjoint of objective
+    _adjoint_objective!(nlp, ∂obj.∂pg, pg)
 
     ## Evaluate conjointly
     # ∇fₓ = v' * J, with J = ∂pg / ∂x
     # ∇fᵤ = v' * J, with J = ∂pg / ∂u
     put(model, PS.Generators(), PS.ActivePower(), ∂obj, buffer)
-
-    copyto!(jvx, ∂obj.∇fₓ)
-    copyto!(jvu, ∂obj.∇fᵤ)
+    copyto!(jx, ∂obj.∇fₓ)
+    copyto!(ju, ∂obj.∇fᵤ)
 end
 
 function gradient_slack!(nlp::ProxALEvaluator, grad, w)
@@ -296,6 +299,59 @@ function ojtprod!(nlp::ProxALEvaluator, jv, u, σ, v)
     reduced_gradient!(nlp, jv, jvx, jvu, u)
 end
 
+function hessprod!(nlp::ProxALEvaluator, hessvec, w, v)
+    model = nlp.inner.model
+    nx = get(model, NumberOfState())
+    nu = get(model, NumberOfControl())
+    buffer = get(nlp.inner, PhysicalState())
+    H = nlp.inner.hessians
+
+    u = @view w[1:nlp.nu]
+    vᵤ = @view v[1:nlp.nu]
+    vₛ = @view v[1+nlp.nu:end]
+
+    fill!(hessvec, 0.0)
+
+    hvu = @view hessvec[1:nlp.nu]
+    hvs = @view hessvec[1+nlp.nu:end]
+
+    z = H.z
+    tgt_xu = H.tmp_tgt  # tangent w.r.t. x and u
+    hv_xu = H.tmp_hv    # Hessian-vector product w.r.t. x and u
+
+    _second_order_adjoint_z!(nlp.inner, z, vᵤ)
+
+    # Init tangent
+    tgt_xu[1:nx] .= z
+    tgt_xu[1+nx:nx+nu] .= vᵤ
+
+    # Init adjoint
+    ∂f = similar(buffer.pg)
+    ∂²f = similar(buffer.pg)
+    # Adjoint w.r.t. ProxAL's objective
+    _adjoint_objective!(nlp, ∂f, buffer.pg)
+
+    # Hessian of ProxAL's objective
+    hessian_cost!(model, ∂²f)
+    # Contribution of penalties
+    ∂²f .+= nlp.τ
+    if nlp.time != Origin
+        ∂²f .+= nlp.ρf
+    end
+    if nlp.time != Final
+        ∂²f .+= nlp.ρt
+    end
+
+    ## OBJECTIVE HESSIAN
+    has_slack = (nlp.time != Origin)
+    hessian_prod_objective_proxal!(model, H.obj, nlp.inner.obj_stack, hv_xu, hvs, ∂²f, ∂f, buffer, tgt_xu, vₛ, nlp.ρf, has_slack)
+    ∇²fx = hv_xu[1:nx]        # w.r.t. x
+    ∇²fu = hv_xu[nx+1:nx+nu]  # w.r.t. u
+
+    _reduced_hessian_prod!(nlp.inner, hvu, ∇²fx, ∇²fu, tgt_xu)
+    return
+end
+
 ## Utils function
 function reset!(nlp::ProxALEvaluator)
     reset!(nlp.inner)
diff --git a/src/Evaluators/reduced_evaluator.jl b/src/Evaluators/reduced_evaluator.jl
index 5bbd1153..fffc6267 100644
--- a/src/Evaluators/reduced_evaluator.jl
+++ b/src/Evaluators/reduced_evaluator.jl
@@ -397,14 +397,47 @@ end
 ###
 # Second-order code
 ####
+# z = -(∇gₓ \ (∇gᵤ * w))
+function _second_order_adjoint_z!(
+    nlp::ReducedSpaceEvaluator, z, w,
+)
+    ∇gᵤ = nlp.state_jacobian.Ju.J
+    mul!(z, ∇gᵤ, w, -1.0, 0.0)
+
+    if isa(z, CUDA.CuArray)
+        ∇gₓ = nlp.state_jacobian.Jx.J
+        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
+    else
+        ∇gₓ = nlp.factorization
+        ldiv!(∇gₓ, z)
+    end
+end
+
+# ψ = ∇gₓ' \ ∂fₓ (the minus sign is applied by the caller)
+function _second_order_adjoint_ψ!(
+    nlp::ReducedSpaceEvaluator, ψ, ∂fₓ,
+)
+    if isa(ψ, CUDA.CuArray)
+        ∇gₓ = nlp.state_jacobian.Jx.J
+        ∇gT = LinearSolvers.get_transpose(nlp.linear_solver, ∇gₓ)
+        LinearSolvers.ldiv!(nlp.linear_solver, ψ, ∇gT, ∂fₓ)
+    else
+        ∇gₓ = nlp.factorization
+        ldiv!(ψ, ∇gₓ', ∂fₓ)
+    end
+end
+
 function _reduced_hessian_prod!(
-    nlp::ReducedSpaceEvaluator, hessvec, ∂fₓ, ∂fᵤ, hv, ψ, tgt,
+    nlp::ReducedSpaceEvaluator, hessvec, ∂fₓ, ∂fᵤ, tgt,
 )
     nx = get(nlp.model, NumberOfState())
     nu = get(nlp.model, NumberOfControl())
     H = nlp.hessians
     ∇gᵤ = nlp.state_jacobian.Ju.J
     λ = - nlp.λ
+    ψ = H.ψ
+
+    hv = H.tmp_hv
 
     ## POWER BALANCE HESSIAN
     AutoDiff.adj_hessian_prod!(nlp.model, H.state, hv, nlp.buffer, λ, tgt)
@@ -412,17 +445,9 @@ function _reduced_hessian_prod!(
     ∂fᵤ .+= hv[nx+1:nx+nu]
 
     # Second order adjoint
-    # ψ = -(∇gₓ' \ (∇²fx .+ ∇²gx))
-    if isa(hessvec, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        ∇gT = LinearSolvers.get_transpose(nlp.linear_solver, ∇gₓ)
-        LinearSolvers.ldiv!(nlp.linear_solver, ψ, ∇gT, ∂fₓ)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(ψ, ∇gₓ', ∂fₓ)
-    end
+    _second_order_adjoint_ψ!(nlp, ψ, ∂fₓ)
 
-    hessvec .= ∂fᵤ
+    hessvec .+= ∂fᵤ
     mul!(hessvec, transpose(∇gᵤ), ψ, -1.0, 1.0)
     return
 end
@@ -435,24 +460,14 @@ function hessprod!(nlp::ReducedSpaceEvaluator, hessvec, u, w)
     buffer = nlp.buffer
     H = nlp.hessians
 
-    ∇gᵤ = nlp.state_jacobian.Ju.J
-
-    z = H.z
-    ψ = H.ψ
-    # z = -(∇gₓ \ (∇gᵤ * w))
-    mul!(z, ∇gᵤ, w, -1.0, 0.0)
-
-    if isa(u, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(∇gₓ, z)
-    end
+    fill!(hessvec, 0.0)
 
     # Two vector products
     tgt = H.tmp_tgt
     hv = H.tmp_hv
+    z = H.z
+
+    _second_order_adjoint_z!(nlp, z, w)
 
     # Init tangent
     tgt[1:nx] .= z
@@ -460,14 +475,17 @@ function hessprod!(nlp::ReducedSpaceEvaluator, hessvec, u, w)
     tgt[1+nx:nx+nu] .= w
 
     ∂f = similar(buffer.pg)
     ∂²f = similar(buffer.pg)
+
+    # Adjoint and Hessian of cost function
     adjoint_cost!(nlp.model, ∂f, buffer.pg)
     hessian_cost!(nlp.model, ∂²f)
+
     ## OBJECTIVE HESSIAN
     hessian_prod_objective!(nlp.model, H.obj, nlp.obj_stack, hv, ∂²f, ∂f, buffer, tgt)
     ∇²fx = hv[1:nx]
     ∇²fu = hv[nx+1:nx+nu]
 
-    _reduced_hessian_prod!(nlp, hessvec, ∇²fx, ∇²fu, hv, ψ, tgt)
+    _reduced_hessian_prod!(nlp, hessvec, ∇²fx, ∇²fu, tgt)
     return
 end
 
@@ -482,21 +500,10 @@ function hessian_lagrangian_penalty_prod!(
     buffer = nlp.buffer
     H = nlp.hessians
 
-    ∇gᵤ = nlp.state_jacobian.Ju.J
+    fill!(hessvec, 0.0)
 
-    λ = - nlp.λ
     z = H.z
-    ψ = H.ψ
-    # z = -(∇gₓ \ (∇gᵤ * w))
-    mul!(z, ∇gᵤ, w, -1.0, 0.0)
-
-    if isa(u, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(∇gₓ, z)
-    end
+    _second_order_adjoint_z!(nlp, z, w)
 
     # Two vector products
     tgt = H.tmp_tgt
@@ -506,11 +513,13 @@ function hessian_lagrangian_penalty_prod!(
     tgt[1:nx] .= z
     tgt[1+nx:nx+nu] .= w
 
-    ## OBJECTIVE HESSIAN
     ∂f = similar(buffer.pg)
     ∂²f = similar(buffer.pg)
+    # Adjoint and Hessian of cost function
     adjoint_cost!(nlp.model, ∂f, buffer.pg)
     hessian_cost!(nlp.model, ∂²f)
+
+    ## OBJECTIVE HESSIAN
     hessian_prod_objective!(nlp.model, H.obj, nlp.obj_stack, hv, ∂²f, ∂f, buffer, tgt)
     ∇²Lx = σ .* hv[1:nx]
     ∇²Lu = σ .* hv[nx+1:nx+nu]
@@ -535,7 +544,7 @@ function hessian_lagrangian_penalty_prod!(
     end
 
     # Second order adjoint
-    _reduced_hessian_prod!(nlp, hessvec, ∇²Lx, ∇²Lu, hv, ψ, tgt)
+    _reduced_hessian_prod!(nlp, hessvec, ∇²Lx, ∇²Lu, tgt)
     return
 end
 
diff --git a/src/Polar/objective.jl b/src/Polar/objective.jl
index c68fdc17..0c69003e 100644
--- a/src/Polar/objective.jl
+++ b/src/Polar/objective.jl
@@ -57,7 +57,7 @@ function put(
     # Adjoint w.r.t Slack nodes
     adjoint!(polar, active_power_constraints, adj_pg[ref2gen] , buffer.pg, obj_autodiff, buffer)
 
-    # Adjoint w.t.t. PV nodes
+    # Adjoint w.r.t. PV nodes
     adj_pinj[index_pv] .= adj_pg[pv2gen]
 
     # Adjoint w.r.t. x and u
@@ -126,3 +126,103 @@ function hessian_prod_objective!(
     return nothing
 end
 
+#=
+    Special function to compute the Hessian of ProxAL's objective.
+    It allows reusing intermediate results, for efficiency.
+
+    This function collects the contribution of the state and
+    the control (in `hv_xu`) and the contribution of the slack
+    variable (in `hv_s`).
+
+    For ProxAL, we have:
+    H = [ H_xx  H_ux  J_x' ]
+        [ H_xu  H_uu  J_u' ]
+        [ J_x   J_u   ρ I  ]
+
+    so, if `v = [v_x; v_u; v_s]`, we get
+
+    H * v = [ H_xx v_x + H_ux v_u + J_x' v_s ]
+            [ H_xu v_x + H_uu v_u + J_u' v_s ]
+            [ J_x v_x  + J_u v_u  + ρ v_s    ]
+
+=#
+function hessian_prod_objective_proxal!(
+    polar::PolarForm,
+    ∇²f::AutoDiff.Hessian, ∇f::AdjointStackObjective,
+    hv_xu::AbstractVector,
+    hv_s::AbstractVector,
+    ∂²f::AbstractVector, ∂f::AbstractVector,
+    buffer::PolarNetworkState,
+    tgt::AbstractVector,
+    vs::AbstractVector,
+    ρ::Float64,
+    has_slack::Bool,
+)
+    nx = get(polar, NumberOfState())
+    nu = get(polar, NumberOfControl())
+    ngen = get(polar, PS.NumberOfGenerators())
+
+    # Indexing of generators
+    pv2gen = polar.indexing.index_pv_to_gen ; npv = length(pv2gen)
+    ref2gen = polar.indexing.index_ref_to_gen ; nref = length(ref2gen)
+
+    # Split tangent into its x and u parts
+    tx = @view tgt[1:nx]
+    tu = @view tgt[1+nx:nx+nu]
+
+    #= BLOCK UU
+        [ H_xx v_x + H_ux v_u ]
+        [ H_xu v_x + H_uu v_u ]
+    =#
+    ## Step 1: evaluate (∂f / ∂pg) * ∂²pg
+    ∂pg_ref = @view ∂f[ref2gen]
+    AutoDiff.adj_hessian_prod!(polar, ∇²f, hv_xu, buffer, ∂pg_ref, tgt)
+
+    ## Step 2: evaluate ∂pg' * (∂²f / ∂²pg) * ∂pg
+    # Compute the adjoint of pg at the slack node (∂pg_ref)
+    ∇f.∂pg .= 0.0
+    ∇f.∂pg[ref2gen] .= 1.0
+    put(polar, PS.Generators(), PS.ActivePower(), ∇f, buffer)
+
+    ∇pgₓ = ∇f.∇fₓ
+    ∇pgᵤ = ∇f.∇fᵤ
+
+    @views begin
+        tpg = tgt[nx+nu-npv+1:end]
+
+        scale_x = dot(∇pgₓ, ∂²f[ref2gen[1]], tx)
+        scale_u = dot(∇pgᵤ, ∂²f[ref2gen[1]], tu)
+        # Contribution of slack node
+        hv_xu[1:nx] .+= (scale_x + scale_u) .* ∇pgₓ
+        hv_xu[1+nx:nx+nu] .+= (scale_x + scale_u) .* ∇pgᵤ
+        # Contribution of PV nodes (only through power generation)
+        hv_xu[nx+nu-npv+1:end] .+= ∂²f[pv2gen] .* tpg
+    end
+
+    if has_slack
+        @views begin
+            #= BLOCK SU
+                [ J_x' v_s ]
+                [ J_u' v_s ]
+                [ J_x v_x + J_u v_u ]
+            =#
+            # 1. Contribution of slack node
+            hv_s[ref2gen] .-= ρ * (dot(∇pgᵤ, tu) + dot(∇pgₓ, tx))
+            hv_xu[1:nx] .-= ρ .* ∇pgₓ .* vs[ref2gen]
+            hv_xu[1+nx:nx+nu] .-= ρ .* ∇pgᵤ .* vs[ref2gen]
+
+            # 2. Contribution of PV nodes
+            hv_s[pv2gen] .-= ρ .* tu[nref+npv+1:end]
+            hv_xu[nx+nref+npv+1:end] .-= ρ .* vs[pv2gen]
+
+            #= BLOCK SS
+                ρ I
+            =#
+            # Hessian w.r.t. slack
+            hv_s .+= ρ .* vs
+        end
+    end
+
+    return nothing
+end
+
diff --git a/test/Evaluators/proxal_evaluator.jl b/test/Evaluators/proxal_evaluator.jl
index 2cf0beef..5671d757 100644
--- a/test/Evaluators/proxal_evaluator.jl
+++ b/test/Evaluators/proxal_evaluator.jl
@@ -17,7 +17,7 @@ import ExaPF: PS
     end
 
     # Build reference evaluator
-    nlp = ExaPF.ReducedSpaceEvaluator(datafile)
+    nlp = ExaPF.ReducedSpaceEvaluator(datafile; powerflow_solver=NewtonRaphson(tol=1e-12))
     S = ExaPF.array_type(nlp)
 
     u0 = ExaPF.initial(nlp)
@@ -35,7 +35,7 @@
 
     # Compute objective
     c = ExaPF.objective(prox, w)
 
-    @testset "Gradient" begin
+    @testset "Gradient & Hessian" begin
         g = similar(w) ; fill!(g, 0)
         ExaPF.gradient!(prox, g, w)
@@ -60,6 +60,16 @@
         ExaPF.gradient!(prox, g, w)
         grad_fd = FiniteDiff.finite_difference_gradient(reduced_cost, w)
         @test isapprox(grad_fd, g, rtol=1e-6)
+
+        hv = similar(w) ; fill!(hv, 0)
+        tgt = similar(w) ; fill!(tgt, 0)
+        tgt[1] = 1.0
+        ExaPF.hessprod!(prox, hv, w, tgt)
+        H = ExaPF.hessian(prox, w)
+
+        hess_fd = FiniteDiff.finite_difference_hessian(reduced_cost, w)
+        # Compare against the `data` field, as hess_fd is of type Symmetric
+        @test H ≈ hess_fd.data rtol=1e-6
     end
 
     @testset "Constraints" begin
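-- 

For reference, the `hessprod!` functions above assemble the reduced
Hessian-vector product Hᵣ v = Wᵤᵤ v + Wᵤₓ z - ∇gᵤ' ψ from the two
second-order adjoints z = -(∇gₓ \ (∇gᵤ v)) and ψ = ∇gₓ' \ (Wₓₓ z + Wₓᵤ v),
where W denotes the full-space Hessian of the objective (or Lagrangian).
Below is a minimal dense sketch of that recipe on a toy problem; it is an
illustration only: the names (`reduced_hessprod`, `Wxx`, `gx`, ...) are
hypothetical, and the dense `\` solves stand in for the sparse factorization
and `LinearSolvers.ldiv!` calls used in the patch.

using LinearAlgebra

# Reduced Hessian-vector product via second-order adjoints, on dense toy
# data. W = [Wxx Wxu; Wxu' Wuu] is the full-space Hessian; gx = ∂g/∂x and
# gu = ∂g/∂u are the Jacobians of the constraints g(x, u) = 0.
function reduced_hessprod(Wxx, Wxu, Wuu, gx, gu, v)
    z = -(gx \ (gu * v))      # forward second-order adjoint (cf. _second_order_adjoint_z!)
    hx = Wxx * z + Wxu * v    # full-space Hessian applied to the tangent (z, v)
    hu = Wxu' * z + Wuu * v
    ψ = gx' \ hx              # backward second-order adjoint (cf. _second_order_adjoint_ψ!)
    return hu - gu' * ψ       # reduction onto the control space
end

# Consistency check against the explicit reduction Hᵣ = S' W S, with the
# sensitivity matrix S = [-gx \ gu; I]; this mirrors the finite-difference
# check added in test/Evaluators/proxal_evaluator.jl.
nx, nu = 5, 3
A = randn(nx + nu, nx + nu); W = A' * A   # random symmetric Hessian
Wxx = W[1:nx, 1:nx]; Wxu = W[1:nx, nx+1:end]; Wuu = W[nx+1:end, nx+1:end]
gx = randn(nx, nx) + 5.0I                 # well-conditioned state Jacobian
gu = randn(nx, nu)
S = [-(gx \ gu); Matrix{Float64}(I, nu, nu)]
v = randn(nu)
@assert reduced_hessprod(Wxx, Wxu, Wuu, gx, gu, v) ≈ (S' * W * S) * v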