From 83b6b6bb349e7b8e511986bbf32a7b369686047e Mon Sep 17 00:00:00 2001
From: fpacaud
Date: Wed, 10 Mar 2021 16:16:57 +0100
Subject: [PATCH] evaluator: add reduced Hessian in ProxALEvaluator

* add a dedicated function in `src/Polar/objective.jl` for the Hessian of
  ProxAL's objective
* add two functions, `_second_order_adjoint_z!` and `_second_order_adjoint_ψ!`,
  for the second-order adjoints z and ψ
* compatible with GPU
---
 src/Evaluators/proxal_evaluators.jl |  94 +++++++++++++++++++------
 src/Evaluators/reduced_evaluator.jl |  91 ++++++++++++++-----------
 src/Polar/objective.jl              | 102 +++++++++++++++++++++++++-
 test/Evaluators/proxal_evaluator.jl |  14 +++-
 4 files changed, 238 insertions(+), 63 deletions(-)

diff --git a/src/Evaluators/proxal_evaluators.jl b/src/Evaluators/proxal_evaluators.jl
index 54067e8c..ad7a7600 100644
--- a/src/Evaluators/proxal_evaluators.jl
+++ b/src/Evaluators/proxal_evaluators.jl
@@ -175,8 +175,25 @@ function objective(nlp::ProxALEvaluator, w)
     return cost
 end
 
+function _adjoint_objective!(nlp::ProxALEvaluator, ∂f, pg)
+    # Import model
+    model = nlp.inner.model
+    ## Seed left-hand side vector
+    adjoint_cost!(model, ∂f, pg)
+    if nlp.time != Origin
+        ∂f .-= nlp.λf
+        ∂f .-= nlp.ρf .* nlp.ramp_link_prev
+    end
+    if nlp.time != Final
+        ∂f .+= nlp.λt
+        ∂f .+= nlp.ρt .* nlp.ramp_link_next
+    end
+    ∂f .+= nlp.τ .* (pg .- nlp.pg_ref)
+    return
+end
+
 ## Gradient
-function full_gradient!(nlp::ProxALEvaluator, jvx, jvu, w)
+function full_gradient!(nlp::ProxALEvaluator, jx, ju, w)
     # Import model
     model = nlp.inner.model
     # Import buffer (has been updated previously in update!)
@@ -190,29 +207,15 @@ function full_gradient!(nlp::ProxALEvaluator, jvx, jvu, w)
     u = @view w[1:nlp.nu]
 
-    ## Objective's coefficients
-    coefs = model.costs_coefficients
-    c3 = @view coefs[:, 3]
-    c4 = @view coefs[:, 4]
-    ## Seed left-hand side vector
-    ∂obj.∂pg .= scale_obj .* (c3 .+ 2.0 .* c4 .* pg)
-    if nlp.time != Origin
-        ∂obj.∂pg .-= nlp.λf
-        ∂obj.∂pg .-= nlp.ρf .* nlp.ramp_link_prev
-    end
-    if nlp.time != Final
-        ∂obj.∂pg .+= nlp.λt
-        ∂obj.∂pg .+= nlp.ρt .* nlp.ramp_link_next
-    end
-    ∂obj.∂pg .+= nlp.τ .* (pg .- nlp.pg_ref)
+    ## Compute adjoint of objective
+    _adjoint_objective!(nlp, ∂obj.∂pg, pg)
 
     ## Evaluate conjointly
     # ∇fₓ = v' * J, with J = ∂pg / ∂x
     # ∇fᵤ = v' * J, with J = ∂pg / ∂u
     put(model, PS.Generators(), PS.ActivePower(), ∂obj, buffer)
-
-    copyto!(jvx, ∂obj.∇fₓ)
-    copyto!(jvu, ∂obj.∇fᵤ)
+    copyto!(jx, ∂obj.∇fₓ)
+    copyto!(ju, ∂obj.∇fᵤ)
 end
 
 function gradient_slack!(nlp::ProxALEvaluator, grad, w)
@@ -296,6 +299,59 @@ function ojtprod!(nlp::ProxALEvaluator, jv, u, σ, v)
     reduced_gradient!(nlp, jv, jvx, jvu, u)
 end
 
+function hessprod!(nlp::ProxALEvaluator, hessvec, w, v)
+    model = nlp.inner.model
+    nx = get(model, NumberOfState())
+    nu = get(model, NumberOfControl())
+    buffer = get(nlp.inner, PhysicalState())
+    H = nlp.inner.hessians
+
+    u = @view w[1:nlp.nu]
+    vᵤ = @view v[1:nlp.nu]
+    vₛ = @view v[1+nlp.nu:end]
+
+    fill!(hessvec, 0.0)
+
+    hvu = @view hessvec[1:nlp.nu]
+    hvs = @view hessvec[1+nlp.nu:end]
+
+    z = H.z
+    tgt_xu = H.tmp_tgt  # tangent w.r.t. x and u
+    hv_xu = H.tmp_hv    # Hessian-vector product w.r.t. x and u
+
+    _second_order_adjoint_z!(nlp.inner, z, vᵤ)
+
+    # Init tangent
+    tgt_xu[1:nx] .= z
+    tgt_xu[1+nx:nx+nu] .= vᵤ
+
+    # Init adjoint
+    ∂f = similar(buffer.pg)
+    ∂²f = similar(buffer.pg)
+    # Adjoint w.r.t. ProxAL's objective
+    _adjoint_objective!(nlp, ∂f, buffer.pg)
+
+    # Hessian of ProxAL's objective
+    hessian_cost!(model, ∂²f)
+    # Contribution of penalties
+    ∂²f .+= nlp.τ
+    if nlp.time != Origin
+        ∂²f .+= nlp.ρf
+    end
+    if nlp.time != Final
+        ∂²f .+= nlp.ρt
+    end
+
+    ## OBJECTIVE HESSIAN
+    has_slack = (nlp.time != Origin)
+    hessian_prod_objective_proxal!(model, H.obj, nlp.inner.obj_stack, hv_xu, hvs, ∂²f, ∂f, buffer, tgt_xu, vₛ, nlp.ρf, has_slack)
+    ∇²fx = hv_xu[1:nx]        # w.r.t. x
+    ∇²fu = hv_xu[nx+1:nx+nu]  # w.r.t. u
+
+    _reduced_hessian_prod!(nlp.inner, hvu, ∇²fx, ∇²fu, tgt_xu)
+    return
+end
+
 ## Utils function
 function reset!(nlp::ProxALEvaluator)
     reset!(nlp.inner)
diff --git a/src/Evaluators/reduced_evaluator.jl b/src/Evaluators/reduced_evaluator.jl
index 5bbd1153..fffc6267 100644
--- a/src/Evaluators/reduced_evaluator.jl
+++ b/src/Evaluators/reduced_evaluator.jl
@@ -397,14 +397,47 @@ end
 ###
 # Second-order code
 ####
+# z = -(∇gₓ \ (∇gᵤ * w))
+function _second_order_adjoint_z!(
+    nlp::ReducedSpaceEvaluator, z, w,
+)
+    ∇gᵤ = nlp.state_jacobian.Ju.J
+    mul!(z, ∇gᵤ, w, -1.0, 0.0)
+
+    if isa(z, CUDA.CuArray)
+        ∇gₓ = nlp.state_jacobian.Jx.J
+        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
+    else
+        ∇gₓ = nlp.factorization
+        ldiv!(∇gₓ, z)
+    end
+end
+
+# ψ = ∇gₓ' \ ∂fₓ (the minus sign is applied by the caller)
+function _second_order_adjoint_ψ!(
+    nlp::ReducedSpaceEvaluator, ψ, ∂fₓ,
+)
+    if isa(ψ, CUDA.CuArray)
+        ∇gₓ = nlp.state_jacobian.Jx.J
+        ∇gT = LinearSolvers.get_transpose(nlp.linear_solver, ∇gₓ)
+        LinearSolvers.ldiv!(nlp.linear_solver, ψ, ∇gT, ∂fₓ)
+    else
+        ∇gₓ = nlp.factorization
+        ldiv!(ψ, ∇gₓ', ∂fₓ)
+    end
+end
+
 function _reduced_hessian_prod!(
-    nlp::ReducedSpaceEvaluator, hessvec, ∂fₓ, ∂fᵤ, hv, ψ, tgt,
+    nlp::ReducedSpaceEvaluator, hessvec, ∂fₓ, ∂fᵤ, tgt,
 )
     nx = get(nlp.model, NumberOfState())
     nu = get(nlp.model, NumberOfControl())
     H = nlp.hessians
     ∇gᵤ = nlp.state_jacobian.Ju.J
     λ = - nlp.λ
+    ψ = H.ψ
+
+    hv = H.tmp_hv
 
     ## POWER BALANCE HESSIAN
     AutoDiff.adj_hessian_prod!(nlp.model, H.state, hv, nlp.buffer, λ, tgt)
@@ -412,17 +445,9 @@ function _reduced_hessian_prod!(
     ∂fᵤ .+= hv[nx+1:nx+nu]
 
     # Second order adjoint
-    # ψ = -(∇gₓ' \ (∇²fx .+ ∇²gx))
-    if isa(hessvec, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        ∇gT = LinearSolvers.get_transpose(nlp.linear_solver, ∇gₓ)
-        LinearSolvers.ldiv!(nlp.linear_solver, ψ, ∇gT, ∂fₓ)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(ψ, ∇gₓ', ∂fₓ)
-    end
+    _second_order_adjoint_ψ!(nlp, ψ, ∂fₓ)
 
-    hessvec .= ∂fᵤ
+    hessvec .+= ∂fᵤ
     mul!(hessvec, transpose(∇gᵤ), ψ, -1.0, 1.0)
     return
 end
@@ -435,24 +460,14 @@ function hessprod!(nlp::ReducedSpaceEvaluator, hessvec, u, w)
     buffer = nlp.buffer
     H = nlp.hessians
 
-    ∇gᵤ = nlp.state_jacobian.Ju.J
-
-    z = H.z
-    ψ = H.ψ
-    # z = -(∇gₓ \ (∇gᵤ * w))
-    mul!(z, ∇gᵤ, w, -1.0, 0.0)
-
-    if isa(u, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(∇gₓ, z)
-    end
+    fill!(hessvec, 0.0)
 
     # Two vector products
     tgt = H.tmp_tgt
     hv = H.tmp_hv
+    z = H.z
+
+    _second_order_adjoint_z!(nlp, z, w)
 
     # Init tangent
     tgt[1:nx] .= z
@@ -460,14 +475,17 @@ function hessprod!(nlp::ReducedSpaceEvaluator, hessvec, u, w)
     tgt[1+nx:nx+nu] .= w
 
     ∂f = similar(buffer.pg)
     ∂²f = similar(buffer.pg)
+
+    # Adjoint and Hessian of cost function
     adjoint_cost!(nlp.model, ∂f, buffer.pg)
     hessian_cost!(nlp.model, ∂²f)
+
     ## OBJECTIVE HESSIAN
     hessian_prod_objective!(nlp.model, H.obj, nlp.obj_stack, hv, ∂²f, ∂f, buffer, tgt)
     ∇²fx = hv[1:nx]
     ∇²fu = hv[nx+1:nx+nu]
 
-    _reduced_hessian_prod!(nlp, hessvec, ∇²fx, ∇²fu, hv, ψ, tgt)
+    _reduced_hessian_prod!(nlp, hessvec, ∇²fx, ∇²fu, tgt)
     return
 end
 
@@ -482,21 +500,10 @@ function hessian_lagrangian_penalty_prod!(
     buffer = nlp.buffer
     H = nlp.hessians
 
-    ∇gᵤ = nlp.state_jacobian.Ju.J
+    fill!(hessvec, 0.0)
 
-    λ = - nlp.λ
     z = H.z
-    ψ = H.ψ
-    # z = -(∇gₓ \ (∇gᵤ * w))
-    mul!(z, ∇gᵤ, w, -1.0, 0.0)
-
-    if isa(u, CUDA.CuArray)
-        ∇gₓ = nlp.state_jacobian.Jx.J
-        LinearSolvers.ldiv!(nlp.linear_solver, z, ∇gₓ, z)
-    else
-        ∇gₓ = nlp.factorization
-        ldiv!(∇gₓ, z)
-    end
+    _second_order_adjoint_z!(nlp, z, w)
 
     # Two vector products
     tgt = H.tmp_tgt
@@ -506,11 +513,13 @@ function hessian_lagrangian_penalty_prod!(
     tgt[1:nx] .= z
     tgt[1+nx:nx+nu] .= w
 
-    ## OBJECTIVE HESSIAN
     ∂f = similar(buffer.pg)
     ∂²f = similar(buffer.pg)
+    # Adjoint and Hessian of cost function
     adjoint_cost!(nlp.model, ∂f, buffer.pg)
     hessian_cost!(nlp.model, ∂²f)
+
+    ## OBJECTIVE HESSIAN
     hessian_prod_objective!(nlp.model, H.obj, nlp.obj_stack, hv, ∂²f, ∂f, buffer, tgt)
     ∇²Lx = σ .* hv[1:nx]
     ∇²Lu = σ .* hv[nx+1:nx+nu]
@@ -535,7 +544,7 @@ function hessian_lagrangian_penalty_prod!(
     end
 
     # Second order adjoint
-    _reduced_hessian_prod!(nlp, hessvec, ∇²Lx, ∇²Lu, hv, ψ, tgt)
+    _reduced_hessian_prod!(nlp, hessvec, ∇²Lx, ∇²Lu, tgt)
     return
 end
 
diff --git a/src/Polar/objective.jl b/src/Polar/objective.jl
index c68fdc17..0c69003e 100644
--- a/src/Polar/objective.jl
+++ b/src/Polar/objective.jl
@@ -57,7 +57,7 @@ function put(
     # Adjoint w.r.t Slack nodes
     adjoint!(polar, active_power_constraints, adj_pg[ref2gen] , buffer.pg, obj_autodiff, buffer)
 
-    # Adjoint w.t.t. PV nodes
+    # Adjoint w.r.t. PV nodes
     adj_pinj[index_pv] .= adj_pg[pv2gen]
 
     # Adjoint w.r.t. x and u
@@ -126,3 +126,103 @@ function hessian_prod_objective!(
     return nothing
 end
 
+#=
+    Special function to compute the Hessian of ProxAL's objective.
+    It allows reusing intermediate results, for efficiency.
+
+    This function collects the contribution of the state and
+    the control (in `hv_xu`) and the contribution of the slack
+    variable (in `hv_s`).
+
+    For ProxAL, we have:
+    H = [ H_xx  H_ux  J_x' ]
+        [ H_xu  H_uu  J_u' ]
+        [ J_x   J_u   ρ I  ]
+
+    so, if `v = [v_x; v_u; v_s]`, we get
+
+    H * v = [ H_xx v_x + H_ux v_u + J_x' v_s ]
+            [ H_xu v_x + H_uu v_u + J_u' v_s ]
+            [ J_x v_x  + J_u v_u  + ρ v_s    ]
+
+=#
+function hessian_prod_objective_proxal!(
+    polar::PolarForm,
+    ∇²f::AutoDiff.Hessian, ∇f::AdjointStackObjective,
+    hv_xu::AbstractVector,
+    hv_s::AbstractVector,
+    ∂²f::AbstractVector, ∂f::AbstractVector,
+    buffer::PolarNetworkState,
+    tgt::AbstractVector,
+    vs::AbstractVector,
+    ρ::Float64,
+    has_slack::Bool,
+)
+    nx = get(polar, NumberOfState())
+    nu = get(polar, NumberOfControl())
+    ngen = get(polar, PS.NumberOfGenerators())
+
+    # Indexing of generators
+    pv2gen = polar.indexing.index_pv_to_gen ; npv = length(pv2gen)
+    ref2gen = polar.indexing.index_ref_to_gen ; nref = length(ref2gen)
+
+    # Split tangent into its x and u parts
+    tx = @view tgt[1:nx]
+    tu = @view tgt[1+nx:nx+nu]
+
+    #= BLOCK UU
+        [ H_xx v_x + H_ux v_u ]
+        [ H_xu v_x + H_uu v_u ]
+    =#
+    ## Step 1: evaluate (∂f / ∂pg) * ∂²pg
+    ∂pg_ref = @view ∂f[ref2gen]
+    AutoDiff.adj_hessian_prod!(polar, ∇²f, hv_xu, buffer, ∂pg_ref, tgt)
+
+    ## Step 2: evaluate ∂pg' * (∂²f / ∂²pg) * ∂pg
+    # Compute the adjoint of pg at the slack node (∂pg_ref)
+    ∇f.∂pg .= 0.0
+    ∇f.∂pg[ref2gen] .= 1.0
+    put(polar, PS.Generators(), PS.ActivePower(), ∇f, buffer)
+
+    ∇pgₓ = ∇f.∇fₓ
+    ∇pgᵤ = ∇f.∇fᵤ
+
+    @views begin
+        tpg = tgt[nx+nu-npv+1:end]
+
+        scale_x = dot(∇pgₓ, ∂²f[ref2gen[1]], tx)
+        scale_u = dot(∇pgᵤ, ∂²f[ref2gen[1]], tu)
+        # Contribution of slack node
+        hv_xu[1:nx] .+= (scale_x + scale_u) .* ∇pgₓ
+        hv_xu[1+nx:nx+nu] .+= (scale_x + scale_u) .* ∇pgᵤ
+        # Contribution of PV nodes (only through power generation)
+        hv_xu[nx+nu-npv+1:end] .+= ∂²f[pv2gen] .* tpg
+    end
+
+    if has_slack
+        @views begin
+            #= BLOCK SU
+                [ J_x' v_s ]
+                [ J_u' v_s ]
+                [ J_x v_x + J_u v_u ]
+            =#
+            # 1. Contribution of slack node
+            hv_s[ref2gen] .-= ρ * (dot(∇pgᵤ, tu) + dot(∇pgₓ, tx))
+            hv_xu[1:nx] .-= ρ .* ∇pgₓ .* vs[ref2gen]
+            hv_xu[1+nx:nx+nu] .-= ρ .* ∇pgᵤ .* vs[ref2gen]
+
+            # 2. Contribution of PV nodes
+            hv_s[pv2gen] .-= ρ .* tu[nref+npv+1:end]
+            hv_xu[nx+nref+npv+1:end] .-= ρ .* vs[pv2gen]
+
+            #= BLOCK SS
+                ρ I
+            =#
+            # Hessian w.r.t. slack
+            hv_s .+= ρ .* vs
+        end
+    end
+
+    return nothing
+end
+
diff --git a/test/Evaluators/proxal_evaluator.jl b/test/Evaluators/proxal_evaluator.jl
index 2cf0beef..5671d757 100644
--- a/test/Evaluators/proxal_evaluator.jl
+++ b/test/Evaluators/proxal_evaluator.jl
@@ -17,7 +17,7 @@ import ExaPF: PS
     end
 
     # Build reference evaluator
-    nlp = ExaPF.ReducedSpaceEvaluator(datafile)
+    nlp = ExaPF.ReducedSpaceEvaluator(datafile; powerflow_solver=NewtonRaphson(tol=1e-12))
     S = ExaPF.array_type(nlp)
 
     u0 = ExaPF.initial(nlp)
@@ -35,7 +35,7 @@
 
     # Compute objective
     c = ExaPF.objective(prox, w)
 
-    @testset "Gradient" begin
+    @testset "Gradient & Hessian" begin
         g = similar(w) ; fill!(g, 0)
         ExaPF.gradient!(prox, g, w)
@@ -60,6 +60,16 @@
         ExaPF.gradient!(prox, g, w)
         grad_fd = FiniteDiff.finite_difference_gradient(reduced_cost, w)
         @test isapprox(grad_fd, g, rtol=1e-6)
+
+        hv = similar(w) ; fill!(hv, 0)
+        tgt = similar(w) ; fill!(tgt, 0)
+        tgt[1] = 1.0
+        ExaPF.hessprod!(prox, hv, w, tgt)
+        H = ExaPF.hessian(prox, w)
+
+        hess_fd = FiniteDiff.finite_difference_hessian(reduced_cost, w)
+        # Compare against the `data` field, as hess_fd is of type Symmetric
+        @test H ≈ hess_fd.data rtol=1e-6
     end
 
     @testset "Constraints" begin
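-- 

For reference, the `hessprod!` functions above assemble the reduced
Hessian-vector product Hᵣ v = Wᵤᵤ v + Wᵤₓ z - ∇gᵤ' ψ from the two
second-order adjoints z = -(∇gₓ \ (∇gᵤ v)) and ψ = ∇gₓ' \ (Wₓₓ z + Wₓᵤ v),
where W denotes the full-space Hessian of the objective (or Lagrangian).
Below is a minimal dense sketch of that recipe on a toy problem; it is an
illustration only: the names (`reduced_hessprod`, `Wxx`, `gx`, ...) are
hypothetical, and the dense `\` solves stand in for the sparse factorization
and `LinearSolvers.ldiv!` calls used in the patch.

using LinearAlgebra

# Reduced Hessian-vector product via second-order adjoints, on dense toy
# data. W = [Wxx Wxu; Wxu' Wuu] is the full-space Hessian; gx = ∂g/∂x and
# gu = ∂g/∂u are the Jacobians of the constraints g(x, u) = 0.
function reduced_hessprod(Wxx, Wxu, Wuu, gx, gu, v)
    z = -(gx \ (gu * v))      # forward second-order adjoint (cf. _second_order_adjoint_z!)
    hx = Wxx * z + Wxu * v    # full-space Hessian applied to the tangent (z, v)
    hu = Wxu' * z + Wuu * v
    ψ = gx' \ hx              # backward second-order adjoint (cf. _second_order_adjoint_ψ!)
    return hu - gu' * ψ       # reduction onto the control space
end

# Consistency check against the explicit reduction Hᵣ = S' W S, with the
# sensitivity matrix S = [-gx \ gu; I]; this mirrors the finite-difference
# check added in test/Evaluators/proxal_evaluator.jl.
nx, nu = 5, 3
A = randn(nx + nu, nx + nu); W = A' * A   # random symmetric Hessian
Wxx = W[1:nx, 1:nx]; Wxu = W[1:nx, nx+1:end]; Wuu = W[nx+1:end, nx+1:end]
gx = randn(nx, nx) + 5.0I                 # well-conditioned state Jacobian
gu = randn(nx, nu)
S = [-(gx \ gu); Matrix{Float64}(I, nu, nu)]
v = randn(nu)
@assert reduced_hessprod(Wxx, Wxu, Wuu, gx, gu, v) ≈ (S' * W * S) * v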