diff --git a/Project.toml b/Project.toml index e2c602ca..87b03f0a 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.11.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +KLU = "ef3ab10e-7fda-4108-b977-705223b18434" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" KrylovPreconditioners = "45d422c2-293f-44ce-8315-2cb988662dec" @@ -30,6 +31,7 @@ Adapt = "3.3" CUDA = "4.1, 5" ForwardDiff = "0.10" KernelAbstractions = "0.9" +KLU = "0.4" Krylov = "0.9" KrylovPreconditioners = "0.2" LazyArtifacts = "1.9" diff --git a/benchmark.jl b/benchmark.jl deleted file mode 100644 index 65939923..00000000 --- a/benchmark.jl +++ /dev/null @@ -1,70 +0,0 @@ -using CUDA -using KernelAbstractions -using ExaPF -import ExaPF: AutoDiff -using LazyArtifacts -using LinearAlgebra -using KrylovPreconditioners -using PProf -using Profile - - -const PS = ExaPF.PowerSystem -const LS = ExaPF.LinearSolvers -# datafile = joinpath(artifact"ExaData", "ExaData", "matpower", "case_ACTIVSg70k.m") -# datafile = joinpath(artifact"ExaData", "ExaData", "case1354.m") -datafile = joinpath(artifact"ExaData", "ExaData", "case9241pegase.m") -polar = ExaPF.PolarForm(datafile, CPU()) -stack = ExaPF.NetworkStack(polar) -ExaPF.init!(polar, stack) -@time convergence = run_pf(polar, stack; verbose=1) - -pf = PS.PowerNetwork(datafile) -polar_gpu = ExaPF.PolarForm(pf, CUDABackend()) -stack_gpu = ExaPF.NetworkStack(polar_gpu) -basis_gpu = ExaPF.PolarBasis(polar_gpu) -pflow_gpu = ExaPF.PowerFlowBalance(polar_gpu) ∘ basis_gpu -mapx = ExaPF.mapping(polar, State()); -jx_gpu = ExaPF.Jacobian(polar_gpu, pflow_gpu, mapx) -direct_linear_solver = LS.DirectSolver(jx_gpu.J) -pf_algo = NewtonRaphson(; verbose=1, tol=1e-10) - - -jac_gpu = jx_gpu.J; - -npartitions = div(size(jac_gpu,1), 32); -precond = BlockJacobiPreconditioner(jac_gpu, npartitions, CUDABackend(), 0); -# precond = KrylovPreconditioners.kp_ilu0(jac_gpu) -# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=true, atol=1e-10, verbose=0); -# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=false, atol=1e-10, verbose=0); -# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=false, atol=1e-10, verbose=0, maxiter=500); -linear_solver = ExaPF.KrylovBICGSTAB( - jac_gpu; P=precond, ldiv=false, scaling=true, - rtol=1e-7, atol=1e-7, verbose=0 -); -pf_algo = NewtonRaphson(; verbose=1, tol=1e-7, maxiter=20) -ExaPF.init!(polar_gpu, stack_gpu) -reset_timer!(linear_solver.precond) -@time convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver) -@show get_timer(linear_solver.precond) -ExaPF.init!(polar_gpu, stack_gpu) -@time convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=direct_linear_solver) -@show linear_solver.inner.stats.niter -@show convergence -# Profiling -ExaPF.init!(polar_gpu, stack_gpu) -Profile.clear() -Profile.@profile begin - linear_solver.precond.timer_update = 0.0 - convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver) - @show linear_solver.precond.timer_update -end -Profile.clear() -ExaPF.init!(polar_gpu, stack_gpu) -Profile.@profile begin - linear_solver.precond.timer_update = 0.0 - convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver) - @show linear_solver.precond.timer_update -end -PProf.pprof() -@show convergence diff --git a/examples/power_flow.jl b/examples/power_flow.jl index b0232030..6fa7df01 100644 --- a/examples/power_flow.jl +++ b/examples/power_flow.jl @@ -54,8 +54,8 @@ precond = LS.BlockJacobiPreconditioner(J, npartitions, localdevice, noverlap) #= Instantiate iterative linear solver =# -lin_solver = ExaPF.LinearSolvers.KrylovBICGSTAB -algo = LS.KrylovBICGSTAB(J; P=precond) +lin_solver = ExaPF.LinearSolvers.Bicgstab +algo = LS.Bicgstab(J; P=precond) #= Power flow resolution diff --git a/ext/ExaPFAMDGPUExt.jl b/ext/ExaPFAMDGPUExt.jl index 0df68a04..0923a5b6 100644 --- a/ext/ExaPFAMDGPUExt.jl +++ b/ext/ExaPFAMDGPUExt.jl @@ -21,10 +21,10 @@ const KP = KrylovPreconditioners LS.DirectSolver(J::ROCSparseMatrixCSR; options...) = ExaPF.LS.DirectSolver(nothing) LS.update!(solver::ExaPF.LS.AbstractIterativeLinearSolver, J::ROCSparseMatrixCSR) = KP.update!(solver.precond, J) LS._get_type(J::ROCSparseMatrixCSR) = ROCArray{Float64, 1, AMDGPU.Mem.HIPBuffer} -LS.default_linear_solver(A::ROCSparseMatrixCSR, device::ROCBackend) = ExaPF.LS.KrylovBICGSTAB(A) +LS.default_linear_solver(A::ROCSparseMatrixCSR, device::ROCBackend) = ExaPF.LS.Bicgstab(A) ExaPF._iscsr(::ROCSparseMatrixCSR) = true ExaPF._iscsc(::ROCSparseMatrixCSR) = false -function LS.scaling!(::LS.KrylovBICGSTAB,A::ROCSparseMatrixCSR,b) +function LS.scaling!(::LS.Bicgstab, A::ROCSparseMatrixCSR, b) KP.scaling_csr!(A,b) end @@ -33,7 +33,7 @@ end List all linear solvers available solving the power flow on an NVIDIA GPU. """ -ExaPF.list_solvers(::ROCBackend) = [LS.BICGSTAB, LS.DQGMRES, LS.EigenBICGSTAB, LS.KrylovBICGSTAB] +ExaPF.list_solvers(::ROCBackend) = [LS.Dqgmres, LS.Bicgstab] include("amdgpu_wrapper.jl") end diff --git a/ext/ExaPFCUDAExt.jl b/ext/ExaPFCUDAExt.jl index 7a52c62c..2c831e1b 100644 --- a/ext/ExaPFCUDAExt.jl +++ b/ext/ExaPFCUDAExt.jl @@ -24,7 +24,7 @@ LS._get_type(J::CuSparseMatrixCSR) = CuArray{Float64, 1, CUDA.Mem.DeviceBuffer} LS.default_linear_solver(A::CuSparseMatrixCSR, device::CUDABackend) = ExaPF.LS.DirectSolver(A) ExaPF._iscsr(::CuSparseMatrixCSR) = true ExaPF._iscsc(::CuSparseMatrixCSR) = false -function LS.scaling!(::LS.KrylovBICGSTAB,A::CuSparseMatrixCSR,b) +function LS.scaling!(::LS.Bicgstab, A::CuSparseMatrixCSR, b) KP.scaling_csr!(A,b) end """ @@ -32,7 +32,7 @@ end List all linear solvers available solving the power flow on an NVIDIA GPU. """ -ExaPF.list_solvers(::CUDABackend) = [LS.DirectSolver, LS.BICGSTAB, LS.DQGMRES, LS.EigenBICGSTAB, LS.KrylovBICGSTAB] +ExaPF.list_solvers(::CUDABackend) = [LS.DirectSolver, LS.Dqgmres, LS.Bicgstab] include("cuda_wrapper.jl") end diff --git a/scripts/linear_solvers/benchmark.jl b/scripts/linear_solvers/benchmark.jl index 24ebc53f..9b5e2de7 100644 --- a/scripts/linear_solvers/benchmark.jl +++ b/scripts/linear_solvers/benchmark.jl @@ -65,7 +65,7 @@ function benchmark_gpu_krylov(datafile, pf_solver; ntrials=3) n_partitions = div(n_states, n_blocks) jac_gpu = instance.jacobian.J precond = BlockJacobiPreconditioner(jac_gpu, n_partitions, CUDABackend(), 0) - krylov_solver = ExaPF.KrylovBICGSTAB( + krylov_solver = ExaPF.Bicgstab( jac_gpu; P=precond, ldiv=false, scaling=true, rtol=1e-7, atol=1e-7, verbose=0, ) diff --git a/scripts/linear_solvers/benchmark_block.jl b/scripts/linear_solvers/benchmark_block.jl index da8d7856..140d1fcb 100644 --- a/scripts/linear_solvers/benchmark_block.jl +++ b/scripts/linear_solvers/benchmark_block.jl @@ -85,7 +85,7 @@ function benchmark_gpu_krylov(datafile, n_scenarios, pf_solver; ntrials=3, magni n_partitions = div(n_states, n_blocks) jac_gpu = instance.jacobian.J precond = BlockJacobiPreconditioner(jac_gpu, n_partitions, CUDABackend(), 0) - krylov_solver = ExaPF.KrylovBICGSTAB( + krylov_solver = ExaPF.Bicgstab( jac_gpu; P=precond, ldiv=false, scaling=true, rtol=1e-7, atol=1e-7, verbose=0, ) diff --git a/src/LinearSolvers/LinearSolvers.jl b/src/LinearSolvers/LinearSolvers.jl index 7f1f63c0..06295ef2 100644 --- a/src/LinearSolvers/LinearSolvers.jl +++ b/src/LinearSolvers/LinearSolvers.jl @@ -16,6 +16,8 @@ import Metis import ..ExaPF: xnorm import Base.size, Base.sizeof, Base.format_bytes + +using KLU import Krylov: KrylovStats, allocate_if, ksizeof, FloatOrComplex, ktimer, matrix_to_vector, kdisplay, mulorldiv! using KrylovPreconditioners @@ -23,7 +25,7 @@ const KA = KernelAbstractions const KP = KrylovPreconditioners export bicgstab, list_solvers, default_linear_solver -export DirectSolver, BICGSTAB, EigenBICGSTAB, KrylovBICGSTAB +export DirectSolver, BICGSTAB, EigenBICGSTAB, Bicgstab export do_scaling, scaling! @enum( @@ -35,9 +37,6 @@ export do_scaling, scaling! Diverged, ) -include("bicgstab.jl") -include("bicgstab_eigen.jl") - include("utils.jl") include("block_gmres.jl") @@ -102,11 +101,11 @@ struct DirectSolver{Fac<:Union{Nothing, LinearAlgebra.Factorization}} <: Abstrac factorization::Fac end -DirectSolver(J; options...) = DirectSolver(lu(J)) +DirectSolver(J; options...) = DirectSolver(klu(J)) DirectSolver() = DirectSolver(nothing) function update!(s::DirectSolver, J::AbstractMatrix) - lu!(s.factorization, J) # Update factorization inplace + klu!(s.factorization, J) # Update factorization inplace end # Reuse factorization in update @@ -138,89 +137,28 @@ end update!(solver::AbstractIterativeLinearSolver, J::SparseMatrixCSC) = KP.update!(solver.precond, J) """ - BICGSTAB <: AbstractIterativeLinearSolver - BICGSTAB(precond; maxiter=2_000, tol=1e-8, verbose=false) - -Custom BICGSTAB implementation to solve iteratively the linear system -``A x = y``. -""" -struct BICGSTAB <: AbstractIterativeLinearSolver - precond::AbstractKrylovPreconditioner - maxiter::Int - tol::Float64 - verbose::Bool -end -function BICGSTAB(J::AbstractSparseMatrix; - P=BlockJacobiPreconditioner(J), maxiter=2_000, tol=1e-8, verbose=false -) - return BICGSTAB(P, maxiter, tol, verbose) -end - -function ldiv!(solver::BICGSTAB, - y::AbstractVector, J::AbstractMatrix, x::AbstractVector; options... -) - y[:], n_iters, status = bicgstab(J, x, solver.precond, y; maxiter=solver.maxiter, - verbose=solver.verbose, tol=solver.tol) - if status != Converged - @warn("BICGSTAB failed to converge. Final status is $(status)") - end - return n_iters -end - -""" - EigenBICGSTAB <: AbstractIterativeLinearSolver - EigenBICGSTAB(precond; maxiter=2_000, tol=1e-8, verbose=false) - -Julia's port of Eigen's BICGSTAB to solve iteratively the linear system -``A x = y``. -""" -struct EigenBICGSTAB <: AbstractIterativeLinearSolver - precond::AbstractKrylovPreconditioner - maxiter::Int - tol::Float64 - verbose::Bool -end -function EigenBICGSTAB(J::AbstractSparseMatrix; - P=BlockJacobiPreconditioner(J), maxiter=2_000, tol=1e-8, verbose=false -) - return EigenBICGSTAB(P, maxiter, tol, verbose) -end - -function ldiv!(solver::EigenBICGSTAB, - y::AbstractVector, J::AbstractMatrix, x::AbstractVector; options... -) - y[:], n_iters, status = bicgstab_eigen(J, x, solver.precond, y; maxiter=solver.maxiter, - verbose=solver.verbose, tol=solver.tol) - if status != Converged - @warn("EigenBICGSTAB failed to converge. Final status is $(status)") - end - - return n_iters -end - -""" - DQGMRES <: AbstractIterativeLinearSolver - DQGMRES(precond; verbose=false, memory=4) + Dqgmres <: AbstractIterativeLinearSolver + Dqgmres(precond; verbose=false, memory=4) -Wrap `Krylov.jl`'s DQGMRES algorithm to solve iteratively the linear system +Wrap `Krylov.jl`'s Dqgmres algorithm to solve iteratively the linear system ``A x = y``. """ -struct DQGMRES <: AbstractIterativeLinearSolver +struct Dqgmres <: AbstractIterativeLinearSolver inner::Krylov.DqgmresSolver precond::AbstractKrylovPreconditioner memory::Int verbose::Bool end -function DQGMRES(J::AbstractSparseMatrix; +function Dqgmres(J::AbstractSparseMatrix; P=BlockJacobiPreconditioner(J), memory=4, verbose=false ) n, m = size(J) S = _get_type(J) solver = Krylov.DqgmresSolver(n, m, memory, S) - return DQGMRES(solver, P, memory, verbose) + return Dqgmres(solver, P, memory, verbose) end -function ldiv!(solver::DQGMRES, +function ldiv!(solver::Dqgmres, y::AbstractVector, J::AbstractMatrix, x::AbstractVector; options... ) Krylov.dqgmres!(solver.inner, J, x; N=solver.precond) @@ -229,13 +167,13 @@ function ldiv!(solver::DQGMRES, end """ - KrylovBICGSTAB <: AbstractIterativeLinearSolver - KrylovBICGSTAB(precond; verbose=0, rtol=1e-10, atol=1e-10) + Bicgstab <: AbstractIterativeLinearSolver + Bicgstab(precond; verbose=0, rtol=1e-10, atol=1e-10) Wrap `Krylov.jl`'s BICGSTAB algorithm to solve iteratively the linear system ``A x = y``. """ -struct KrylovBICGSTAB <: AbstractIterativeLinearSolver +struct Bicgstab <: AbstractIterativeLinearSolver inner::Krylov.BicgstabSolver precond::AbstractKrylovPreconditioner verbose::Int @@ -245,17 +183,17 @@ struct KrylovBICGSTAB <: AbstractIterativeLinearSolver scaling::Bool maxiter::Int64 end -do_scaling(linear_solver::KrylovBICGSTAB) = linear_solver.scaling -function KrylovBICGSTAB(J::AbstractSparseMatrix; +do_scaling(linear_solver::Bicgstab) = linear_solver.scaling +function Bicgstab(J::AbstractSparseMatrix; P=BlockJacobiPreconditioner(J), verbose=0, rtol=1e-10, atol=1e-10, ldiv=false, scaling=false, maxiter=size(J,1) ) n, m = size(J) S = _get_type(J) solver = Krylov.BicgstabSolver(n, m, S) - return KrylovBICGSTAB(solver, P, verbose, atol, rtol, ldiv, scaling, maxiter) + return Bicgstab(solver, P, verbose, atol, rtol, ldiv, scaling, maxiter) end -function ldiv!(solver::KrylovBICGSTAB, +function ldiv!(solver::Bicgstab, y::AbstractVector, J::AbstractMatrix, x::AbstractVector; max_atol = solver.atol, max_rtol = solver.rtol, options... ) @@ -284,7 +222,7 @@ end List all linear solvers available solving the power flow on the CPU. """ -list_solvers(::KA.CPU) = [DirectSolver, DQGMRES, BICGSTAB, EigenBICGSTAB, KrylovBICGSTAB] +list_solvers(::KA.CPU) = [DirectSolver, Dqgmres, Bicgstab] """ default_linear_solver(A, ::KA.CPU) diff --git a/src/LinearSolvers/bicgstab.jl b/src/LinearSolvers/bicgstab.jl deleted file mode 100644 index 7276d683..00000000 --- a/src/LinearSolvers/bicgstab.jl +++ /dev/null @@ -1,111 +0,0 @@ -""" - bicgstab(A, b, P, xi; - tol=1e-8, - maxiter=size(A, 1), - verbose=false, - maxtol=1e20) - -BiCGSTAB implementation according to - -> Van der Vorst, Henk A. -> "Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems." -> SIAM Journal on scientific and Statistical Computing 13, no. 2 (1992): 631-644. - -""" -function bicgstab(A, b, P, xi; - tol=1e-8, maxiter=size(A, 1), verbose=false, maxtol=1e20) - # parameters - n = size(b, 1) - mul!(xi, P, b) - ri = b - A * xi - br0 = copy(ri) - rho0 = 1.0 - alpha = 1.0 - omega0 = 1.0 - vi = similar(xi) - pi = similar(xi) - fill!(vi, 0) - fill!(pi, 0) - - rhoi = copy(rho0) - omegai = copy(omega0) - residual = copy(b) - - y = similar(pi) - s = similar(pi) - z = similar(pi) - t1 = similar(pi) - t2 = similar(pi) - pi1 = similar(pi) - vi1 = similar(pi) - t = similar(pi) - - go = true - status = Unsolved - iter = 1 - restarts = 0 - while go - rhoi1 = dot(br0, ri) ; - if abs(rhoi1) < 1e-20 - restarts += 1 - ri .= b - mul!(ri, A, xi, -1.0, 1.0) - br0 .= ri - residual .= b - rho0 = 1.0 - rhoi = rho0 - rhoi1 = dot(br0,ri) - alpha = 1.0 - omega0 = 1.0 - omegai = 1.0 - fill!(vi, 0.0) - fill!(pi, 0.0) - end - beta = (rhoi1/rhoi) * (alpha / omegai) - pi1 .= ri .+ beta .* (pi .- omegai .* vi) - mul!(y, P, pi1) - mul!(vi1, A, y) - alpha = rhoi1 / dot(br0, vi1) - s .= ri .- (alpha .* vi1) - - mul!(z, P, s) - mul!(t, A, z) - mul!(t1, P, t) - mul!(t2, P, s) - omegai1 = dot(t1, t2) / dot(t1, t1) - xi .= xi .+ alpha .* y .+ omegai1 .* z - - mul!(residual, A, xi, 1.0, -1.0) - anorm = xnorm(residual) - - if verbose - @printf("%4d %10.4e\n", iter, anorm) - end - - if isnan(anorm) - go = false - status = NotANumber - end - if anorm < tol - go = false - status = Converged - verbose && println("Tolerance reached at iteration $iter") - elseif anorm > maxtol - go = false - status = Diverged - elseif maxiter == iter - go = false - status = MaxIterations - verbose && println("Not converged") - end - - ri .= s .- omegai1 .* t - rhoi = rhoi1 - pi .= pi1 - vi .= vi1 - omegai = omegai1 - iter += 1 - end - - return xi, iter, status -end diff --git a/src/LinearSolvers/bicgstab_eigen.jl b/src/LinearSolvers/bicgstab_eigen.jl deleted file mode 100644 index a6b4a089..00000000 --- a/src/LinearSolvers/bicgstab_eigen.jl +++ /dev/null @@ -1,94 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. - -""" - bicgstab_eigen - -BiCGSTAB implementation exactly corresponding to the implementation in Eigen -""" -function bicgstab_eigen(A, b, P, x; - tol=1e-8, maxiter=size(A, 1), verbose=false) - - mul!(x, P, b) - r = b .- A * x - r0 = similar(r) - r0 .= r - - r0_sqnorm = norm(r0)^2 - rhs_sqnorm = norm(b)^2 - if rhs_sqnorm == 0 - x .= 0.0 - return x, 0, Converged - end - rho = 1.0 - alpha = 1.0 - w = 1.0 - - v = similar(x); p = similar(x) - fill!(v, 0.0); fill!(p, 0.0) - - y = similar(x); z = similar(x) - s = similar(x); t = similar(x) - - tol2 = tol*tol*rhs_sqnorm - eps2 = eps(Float64)^2 - i = 0 - restarts = 0 - status = Unsolved - - while norm(r)^2 > tol2 && i < maxiter - rho_old = rho - - rho = dot(r0, r) - - if abs(rho) < eps2*r0_sqnorm - mul!(r, A, x) - r .= b .- r - r0 .= r - v .= 0.0 - p .= 0.0 - rho = norm(r)^2 - r0_sqnorm = norm(r)^2 - if restarts == 0 - i = 0 - end - restarts += 1 - end - - beta = (rho/rho_old) * (alpha / w) - p .= r .+ (beta * (p .- w .* v)) - - mul!(y, P, p) - mul!(v, A, y) - - alpha = rho / dot(r0, v) - s .= r .- alpha .* v - - mul!(z, P, s) - mul!(t, A, z) - - tmp = norm(t)^2 - if tmp > 0.0 - w = dot(t,s) / tmp - else - w = 0.0 - end - x .= x .+ alpha * y .+ w * z; - r .= s .- w * t; - i += 1 - end - if maxiter == i - go = false - status = MaxIterations - verbose && println("Restarts: $restarts") - verbose && println("Not converged") - end - if norm(r)^2 <= tol2 - go = false - status = Converged - verbose && println("Restarts: $restarts") - verbose && println("Tolerance reached at iteration $i") - end - return x, i, status -end diff --git a/test/Polar/TestPolarForm.jl b/test/Polar/TestPolarForm.jl index bf9800a0..f1558717 100644 --- a/test/Polar/TestPolarForm.jl +++ b/test/Polar/TestPolarForm.jl @@ -2,8 +2,6 @@ module TestPolarFormulation using Test -using AMDGPU -using CUDA using FiniteDiff using KernelAbstractions using LinearAlgebra diff --git a/test/quickstart.jl b/test/quickstart.jl index 489815e1..491b020d 100644 --- a/test/quickstart.jl +++ b/test/quickstart.jl @@ -58,7 +58,7 @@ const LS = ExaPF.LinearSolvers npartitions = 8 jac = jx.J precond = BlockJacobiPreconditioner(jac, npartitions, CPU()) - iterative_linear_solver = ExaPF.KrylovBICGSTAB(jac; P=precond) + iterative_linear_solver = ExaPF.Bicgstab(jac; P=precond) @test isa(iterative_linear_solver, LS.AbstractIterativeLinearSolver) # Test default tolerance @test iterative_linear_solver.atol == 1e-10 @@ -98,7 +98,7 @@ const LS = ExaPF.LinearSolvers # Reinit buffer ExaPF.init!(polar_gpu, stack_gpu) - iterative_linear_solver = ExaPF.KrylovBICGSTAB(jac; P=precond) + iterative_linear_solver = ExaPF.Bicgstab(jac; P=precond) convergence = ExaPF.nlsolve!( pf_solver, jx_gpu, stack_gpu; linear_solver=iterative_linear_solver,