Use KrylovPreconditioners
michel2323 committed Dec 5, 2023
1 parent: 7e3b639 · commit: e657b19
Showing 21 changed files with 176 additions and 506 deletions.
4 changes: 3 additions & 1 deletion Project.toml
@@ -1,13 +1,14 @@
name = "ExaPF"
uuid = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa"
authors = ["Adrian Maldonado <[email protected]>", "Michel Schanen <[email protected]>", "François Pacaud <[email protected]>"]
version = "0.10.1"
version = "0.11.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
KrylovPreconditioners = "45d422c2-293f-44ce-8315-2cb988662dec"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -30,6 +31,7 @@ CUDA = "4.1, 5"
ForwardDiff = "0.10"
KernelAbstractions = "0.9"
Krylov = "0.9"
KrylovPreconditioners = "0.2"
LazyArtifacts = "1.9"
LightGraphs = "1.3"
LinearAlgebra = "1.9"
70 changes: 70 additions & 0 deletions benchmark.jl
@@ -0,0 +1,70 @@
using CUDA
using KernelAbstractions
using ExaPF
import ExaPF: AutoDiff
using LazyArtifacts
using LinearAlgebra
using KrylovPreconditioners
using PProf
using Profile


const PS = ExaPF.PowerSystem
const LS = ExaPF.LinearSolvers
# datafile = joinpath(artifact"ExaData", "ExaData", "matpower", "case_ACTIVSg70k.m")
# datafile = joinpath(artifact"ExaData", "ExaData", "case1354.m")
datafile = joinpath(artifact"ExaData", "ExaData", "case9241pegase.m")
polar = ExaPF.PolarForm(datafile, CPU())
stack = ExaPF.NetworkStack(polar)
ExaPF.init!(polar, stack)
@time convergence = run_pf(polar, stack; verbose=1)

pf = PS.PowerNetwork(datafile)
polar_gpu = ExaPF.PolarForm(pf, CUDABackend())
stack_gpu = ExaPF.NetworkStack(polar_gpu)
basis_gpu = ExaPF.PolarBasis(polar_gpu)
pflow_gpu = ExaPF.PowerFlowBalance(polar_gpu) ∘ basis_gpu
mapx = ExaPF.mapping(polar, State());
jx_gpu = ExaPF.Jacobian(polar_gpu, pflow_gpu, mapx)
direct_linear_solver = LS.DirectSolver(jx_gpu.J)
pf_algo = NewtonRaphson(; verbose=1, tol=1e-10)


jac_gpu = jx_gpu.J;

npartitions = div(size(jac_gpu,1), 32);
precond = BlockJacobiPreconditioner(jac_gpu, npartitions, CUDABackend(), 0);
# precond = KrylovPreconditioners.kp_ilu0(jac_gpu)
# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=true, atol=1e-10, verbose=0);
# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=false, atol=1e-10, verbose=0);
# linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond, ldiv=false, scaling=false, atol=1e-10, verbose=0, maxiter=500);
linear_solver = ExaPF.KrylovBICGSTAB(
jac_gpu; P=precond, ldiv=false, scaling=true,
rtol=1e-7, atol=1e-7, verbose=0
);
pf_algo = NewtonRaphson(; verbose=1, tol=1e-7, maxiter=20)
ExaPF.init!(polar_gpu, stack_gpu)
reset_timer!(linear_solver.precond)
@time convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver)
@show get_timer(linear_solver.precond)
ExaPF.init!(polar_gpu, stack_gpu)
@time convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=direct_linear_solver)
@show linear_solver.inner.stats.niter
@show convergence
# Profiling
ExaPF.init!(polar_gpu, stack_gpu)
Profile.clear()
Profile.@profile begin
linear_solver.precond.timer_update = 0.0
convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver)
@show linear_solver.precond.timer_update
end
Profile.clear()
ExaPF.init!(polar_gpu, stack_gpu)
Profile.@profile begin
linear_solver.precond.timer_update = 0.0
convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver)
@show linear_solver.precond.timer_update
end
PProf.pprof()
@show convergence
4 changes: 2 additions & 2 deletions benchmark/benchmarks.jl
@@ -230,7 +230,7 @@ function benchmark_bicgstab(polar, config, noverlaps, nblocks)
n = size(J, 1)
npartitions = max(ceil(Int, n / nblocks), 2)
precond = LS.BlockJacobiPreconditioner(J, npartitions, polar.device, noverlaps)
algo = LS.KrylovBICGSTAB(J; P=precond)
algo = LS.Bicgstab(J; P=precond)
# Update preconditioner
LS.update!(algo, J)
# RHS
@@ -319,7 +319,7 @@ function run_benchmarks_bicgstab(polar, config=DEFAULT_CONFIG)
nblocks = ref_nblocks[best_time]
config_pf[:noverlaps] = olevel
config_pf[:npartitions] = ceil(Int, nx / nblocks)
res = benchmark_powerflow(polar, config, LS.KrylovBICGSTAB)
res = benchmark_powerflow(polar, config, LS.Bicgstab)
push!(names, "powerflow_bicgstab_$(nblocks)blk_$(olevel)overlap")
push!(timings, res.time)
push!(iters, res.it)
31 changes: 2 additions & 29 deletions docs/src/lib/linearsolver.md
@@ -24,18 +24,8 @@ DirectSolver
## Iterative solvers

```@docs
KrylovBICGSTAB
DQGMRES
BICGSTAB
EigenBICGSTAB
```

`ExaPF.jl` is shipped with a custom BICGSTAB implementation.
However, we highly recommend using `KrylovBICGSTAB` instead,
which has proven to be more robust.
```@docs
bicgstab
Bicgstab
Dqgmres
```

Available linear solvers can be queried with
@@ -60,20 +50,3 @@ BlockGmresSolver
block_gmres
block_gmres!
```

## Preconditioning

To solve linear systems with iterative methods, `ExaPF`
provides an implementation of a block-Jacobi preconditioner,
portable on GPU.

```@docs
AbstractPreconditioner
```

### Block-Jacobi preconditioner

```@docs
BlockJacobiPreconditioner
update
```
1 change: 0 additions & 1 deletion docs/src/man/autodiff.md
@@ -1,5 +1,4 @@
```@meta
CurrentModule = ExaPF
DocTestSetup = quote
using ExaPF
const AD = ExaPF.AD
6 changes: 3 additions & 3 deletions docs/src/man/benchmark.md
@@ -10,15 +10,15 @@ DocTestFilters = [r"ExaPF"]
# Benchmark

For the purpose of performance regression testing, ExaPF provides a lightweight benchmark script. It allows testing the various configurations of the linear solvers used in the Newton-Raphson algorithm and running them on specific hardware. The main Julia script [benchmark/benchmarks.jl](https://github.com/exanauts/ExaPF.jl/tree/main/benchmark/benchmarks.jl) takes all its options from the command line.
The benchmark script takes as input a linear solver (e.g. `KrylovBICGSTAB`), a target architecture as a `KernelAbstractions` object (CPU or CUDABackend), and a case filename which is included in the `ExaData` artifact. An exhaustive list of all available linear solvers can be obtained via [`ExaPF.LinearSolvers.list_solvers`](@ref).
The benchmark script takes as input a linear solver (e.g. `Bicgstab`), a target architecture as a `KernelAbstractions` object (CPU or CUDABackend), and a case filename which is included in the `ExaData` artifact. An exhaustive list of all available linear solvers can be obtained via [`ExaPF.LinearSolvers.list_solvers`](@ref).
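As an aside (not part of the diff), that query can be issued directly; with the CUDA extension loaded, the GPU method shown later in this commit (ext/ExaPFCUDAExt.jl) is used:

```julia
using CUDA   # provides the CUDABackend device type
using ExaPF

# List the linear solvers available on an NVIDIA GPU; per ext/ExaPFCUDAExt.jl
# in this commit, this returns
# [LS.DirectSolver, LS.BICGSTAB, LS.DQGMRES, LS.EigenBICGSTAB, LS.KrylovBICGSTAB].
ExaPF.list_solvers(CUDABackend())
```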

Running
```
julia --project benchmark/benchmarks.jl KrylovBICGSTAB CUDABackend case300.m
julia --project benchmark/benchmarks.jl Bicgstab CUDABackend case300.m
```
yields
```
KrylovBICGSTAB, CUDABackend, case300.m, 69.0, 3.57, 43.7, true
Bicgstab, CUDABackend, case300.m, 69.0, 3.57, 43.7, true
```
The first three fields are the settings of the benchmark run. They are followed by three timings in milliseconds:
1. the time taken by the Newton-Raphson algorithm to solve the power flow,
1 change: 0 additions & 1 deletion docs/src/man/formulations.md
@@ -1,4 +1,3 @@

```@meta
CurrentModule = ExaPF
DocTestSetup = quote
2 changes: 2 additions & 0 deletions docs/src/man/linearsolver.md
@@ -34,6 +34,8 @@ systems. That's why this package comes with a block Jacobi preconditioner
that is tailored towards GPUs and is proven to work well with power flow
problems.

The block-Jacobi preconditioner used in ExaPF has been added to [`KrylovPreconditioners.jl`](https://github.com/JuliaSmoothOptimizers/KrylovPreconditioners.jl).

The Jacobian is partitioned into a dense block diagonal structure using `Metis.jl`, where each block is inverted to build our preconditioner `P`.

![Dense block Jacobi preconditioner \label{fig:preconditioner}](../figures/gpublocks.png)
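As a pointer for readers of this diff (not part of the committed docs), here is a minimal sketch of how the relocated preconditioner is now wired up on an NVIDIA GPU; `J`, `npartitions`, and `noverlap` are illustrative names, and every call mirrors one appearing elsewhere in this commit (benchmark.jl, benchmark/benchmarks.jl, docs/src/quickstart.md):

```julia
using CUDA
using KrylovPreconditioners
using ExaPF
const LS = ExaPF.LinearSolvers

# `J` is assumed to be the power-flow Jacobian on the GPU (a CuSparseMatrixCSR),
# e.g. `J = jx_gpu.J` as in the new benchmark.jl.
npartitions = max(div(size(J, 1), 32), 2)   # roughly 32 rows per Metis block
noverlap = 0                                # no overlap between blocks

# Block-Jacobi preconditioner, now provided by KrylovPreconditioners.jl.
precond = BlockJacobiPreconditioner(J, npartitions, CUDABackend(), noverlap)

# Attach it to ExaPF's Krylov.jl-based BICGSTAB wrapper (`Bicgstab` in the updated docs),
# then refresh the block inverses whenever the Jacobian values change.
linear_solver = LS.Bicgstab(J; P=precond)
LS.update!(linear_solver, J)
```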
4 changes: 2 additions & 2 deletions docs/src/quickstart.md
@@ -230,11 +230,11 @@ for GPU usage. To build an instance with 8 blocks, just write
```@repl quickstart
npartitions = 8;
jac_gpu = jx_gpu.J;
precond = LS.BlockJacobiPreconditioner(jac_gpu, npartitions, CUDABackend());
precond = BlockJacobiPreconditioner(jac_gpu, npartitions, CUDABackend());
```
You can attach the preconditioner to a BICGSTAB algorithm simply as
```@repl quickstart
linear_solver = ExaPF.KrylovBICGSTAB(jac_gpu; P=precond);
linear_solver = ExaPF.Bicgstab(jac_gpu; P=precond);
```
(this will use the BICGSTAB algorithm implemented in
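To round out the excerpt (an editorial sketch, not part of the quickstart diff), the preconditioned solver built above is handed to the Newton-Raphson driver exactly as the new top-level benchmark.jl does; `polar_gpu`, `stack_gpu`, `jx_gpu`, and `linear_solver` are assumed to be the objects constructed earlier:

```julia
# Newton-Raphson with the preconditioned BICGSTAB as the inner linear solver,
# mirroring the calls in benchmark.jl above.
pf_algo = NewtonRaphson(; verbose=1, tol=1e-7, maxiter=20)
ExaPF.init!(polar_gpu, stack_gpu)
convergence = ExaPF.nlsolve!(pf_algo, jx_gpu, stack_gpu; linear_solver=linear_solver)
@show convergence
```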
13 changes: 6 additions & 7 deletions ext/ExaPFAMDGPUExt.jl
@@ -10,23 +10,23 @@ using ForwardDiff
using LinearAlgebra
using KernelAbstractions
using SparseArrays
using KrylovPreconditioners

const KA = KernelAbstractions
const LS = ExaPF.LinearSolvers
const PS = ExaPF.PowerSystem
const AD = ExaPF.AutoDiff
const KP = KrylovPreconditioners

LS.DirectSolver(J::ROCSparseMatrixCSR; options...) = ExaPF.LS.DirectSolver(nothing)
LS.update!(solver::ExaPF.LS.AbstractIterativeLinearSolver, J::ROCSparseMatrixCSR) = ExaPF.LS.update(solver.precond, J, ROCBackend())
LS.update!(solver::ExaPF.LS.AbstractIterativeLinearSolver, J::ROCSparseMatrixCSR) = KP.update!(solver.precond, J)
LS._get_type(J::ROCSparseMatrixCSR) = ROCArray{Float64, 1, AMDGPU.Mem.HIPBuffer}
LS.default_linear_solver(A::ROCSparseMatrixCSR, device::ROCBackend) = ExaPF.LS.KrylovBICGSTAB(A)
function LS._allowscalar(f::Function, J::ROCSparseMatrixCSR)
AMDGPU.allowscalar(true)
f()
AMDGPU.allowscalar(false)
end
ExaPF._iscsr(::ROCSparseMatrixCSR) = true
ExaPF._iscsc(::ROCSparseMatrixCSR) = false
function LS.scaling!(::LS.KrylovBICGSTAB,A::ROCSparseMatrixCSR,b)
KP.scaling_csr!(A,b)
end

"""
list_solvers(::ROCBackend)
@@ -36,5 +36,4 @@ List all linear solvers available for solving the power flow on an AMD GPU.
ExaPF.list_solvers(::ROCBackend) = [LS.BICGSTAB, LS.DQGMRES, LS.EigenBICGSTAB, LS.KrylovBICGSTAB]

include("amdgpu_wrapper.jl")
include("amdgpu_preconditioner.jl")
end
10 changes: 7 additions & 3 deletions ext/ExaPFCUDAExt.jl
@@ -10,19 +10,24 @@ using ForwardDiff
using LinearAlgebra
using KernelAbstractions
using SparseArrays
using KrylovPreconditioners

const KA = KernelAbstractions
const LS = ExaPF.LinearSolvers
const PS = ExaPF.PowerSystem
const AD = ExaPF.AutoDiff
const KP = KrylovPreconditioners

LS.DirectSolver(J::CuSparseMatrixCSR; options...) = ExaPF.LS.DirectSolver(nothing)
LS.update!(solver::ExaPF.LS.AbstractIterativeLinearSolver, J::CuSparseMatrixCSR) = ExaPF.LS.update(solver.precond, J, CUDABackend())
LS.update!(solver::ExaPF.LS.AbstractIterativeLinearSolver, J::CuSparseMatrixCSR) = KP.update!(solver.precond, J)
LS.update!(solver::ExaPF.LS.DirectSolver, J::CuSparseMatrixCSR) = lu!(solver.factorization, J)
LS._get_type(J::CuSparseMatrixCSR) = CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}
LS.default_linear_solver(A::CuSparseMatrixCSR, device::CUDABackend) = ExaPF.LS.DirectSolver(A)
LS._allowscalar(f::Function, J::CuSparseMatrixCSR) = CUDA.allowscalar(f)
ExaPF._iscsr(::CuSparseMatrixCSR) = true
ExaPF._iscsc(::CuSparseMatrixCSR) = false
function LS.scaling!(::LS.KrylovBICGSTAB,A::CuSparseMatrixCSR,b)
KP.scaling_csr!(A,b)
end
"""
list_solvers(::CUDABackend)
@@ -31,5 +36,4 @@ List all linear solvers available for solving the power flow on an NVIDIA GPU.
ExaPF.list_solvers(::CUDABackend) = [LS.DirectSolver, LS.BICGSTAB, LS.DQGMRES, LS.EigenBICGSTAB, LS.KrylovBICGSTAB]

include("cuda_wrapper.jl")
include("cuda_preconditioner.jl")
end
40 changes: 0 additions & 40 deletions ext/amdgpu_preconditioner.jl

This file was deleted.

40 changes: 0 additions & 40 deletions ext/cuda_preconditioner.jl

This file was deleted.

2 changes: 1 addition & 1 deletion ext/cuda_wrapper.jl
@@ -53,7 +53,7 @@ end
# By default, no factorization routine is available
LS.update!(s::LS.DirectSolver{Nothing}, J::CuSparseMatrixCSR) = nothing
function LS.ldiv!(::LS.DirectSolver{Nothing},
y::CuVector, J::CuSparseMatrixCSR, x::CuVector,
y::CuVector, J::CuSparseMatrixCSR, x::CuVector; options...
)
CUSOLVER.csrlsvqr!(J, x, y, 1e-8, one(Cint), 'O')
return 0
(Diffs for the remaining changed files are not shown.)
