Skip to content

Commit

Permalink
test each KA's kernel on the CPU and verify output (#227)
Browse files Browse the repository at this point in the history
  • Loading branch information
frapac authored Feb 11, 2022
1 parent 12449de commit a2c48be
Show file tree
Hide file tree
Showing 8 changed files with 254 additions and 92 deletions.
4 changes: 2 additions & 2 deletions src/LinearSolvers/preconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,14 @@ struct BlockJacobiPreconditioner{AT,GAT,VI,GVI,MT,GMT,MI,GMI,SMT,VF,GVF} <: Abst
end
end

function BlockJacobiPreconditioner(J::SparseMatrixCSC; nblocks=-1, device=CPU())
function BlockJacobiPreconditioner(J::SparseMatrixCSC; nblocks=-1, device=CPU(), noverlaps=0)
n = size(J, 1)
npartitions = if nblocks > 0
nblocks
else
div(n, 32)
end
return BlockJacobiPreconditioner(J, npartitions, device)
return BlockJacobiPreconditioner(J, npartitions, device, noverlaps)
end
BlockJacobiPreconditioner(J::CUSPARSE.CuSparseMatrixCSR; options...) = BlockJacobiPreconditioner(SparseMatrixCSC(J); options...)

Expand Down
20 changes: 9 additions & 11 deletions src/autodiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ any nonlinear constraint ``h(x)``.
"""
abstract type AbstractJacobian end

function jacobian!(jac::AbstractJacobian, stack::AbstractStack)
error("Mising method jacobian!(", typeof(jac), ", ", typeof(stack), ")")
end
function jacobian! end

"""
AbstractHessianProd
Expand Down Expand Up @@ -205,20 +203,20 @@ end

# Sparse Jacobian partials

@kernel function partials_kernel_gpu!(@Const(J_rowPtr), @Const(J_colVal), J_nzVal, @Const(duals), @Const(coloring))
@kernel function partials_kernel_csr!(@Const(J_rowPtr), @Const(J_colVal), J_nzVal, @Const(duals), @Const(coloring))
i = @index(Global, Linear)

@inbounds for j in J_rowPtr[i]:J_rowPtr[i+1]-1
@inbounds J_nzVal[j] = duals[coloring[J_colVal[j]]+1, i]
for j in J_rowPtr[i]:J_rowPtr[i+1]-1
J_nzVal[j] = duals[coloring[J_colVal[j]]+1, i]
end
end

@kernel function partials_kernel_cpu!(J_colptr, J_rowval, J_nzval, duals, coloring)
@kernel function partials_kernel_csc!(J_colptr, J_rowval, J_nzval, duals, coloring)
# CSC is column oriented: nmap is equal to number of columns
i = @index(Global, Linear)

@inbounds for j in J_colptr[i]:J_colptr[i+1]-1
@inbounds J_nzval[j] = duals[coloring[i]+1, J_rowval[j]]
for j in J_colptr[i]:J_colptr[i+1]-1
J_nzval[j] = duals[coloring[i]+1, J_rowval[j]]
end
end

Expand All @@ -240,10 +238,10 @@ function partials!(jac::AbstractJacobian)
duals_ = reshape(reinterpret(T, duals), N+1, n)

if isa(device, CPU)
kernel! = partials_kernel_cpu!(device)
kernel! = partials_kernel_csc!(device)
ev = kernel!(J.colptr, J.rowval, J.nzval, duals_, coloring, ndrange=size(J,2), dependencies=Event(device))
elseif isa(device, GPU)
kernel! = partials_kernel_gpu!(device)
kernel! = partials_kernel_csr!(device)
ev = kernel!(J.rowPtr, J.colVal, J.nzVal, duals_, coloring, ndrange=size(J,1), dependencies=Event(device))
else
error("Unknown device $device")
Expand Down
65 changes: 0 additions & 65 deletions src/cuda_wrapper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,6 @@ CuSparseMatrixCSR{Tv, Int32}(A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = CuSpar

# AbstractStack

@kernel function _transfer_to_input!(input, map, src)
i = @index(Global, Linear)
input[map[i]] = src[i]
end

@kernel function _transfer_fr_input!(dest, input, map)
i = @index(Global, Linear)
dest[i] = input[map[i]]
end

function Base.copyto!(stack::AutoDiff.AbstractStack, map::AbstractVector{Int}, vals::VT) where {VT <: CuArray}
@assert length(map) == length(vals)
ndrange = (length(map),)
Expand All @@ -56,30 +46,6 @@ function Base.copyto!(dest::VT, stack::AutoDiff.AbstractStack, map::AbstractVect
wait(ev)
end

#=
LinearSolvers
=#
function csclsvqr!(A::CUSPARSE.CuSparseMatrixCSC{Float64},
b::CUDA.CuArray{Float64, 1, CUDA.Mem.DeviceBuffer},
x::CUDA.CuArray{Float64, 1, CUDA.Mem.DeviceBuffer},
tol::Float64,
reorder::Cint,
inda::Char)
n = size(A,1)
desca = CUSPARSE.CuMatrixDescriptor(
CUSPARSE.CUSPARSE_MATRIX_TYPE_GENERAL,
CUSPARSE.CUSPARSE_FILL_MODE_LOWER,
CUSPARSE.CUSPARSE_DIAG_TYPE_NON_UNIT, inda)
singularity = Ref{Cint}(1)
CUSOLVER.cusolverSpDcsrlsvqr(CUSOLVER.sparse_handle(), n, A.nnz, desca, A.nzVal, A.colPtr, A.rowVal, b, tol, reorder, x, singularity)

if singularity[] != -1
throw(SingularException(singularity[]))
end

x
end

# By default, no factorization routine is available
LinearSolvers.update!(s::DirectSolver{Nothing}, J::CuSparseMatrixCSR) = nothing
function LinearSolvers.ldiv!(::DirectSolver{Nothing},
Expand All @@ -88,27 +54,6 @@ function LinearSolvers.ldiv!(::DirectSolver{Nothing},
CUSOLVER.csrlsvqr!(J, x, y, 1e-8, one(Cint), 'O')
return 0
end
function LinearSolvers.ldiv!(::DirectSolver{Nothing},
y::CUDA.CuVector, J::CUSPARSE.CuSparseMatrixCSC, x::CUDA.CuVector,
)
csclsvqr!(J, x, y, 1e-8, one(Cint), 'O')
return 0
end

#=
Autodiff
=#

@kernel function _extract_values_kernel(dest, src)
i = @index(Global, Linear)
dest[i] = src[i].value
end

function extract_values!(dest::CuArray, src::CuArray)
ndrange = (length(dest),)
ev = _extract_values_kernel(CUDADevice())(dest, src, ndrange=ndrange)
wait(ev)
end

#=
Generic SpMV for CuSparseMatrixCSR
Expand All @@ -117,16 +62,6 @@ function ForwardDiff.npartials(vec::CuVector{ForwardDiff.Dual{T, V, N}}) where {
return N
end

# Differentiable LinearAlgebra.mul! for ForwardDiff
@kernel function _spmv_csr_kernel!(Y, X, colVal, rowPtr, nzVal, alpha, beta, n, m)
i, k = @index(Global, NTuple)
Y[k, i] *= beta
@inbounds for c in rowPtr[i]:rowPtr[i+1]-1
j = colVal[c]
Y[k, i] += alpha * nzVal[c] * X[k, j]
end
end

function LinearAlgebra.mul!(
Y::CuArray{T, 1},
A::CUSPARSE.CuSparseMatrixCSR,
Expand Down
81 changes: 79 additions & 2 deletions src/utils.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

abstract type AbstractArchitecture end

# norm
xnorm(x::AbstractVector) = norm(x, 2)

Expand All @@ -14,3 +12,82 @@ function get_jacobian_types(::CPU)
return SMT, A
end

#=
Kernels utils
=#

@kernel function _transfer_to_input!(input, map, src)
i = @index(Global, Linear)
input[map[i]] = src[i]
end

@kernel function _transfer_fr_input!(dest, input, map)
i = @index(Global, Linear)
dest[i] = input[map[i]]
end

# Differentiable LinearAlgebra.mul! for ForwardDiff
@kernel function _spmv_csr_kernel!(Y, X, colVal, rowPtr, nzVal, alpha, beta, n, m)
i, k = @index(Global, NTuple)
Y[k, i] *= beta
@inbounds for c in rowPtr[i]:rowPtr[i+1]-1
j = colVal[c]
Y[k, i] += alpha * nzVal[c] * X[k, j]
end
end

#=
CSC2CSR
=#

# Taken from
# https://github.com/scipy/scipy/blob/3b36a574dc657d1ca116f6e230be694f3de31afc/scipy/sparse/sparsetools/csr.h#L376
function csr2csc(n, m, Ap, Aj, Ax, Bp, Bi, Bx)
nnzA = Ap[n+1] - 1
fill!(Bp, 0)

for i in 1:nnzA
Bp[Aj[i]] += 1
end

cumsum = 1
for j in 1:m
tmp = Bp[j]
Bp[j] = cumsum
cumsum += tmp
end
Bp[m+1] = nnzA + 1

for i in 1:n
for c in Ap[i]:Ap[i+1]-1
j = Aj[c]
dest = Bp[j]
Bi[dest] = i
Bx[dest] = Ax[c]
Bp[j] += 1
end
end

last = 1
for j in 1:m+1
tmp = Bp[j]
Bp[j] = last
last = tmp
end
end

csc2csr(n, m, Ap, Ai, Ax, Bp, Bj, Bx) = csr2csc(m, n, Ap, Ai, Ax, Bp, Bj, Bx)

function convert2csr(A::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti}
n, m = size(A)
nnzA = nnz(A)
Ap, Ai, Ax = A.colptr, A.rowval, A.nzval

Bp = zeros(Ti, n+1)
Bj = zeros(Ti, nnzA)
Bx = zeros(Tv, nnzA)

csc2csr(n, m, Ap, Ai, Ax, Bp, Bj, Bx)
return Bp, Bj, Bx
end

2 changes: 0 additions & 2 deletions test/Polar/TestPolarForm.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
module TestPolarFormulation

@eval Base.Experimental.@optlevel 0

using Test

using CUDA
Expand Down
138 changes: 138 additions & 0 deletions test/TestKernels.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Tests for custom kernels
module TestKernels

using LinearAlgebra
using Random
using SparseArrays
using Test

using ExaPF
using KernelAbstractions

const AD = ExaPF.AutoDiff

function test_utils_kernel(device, AT, SMT)
# transfer kernels
n = 10
m = 20
# FROM
mapping = randperm(m)[1:n]
dest = zeros(n)
src = rand(m)
ndrange = (n, )
ev = ExaPF._transfer_fr_input!(device)(dest, src, mapping; ndrange=ndrange)
wait(ev)
@test dest == src[mapping]

# TO
src = rand(n)
dest = zeros(m)
ndrange = (n, )
ev = ExaPF._transfer_to_input!(device)(dest, mapping, src; ndrange=ndrange)
wait(ev)
@test src == dest[mapping]
end

function test_polar_kernel(device, AT, SMT)
nb = 10
nl = 10
m = nb + 2*nl
vmag = rand(nb)
vang = rand(nb)
output = zeros(m)
f = rand(1:nb, nl)
t = rand(1:nb, nl)
ndrange = (m, 1)
ev = ExaPF.basis_kernel!(device)(output, vmag, vang, f, t, nl, nb; ndrange=ndrange)
wait(ev)

Δθ = vang[f] .- vang[t]
vl2 = vmag[f] .* vmag[t]
res = [vl2 .* cos.(Δθ); vl2 .* sin.(Δθ); vmag .* vmag]
@test res == output
end

function test_autodiff_kernel(device, AT, SMT)
n, m = 10, 20
J = sprandn(n, m, .2)
colors = AD.SparseDiffTools.matrix_colors(J)
p = length(unique(colors))

# set_value!
x = rand(n)
duals = zeros(p+1, n)
ndrange = (n, )
ev = AD._set_value_kernel!(device)(duals, x; ndrange=ndrange)
wait(ev)
@test duals[1, :] == x

# Seed with coloring for Jacobian/Hessian
coloring = rand(1:p, n)
duals = zeros(p+1, n)
map = randperm(n)
ndrange = (n, p)
ev = AD._seed_coloring_kernel!(device)(duals, coloring, map; ndrange=ndrange)
wait(ev)

# Seed for Hessian-vector products
v = rand(n)
duals = zeros(2, n)
ndrange = (n, )
ev = AD._seed_kernel!(device)(duals, v, map; ndrange=ndrange)
wait(ev)
@test duals[2, map] == v

# Partials extraction
duals = rand(p+1, m)
## CSC
ndrange = (m, ) # columns oriented
ev = AD.partials_kernel_csc!(device)(J.colptr, J.rowval, J.nzval, duals, colors; ndrange=ndrange)
wait(ev)

## CSR
Bp, Bj, Bx = ExaPF.convert2csr(J)
ndrange = (n, ) # rows oriented
ev = AD.partials_kernel_csr!(device)(Bp, Bj, Bx, duals, colors; ndrange=ndrange)
wait(ev)

# Convert back to CSC and check results
Ap = zeros(Int, m+1)
Ai = zeros(Int, nnz(J))
Ax = zeros(nnz(J))
ExaPF.csr2csc(n, m, Bp, Bj, Bx, Ap, Ai, Ax)
@test Ax == J.nzval
end

function test_sparse_mul_kernel(device, AT, SMT)
n, m = 10, 20
p = 2
J = sprandn(n, m, .2)
x = rand(m, p)
y_ref = zeros(n, p)
mul!(y_ref, J, x, 1.0, 0.0)

# Convert to CSR
Bp, Bj, Bx = ExaPF.convert2csr(J)
# Kernel is transposing the input and the output
x = Array(x')
y = zeros(p, n)
ndrange = (n, p)
ev = ExaPF._spmv_csr_kernel!(device)(y, x, Bj, Bp, Bx, 1.0, 0.0, n, m; ndrange=ndrange)
wait(ev)

@test y' y_ref
end

function runtests(device, AT, SMT)
for name_sym in names(@__MODULE__; all = true)
name = string(name_sym)
if !startswith(name, "test_")
continue
end
test_func = getfield(@__MODULE__, name_sym)
test_func(device, AT, SMT)
end
end

end

Loading

0 comments on commit a2c48be

Please sign in to comment.