Skip to content

Commit

Permalink
Add preconditioner-matrix products
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Oct 3, 2023
1 parent 3623d53 commit 79d09d3
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 15 deletions.
30 changes: 23 additions & 7 deletions src/LinearSolvers/preconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,21 @@ Base.eltype(::BlockJacobiPreconditioner) = Float64
# of the blocks, gemm_strided is performing too many unecessary operations,
# impairing its performance.
@kernel function mblock_kernel!(y, b, p_len, rp_len, part, blocks)
p = size(b, 2)
i, j = @index(Global, NTuple)
len = p_len[i]
rlen = rp_len[i]

if j <= rlen
accum = 0.0
idxA = @inbounds part[j, i]
for k=1:len
idxB = @inbounds part[k, i]
@inbounds accum = accum + blocks[j, k, i]*b[idxB]
for=1:p
accum = 0.0
idxA = @inbounds part[j, i]
for k=1:len
idxB = @inbounds part[k, i]
@inbounds accum = accum + blocks[j, k, i]*b[idxB,ℓ]
end
y[idxA,ℓ] = accum
end
y[idxA] = accum
end
end

Expand All @@ -223,7 +226,7 @@ function mul!(Y, C::BlockJacobiPreconditioner, B::Matrix{T}) where T
blck = view(C.blocks, 1:rlen, 1:rlen, i)
for j=1:C.rest_size[i]
idx = part[j]
mul!(view(Y,idx,:), view(blck, j, :)', view(B, part, :), one(T), one(T))
mul!(view(Y,idx,:), view(blck, :, j), view(B, part, :), one(T), one(T))
end
end
end
Expand All @@ -241,6 +244,19 @@ function mul!(y, C::BlockJacobiPreconditioner, b::CuVector{T}) where T
KA.synchronize(CUDABackend())
end

function mul!(Y, C::BlockJacobiPreconditioner, B::CuMatrix{T}) where T
n, p = size(B)
fill!(Y, zero(T))
max_rlen = maximum(C.rest_size)
ndrange = (C.nblocks, max_rlen)
mblock_kernel!(CUDABackend())(
Y, B, C.culpartitions, C.curest_size,
C.cupartitions, C.cublocks,
ndrange=ndrange,
)
KA.synchronize(CUDABackend())
end

"""
build_adjmatrix
Expand Down
29 changes: 21 additions & 8 deletions test/TestLinearSolvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,28 @@ function test_preconditioners(device, AT, SMT)
nblocks = 2
precond = LS.BlockJacobiPreconditioner(A, nblocks, device)
LS.update(precond, A, device)
Iₙ = Matrix{Float64}(I, n, n)
Y = zeros(Float64, n, n)
Y2 = zeros(Float64, n, n)
mul!(Y, precond, Iₙ)
for i=1:n
eᵢ = Iₙ[:,i]
mul!(view(Y2,:,i), precond, eᵢ)
if device == ExaPF.CPU()
Iₙ = Matrix{Float64}(I, n, n)
Y = zeros(Float64, n, n)
Y2 = zeros(Float64, n, n)
mul!(Y, precond, Iₙ)
for i=1:n
eᵢ = Vector(Iₙ[:,i])
mul!(view(Y2,:,i), precond, eᵢ)
end
@test Y Y2
end
if device == ExaPF.GPU()
Iₙ = CuMatrix{Float64}(I, n, n)
Y = CUDA.zeros(Float64, n, n)
Y2 = CUDA.zeros(Float64, n, n)
mul!(Y, precond, Iₙ)
for i=1:n
eᵢ = CuVector(Iₙ[:,i])
mul!(view(Y2,:,i), precond, eᵢ)
end
@test Y Y2
end
@test Y Y2
end

function test_custom_bicgstab(device, AT, SMT)
Expand Down

0 comments on commit 79d09d3

Please sign in to comment.