Skip to content

Commit

Permalink
Update operator-matrix products with the block-Jacobi preconditioner
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Oct 4, 2023
1 parent dbc883e commit 368237b
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 6 deletions.
44 changes: 38 additions & 6 deletions src/LinearSolvers/preconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,21 @@ Base.eltype(::BlockJacobiPreconditioner) = Float64
# of the blocks, gemm_strided is performing too many unecessary operations,
# impairing its performance.
@kernel function mblock_kernel!(y, b, p_len, rp_len, part, blocks)
p = size(b, 2)
i, j = @index(Global, NTuple)
len = p_len[i]
rlen = rp_len[i]

if j <= rlen
accum = 0.0
idxA = @inbounds part[j, i]
for k=1:len
idxB = @inbounds part[k, i]
@inbounds accum = accum + blocks[j, k, i]*b[idxB]
for=1:p
accum = 0.0
idxA = @inbounds part[j, i]
for k=1:len
idxB = @inbounds part[k, i]
@inbounds accum = accum + blocks[j, k, i]*b[idxB,ℓ]
end
y[idxA,ℓ] = accum
end
y[idxA] = accum
end
end

Expand All @@ -214,6 +217,22 @@ function mul!(y, C::BlockJacobiPreconditioner, b::Vector{T}) where T
end
end

function mul!(Y, C::BlockJacobiPreconditioner, B::Matrix{T}) where T
n, p = size(B)
fill!(Y, zero(T))
for i=1:C.nblocks
rlen = C.lpartitions[i]
part = C.partitions[1:rlen, i]
blck = C.blocks[1:rlen, 1:rlen, i]
for rhs=1:p
for j=1:C.rest_size[i]
idx = part[j]
Y[idx,rhs] += dot(blck[j, :], B[part,rhs])
end
end
end
end

function mul!(y, C::BlockJacobiPreconditioner, b::CuVector{T}) where T
n = size(b, 1)
fill!(y, zero(T))
Expand All @@ -227,6 +246,19 @@ function mul!(y, C::BlockJacobiPreconditioner, b::CuVector{T}) where T
KA.synchronize(CUDABackend())
end

function mul!(Y, C::BlockJacobiPreconditioner, B::CuMatrix{T}) where T
n, p = size(B)
fill!(Y, zero(T))
max_rlen = maximum(C.rest_size)
ndrange = (C.nblocks, max_rlen)
mblock_kernel!(CUDABackend())(
Y, B, C.culpartitions, C.curest_size,
C.cupartitions, C.cublocks,
ndrange=ndrange,
)
KA.synchronize(CUDABackend())
end

"""
build_adjmatrix
Expand Down
22 changes: 22 additions & 0 deletions test/TestLinearSolvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,28 @@ function generate_random_system(n::Int, m::Int)
return spA, b, x♯
end

function test_preconditioners(device, AT, SMT)
n, m = 100, 100
A, b, x♯ = generate_random_system(n, m)
# Transfer data to device
A = A |> SMT
b = b |> AT
x♯ = x♯ |> AT
x = similar(b); r = similar(b)
nblocks = 2
precond = LS.BlockJacobiPreconditioner(A, nblocks, device)
LS.update(precond, A, device)
Iₙ = Matrix{Float64}(I, n, n) |> AT
Y = zeros(Float64, n, n) |> AT
Y2 = zeros(Float64, n, n) |> AT
mul!(Y, precond, Iₙ)
for i=1:n
eᵢ = Iₙ[:,i] |> AT
mul!(view(Y2,:,i), precond, eᵢ)
end
@test Y Y2
end

function test_custom_bicgstab(device, AT, SMT)
n, m = 100, 100
A, b, x♯ = generate_random_system(n, m)
Expand Down

0 comments on commit 368237b

Please sign in to comment.