Skip to content

Commit

Permalink
Minor performance improvements for operator application and multigrid…
Browse files Browse the repository at this point in the history
… smoothers
  • Loading branch information
sebastiangrimberg committed Aug 8, 2023
1 parent b1c0f27 commit 0817fb8
Show file tree
Hide file tree
Showing 12 changed files with 549 additions and 383 deletions.
163 changes: 106 additions & 57 deletions palace/linalg/chebyshev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,43 @@ void GetInverseDiagonal(const ParOperator &A, Vector &dinv)
dinv.Reciprocal();
}

void GetInverseDiagonal(const ComplexParOperator &A, ComplexVector &dinv)
void GetInverseDiagonal(const ComplexParOperator &A, Vector &dinv)
{
MFEM_VERIFY(A.HasReal() || A.HasImag(),
"Invalid zero ComplexOperator for ChebyshevSmoother!");
MFEM_VERIFY(A.HasReal() && !A.HasImag(),
"ComplexOperator for ChebyshevSmoother must be real-valued for now!");
dinv.SetSize(A.Height());
dinv = 0.0;
if (A.HasReal())
{
A.Real()->AssembleDiagonal(dinv.Real());
}
if (A.HasImag())
{
A.Imag()->AssembleDiagonal(dinv.Imag());
}
A.Real()->AssembleDiagonal(dinv);
dinv.Reciprocal();
// MFEM_VERIFY(A.HasReal() || A.HasImag(),
// "Invalid zero ComplexOperator for ChebyshevSmoother!");
// dinv.SetSize(A.Height());
// dinv.SetSize(A.Height());
// dinv = 0.0;
// if (A.HasReal())
// {
// A.Real()->AssembleDiagonal(dinv.Real());
// }
// if (A.HasImag())
// {
// A.Imag()->AssembleDiagonal(dinv.Imag());
// }
// dinv.Reciprocal();
}

double GetLambdaMax(MPI_Comm comm, const Operator &A, const Vector &dinv)
{
DiagonalOperator Dinv(dinv);
ProductOperator DinvA(Dinv, A);
return linalg::SpectralNorm(comm, DinvA, false);
}

double GetLambdaMax(MPI_Comm comm, const ComplexOperator &A, const Vector &dinv)
{
MFEM_VERIFY(A.HasReal() && !A.HasImag(),
"ComplexOperator for ChebyshevSmoother must be real-valued for now!");
DiagonalOperator Dinv(dinv);
ProductOperator DinvA(Dinv, *A.Real());
return linalg::SpectralNorm(comm, DinvA, false);
}

} // namespace
Expand All @@ -51,64 +73,85 @@ void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
typedef typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
ComplexParOperator, ParOperator>::type ParOperType;

A = &op;
// if constexpr (std::is_same<OperType, ComplexOperator>::value)
// {
// A = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op.Real()))
// .ParallelAssemble();
// }
// else
// {
// A = &const_cast<ParOperType &>(dynamic_cast<const ParOperator &>(op))
// .ParallelAssemble();
// }
r.SetSize(op.Height());
d.SetSize(op.Height());

const auto *PtAP = dynamic_cast<const ParOperType *>(&op);
MFEM_VERIFY(PtAP,
"ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!");
GetInverseDiagonal(*PtAP, dinv);

A = &op;
r.SetSize(A->Height());
d.SetSize(A->Height());

// Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See
// mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing:
// polynomial versus Gauss-Seidel, JCP (2003).
BaseDiagonalOperator<OperType> Dinv(dinv);
BaseProductOperator<OperType> DinvA(Dinv, *A);
lambda_max = 1.01 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false);
lambda_max = 1.01 * GetLambdaMax(PtAP->GetComm(), *A, dinv);
}

namespace
{

template <bool Transpose = false>
inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a)
inline void ApplyOp(const Operator &A, const Vector &x, Vector &y)
{
A.Mult(x, y);
}

// template <bool Transpose = false>
// inline void ApplyOp(const Operator &A, const ComplexVector &x, ComplexVector &y)
// {
// A.Mult(x.Real(), y.Real());
// A.Mult(x.Imag(), y.Imag());
// }

template <bool Transpose = false>
inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y)
{
if (a == 0.0)
if constexpr (!Transpose)
{
A.Mult(x, y);
}
else
{
A.AddMult(x, y, a);
A.MultHermitianTranspose(x, y);
}
}

template <bool Transpose = false>
inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a)
{
A.AddMult(x, y, a);
}

// template <bool Transpose = false>
// inline void ApplyOp(const Operator &A, const ComplexVector &x, ComplexVector &y,
// const double a)
// {
// A.AddMult(x.Real(), y.Real(), a);
// A.AddMult(x.Imag(), y.Imag(), a);
// }

template <bool Transpose = false>
inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y,
const double a)
{
if constexpr (!Transpose)
{
if (a == 0.0)
{
A.Mult(x, y);
}
else
{
A.AddMult(x, y, a);
}
A.AddMult(x, y, a);
}
else
{
if (a == 0.0)
{
A.MultHermitianTranspose(x, y);
}
else
{
A.AddMultHermitianTranspose(x, y, a);
}
A.AddMultHermitianTranspose(x, y, a);
}
}

Expand All @@ -123,12 +166,13 @@ inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &
}

template <bool Transpose = false>
inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const ComplexVector &r,
inline void ApplyOrder0(const double sr, const Vector &dinv, const ComplexVector &r,
ComplexVector &d)
{
const int N = dinv.Size();
const auto *DIR = dinv.Real().Read();
const auto *DII = dinv.Imag().Read();
// const auto *DIR = dinv.Real().Read();
// const auto *DII = dinv.Imag().Read();
const auto *DIR = dinv.Read();
const auto *RR = r.Real().Read();
const auto *RI = r.Imag().Read();
auto *DR = d.Real().ReadWrite();
Expand All @@ -138,19 +182,21 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple
mfem::forall(N,
[=] MFEM_HOST_DEVICE(int i)
{
const double t = DII[i] * RR[i] + DIR[i] * RI[i];
DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
DI[i] = sr * t;
// DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
// DI[i] = sr * (DII[i] * RR[i] + DIR[i] * RI[i]);
DR[i] = sr * DIR[i] * RR[i];
DI[i] = sr * DIR[i] * RI[i];
});
}
else
{
mfem::forall(N,
[=] MFEM_HOST_DEVICE(int i)
{
const double t = -DII[i] * RR[i] + DIR[i] * RI[i];
DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
DI[i] = sr * t;
// DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
// DI[i] = sr * (-DII[i] * RR[i] + DIR[i] * RI[i]);
DR[i] = sr * DIR[i] * RR[i];
DI[i] = sr * DIR[i] * RI[i];
});
}
}
Expand All @@ -167,12 +213,13 @@ inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv,
}

template <bool Transpose = false>
inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &dinv,
inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv,
const ComplexVector &r, ComplexVector &d)
{
const int N = dinv.Size();
const auto *DIR = dinv.Real().Read();
const auto *DII = dinv.Imag().Read();
// const auto *DIR = dinv.Real().Read();
// const auto *DII = dinv.Imag().Read();
const auto *DIR = dinv.Read();
const auto *RR = r.Real().Read();
const auto *RI = r.Imag().Read();
auto *DR = d.Real().ReadWrite();
Expand All @@ -182,19 +229,21 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d
mfem::forall(N,
[=] MFEM_HOST_DEVICE(int i)
{
const double t = DII[i] * RR[i] + DIR[i] * RI[i];
DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
DI[i] = sd * DI[i] + sr * t;
// DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
// DI[i] = sd * DI[i] + sr * (DII[i] * RR[i] + DIR[i] * RI[i]);
DR[i] = sd * DR[i] + sr * DIR[i] * RR[i];
DI[i] = sd * DI[i] + sr * DIR[i] * RI[i];
});
}
else
{
mfem::forall(N,
[=] MFEM_HOST_DEVICE(int i)
{
const double t = -DII[i] * RR[i] + DIR[i] * RI[i];
DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
DI[i] = sd * DI[i] + sr * t;
// DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
// DI[i] = sd * DI[i] + sr * (-DII[i] * RR[i] + DIR[i] * RI[i]);
DR[i] = sd * DR[i] + sr * DIR[i] * RR[i];
DI[i] = sd * DI[i] + sr * DIR[i] * RI[i];
});
}
}
Expand All @@ -209,7 +258,7 @@ void ChebyshevSmoother<OperType>::Mult(const VecType &x, VecType &y) const
{
if (this->initial_guess || it > 0)
{
ApplyOp(*A, y, r, 0.0);
ApplyOp(*A, y, r);
linalg::AXPBY(1.0, x, -1.0, r);
}
else
Expand Down
4 changes: 2 additions & 2 deletions palace/linalg/chebyshev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class ChebyshevSmoother : public Solver<OperType>
// System matrix (not owned).
const OperType *A;

// Inverse diagonal scaling of the operator.
VecType dinv;
// Inverse diagonal scaling of the operator (real-valued for now).
Vector dinv;

// Maximum operator eigenvalue for Chebyshev polynomial smoothing.
double lambda_max;
Expand Down
38 changes: 27 additions & 11 deletions palace/linalg/distrelaxation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ DistRelaxationSmoother<OperType>::DistRelaxationSmoother(
grad->Assemble();
grad->Finalize();
G = std::make_unique<ParOperator>(std::move(grad), h1_fespace, nd_fespace, true);
// ParOperator RAP_G(std::move(grad), h1_fespace, nd_fespace, true);
// G = RAP_G.StealParallelAssemble();
}

// Initialize smoothers.
Expand All @@ -46,41 +48,55 @@ void DistRelaxationSmoother<OperType>::SetOperators(const OperType &op,
"Invalid operator sizes for DistRelaxationSmoother!");
A = &op;
A_G = &op_G;
// if constexpr (std::is_same<OperType, ComplexOperator>::value)
// {
// A = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op.Real()))
// .ParallelAssemble();
// A_G = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op_G.Real()))
// .ParallelAssemble();
// }
// else
// {
// A = &const_cast<ParOperType &>(dynamic_cast<const ParOperType &>(op))
// .ParallelAssemble();
// A_G = &const_cast<ParOperType &>(dynamic_cast<const ParOperType &>(op_G))
// .ParallelAssemble();
// }

const auto *PtAP_G = dynamic_cast<const ParOperType *>(&op_G);
MFEM_VERIFY(PtAP_G,
"ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!");
dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs();

r.SetSize(A->Height());
x_G.SetSize(A_G->Height());
y_G.SetSize(A_G->Height());
r.SetSize(op.Height());
x_G.SetSize(op_G.Height());
y_G.SetSize(op_G.Height());

// Set up smoothers for A and A_G.
B->SetOperator(*A);
B_G->SetOperator(*A_G);
B->SetOperator(op);
B_G->SetOperator(op_G);
}

namespace
{

inline void RealAddMult(Operator &op, const Vector &x, Vector &y)
inline void RealAddMult(const Operator &op, const Vector &x, Vector &y)
{
op.AddMult(x, y, 1.0);
}

inline void RealAddMult(Operator &op, const ComplexVector &x, ComplexVector &y)
inline void RealAddMult(const Operator &op, const ComplexVector &x, ComplexVector &y)
{
op.AddMult(x.Real(), y.Real(), 1.0);
op.AddMult(x.Imag(), y.Imag(), 1.0);
}

inline void RealMultTranspose(Operator &op, const Vector &x, Vector &y)
inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y)
{
op.MultTranspose(x, y);
}

inline void RealMultTranspose(Operator &op, const ComplexVector &x, ComplexVector &y)
inline void RealMultTranspose(const Operator &op, const ComplexVector &x, ComplexVector &y)
{
op.MultTranspose(x.Real(), y.Real());
op.MultTranspose(x.Imag(), y.Imag());
Expand All @@ -104,7 +120,7 @@ void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
RealMultTranspose(*G, r, x_G);
if (dbc_tdof_list_G)
{
x_G.SetSubVector(*dbc_tdof_list_G, 0.0);
linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
}
B_G->Mult(x_G, y_G);
RealAddMult(*G, y_G, y);
Expand Down Expand Up @@ -132,7 +148,7 @@ void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &
}
if (dbc_tdof_list_G)
{
x_G.SetSubVector(*dbc_tdof_list_G, 0.0);
linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
}
B_G->MultTranspose(x_G, y_G);
RealAddMult(*G, y_G, y);
Expand Down
2 changes: 1 addition & 1 deletion palace/linalg/divfree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class DivFreeSolver
// Apply essential BC and solve the linear system.
if (bdr_tdof_list_M)
{
rhs.SetSubVector(*bdr_tdof_list_M, 0.0);
linalg::SetSubVector(rhs, *bdr_tdof_list_M, 0.0);
}
ksp->Mult(rhs, psi);

Expand Down
Loading

0 comments on commit 0817fb8

Please sign in to comment.