Minor performance improvements for operator application and multigrid…

… smoothers
awslabs · Aug 8, 2023 · 0817fb8 · 0817fb8
1 parent b1c0f27
commit 0817fb8
Show file tree

Hide file tree

Showing 12 changed files with 549 additions and 383 deletions.
diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp
@@ -20,21 +20,43 @@ void GetInverseDiagonal(const ParOperator &A, Vector &dinv)
   dinv.Reciprocal();
 }
 
-void GetInverseDiagonal(const ComplexParOperator &A, ComplexVector &dinv)
+void GetInverseDiagonal(const ComplexParOperator &A, Vector &dinv)
 {
-  MFEM_VERIFY(A.HasReal() || A.HasImag(),
-              "Invalid zero ComplexOperator for ChebyshevSmoother!");
+  MFEM_VERIFY(A.HasReal() && !A.HasImag(),
+              "ComplexOperator for ChebyshevSmoother must be real-valued for now!");
   dinv.SetSize(A.Height());
-  dinv = 0.0;
-  if (A.HasReal())
-  {
-    A.Real()->AssembleDiagonal(dinv.Real());
-  }
-  if (A.HasImag())
-  {
-    A.Imag()->AssembleDiagonal(dinv.Imag());
-  }
+  A.Real()->AssembleDiagonal(dinv);
   dinv.Reciprocal();
+  // MFEM_VERIFY(A.HasReal() || A.HasImag(),
+  //             "Invalid zero ComplexOperator for ChebyshevSmoother!");
+  // dinv.SetSize(A.Height());
+  // dinv.SetSize(A.Height());
+  // dinv = 0.0;
+  // if (A.HasReal())
+  // {
+  //   A.Real()->AssembleDiagonal(dinv.Real());
+  // }
+  // if (A.HasImag())
+  // {
+  //   A.Imag()->AssembleDiagonal(dinv.Imag());
+  // }
+  // dinv.Reciprocal();
+}
+
+double GetLambdaMax(MPI_Comm comm, const Operator &A, const Vector &dinv)
+{
+  DiagonalOperator Dinv(dinv);
+  ProductOperator DinvA(Dinv, A);
+  return linalg::SpectralNorm(comm, DinvA, false);
+}
+
+double GetLambdaMax(MPI_Comm comm, const ComplexOperator &A, const Vector &dinv)
+{
+  MFEM_VERIFY(A.HasReal() && !A.HasImag(),
+              "ComplexOperator for ChebyshevSmoother must be real-valued for now!");
+  DiagonalOperator Dinv(dinv);
+  ProductOperator DinvA(Dinv, *A.Real());
+  return linalg::SpectralNorm(comm, DinvA, false);
 }
 
 }  // namespace
@@ -51,64 +73,85 @@ void ChebyshevSmoother<OperType>::SetOperator(const OperType &op)
   typedef typename std::conditional<std::is_same<OperType, ComplexOperator>::value,
                                     ComplexParOperator, ParOperator>::type ParOperType;
 
+  A = &op;
+  // if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  // {
+  //   A = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op.Real()))
+  //            .ParallelAssemble();
+  // }
+  // else
+  // {
+  //   A = &const_cast<ParOperType &>(dynamic_cast<const ParOperator &>(op))
+  //            .ParallelAssemble();
+  // }
+  r.SetSize(op.Height());
+  d.SetSize(op.Height());
+
   const auto *PtAP = dynamic_cast<const ParOperType *>(&op);
   MFEM_VERIFY(PtAP,
               "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!");
   GetInverseDiagonal(*PtAP, dinv);
 
-  A = &op;
-  r.SetSize(A->Height());
-  d.SetSize(A->Height());
-
   // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See
   // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing:
   // polynomial versus Gauss-Seidel, JCP (2003).
-  BaseDiagonalOperator<OperType> Dinv(dinv);
-  BaseProductOperator<OperType> DinvA(Dinv, *A);
-  lambda_max = 1.01 * linalg::SpectralNorm(PtAP->GetComm(), DinvA, false);
+  lambda_max = 1.01 * GetLambdaMax(PtAP->GetComm(), *A, dinv);
 }
 
 namespace
 {
 
 template <bool Transpose = false>
-inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a)
+inline void ApplyOp(const Operator &A, const Vector &x, Vector &y)
+{
+  A.Mult(x, y);
+}
+
+// template <bool Transpose = false>
+// inline void ApplyOp(const Operator &A, const ComplexVector &x, ComplexVector &y)
+// {
+//   A.Mult(x.Real(), y.Real());
+//   A.Mult(x.Imag(), y.Imag());
+// }
+
+template <bool Transpose = false>
+inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y)
 {
-  if (a == 0.0)
+  if constexpr (!Transpose)
   {
     A.Mult(x, y);
   }
   else
   {
-    A.AddMult(x, y, a);
+    A.MultHermitianTranspose(x, y);
   }
 }
 
+template <bool Transpose = false>
+inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a)
+{
+  A.AddMult(x, y, a);
+}
+
+// template <bool Transpose = false>
+// inline void ApplyOp(const Operator &A, const ComplexVector &x, ComplexVector &y,
+//                     const double a)
+// {
+//   A.AddMult(x.Real(), y.Real(), a);
+//   A.AddMult(x.Imag(), y.Imag(), a);
+// }
+
 template <bool Transpose = false>
 inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y,
                     const double a)
 {
   if constexpr (!Transpose)
   {
-    if (a == 0.0)
-    {
-      A.Mult(x, y);
-    }
-    else
-    {
-      A.AddMult(x, y, a);
-    }
+    A.AddMult(x, y, a);
   }
   else
   {
-    if (a == 0.0)
-    {
-      A.MultHermitianTranspose(x, y);
-    }
-    else
-    {
-      A.AddMultHermitianTranspose(x, y, a);
-    }
+    A.AddMultHermitianTranspose(x, y, a);
   }
 }
 
@@ -123,12 +166,13 @@ inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &
 }
 
 template <bool Transpose = false>
-inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const ComplexVector &r,
+inline void ApplyOrder0(const double sr, const Vector &dinv, const ComplexVector &r,
                         ComplexVector &d)
 {
   const int N = dinv.Size();
-  const auto *DIR = dinv.Real().Read();
-  const auto *DII = dinv.Imag().Read();
+  // const auto *DIR = dinv.Real().Read();
+  // const auto *DII = dinv.Imag().Read();
+  const auto *DIR = dinv.Read();
   const auto *RR = r.Real().Read();
   const auto *RI = r.Imag().Read();
   auto *DR = d.Real().ReadWrite();
@@ -138,19 +182,21 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple
     mfem::forall(N,
                  [=] MFEM_HOST_DEVICE(int i)
                  {
-                   const double t = DII[i] * RR[i] + DIR[i] * RI[i];
-                   DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
-                   DI[i] = sr * t;
+                   // DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
+                   // DI[i] = sr * (DII[i] * RR[i] + DIR[i] * RI[i]);
+                   DR[i] = sr * DIR[i] * RR[i];
+                   DI[i] = sr * DIR[i] * RI[i];
                  });
   }
   else
   {
     mfem::forall(N,
                  [=] MFEM_HOST_DEVICE(int i)
                  {
-                   const double t = -DII[i] * RR[i] + DIR[i] * RI[i];
-                   DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
-                   DI[i] = sr * t;
+                   // DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
+                   // DI[i] = sr * (-DII[i] * RR[i] + DIR[i] * RI[i]);
+                   DR[i] = sr * DIR[i] * RR[i];
+                   DI[i] = sr * DIR[i] * RI[i];
                  });
   }
 }
@@ -167,12 +213,13 @@ inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv,
 }
 
 template <bool Transpose = false>
-inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &dinv,
+inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv,
                         const ComplexVector &r, ComplexVector &d)
 {
   const int N = dinv.Size();
-  const auto *DIR = dinv.Real().Read();
-  const auto *DII = dinv.Imag().Read();
+  // const auto *DIR = dinv.Real().Read();
+  // const auto *DII = dinv.Imag().Read();
+  const auto *DIR = dinv.Read();
   const auto *RR = r.Real().Read();
   const auto *RI = r.Imag().Read();
   auto *DR = d.Real().ReadWrite();
@@ -182,19 +229,21 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d
     mfem::forall(N,
                  [=] MFEM_HOST_DEVICE(int i)
                  {
-                   const double t = DII[i] * RR[i] + DIR[i] * RI[i];
-                   DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
-                   DI[i] = sd * DI[i] + sr * t;
+                   // DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]);
+                   // DI[i] = sd * DI[i] + sr * (DII[i] * RR[i] + DIR[i] * RI[i]);
+                   DR[i] = sd * DR[i] + sr * DIR[i] * RR[i];
+                   DI[i] = sd * DI[i] + sr * DIR[i] * RI[i];
                  });
   }
   else
   {
     mfem::forall(N,
                  [=] MFEM_HOST_DEVICE(int i)
                  {
-                   const double t = -DII[i] * RR[i] + DIR[i] * RI[i];
-                   DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
-                   DI[i] = sd * DI[i] + sr * t;
+                   // DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]);
+                   // DI[i] = sd * DI[i] + sr * (-DII[i] * RR[i] + DIR[i] * RI[i]);
+                   DR[i] = sd * DR[i] + sr * DIR[i] * RR[i];
+                   DI[i] = sd * DI[i] + sr * DIR[i] * RI[i];
                  });
   }
 }
@@ -209,7 +258,7 @@ void ChebyshevSmoother<OperType>::Mult(const VecType &x, VecType &y) const
   {
     if (this->initial_guess || it > 0)
     {
-      ApplyOp(*A, y, r, 0.0);
+      ApplyOp(*A, y, r);
       linalg::AXPBY(1.0, x, -1.0, r);
     }
     else

diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp
@@ -31,8 +31,8 @@ class ChebyshevSmoother : public Solver<OperType>
   // System matrix (not owned).
   const OperType *A;
 
-  // Inverse diagonal scaling of the operator.
-  VecType dinv;
+  // Inverse diagonal scaling of the operator (real-valued for now).
+  Vector dinv;
 
   // Maximum operator eigenvalue for Chebyshev polynomial smoothing.
   double lambda_max;

diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp
@@ -26,6 +26,8 @@ DistRelaxationSmoother<OperType>::DistRelaxationSmoother(
     grad->Assemble();
     grad->Finalize();
     G = std::make_unique<ParOperator>(std::move(grad), h1_fespace, nd_fespace, true);
+    // ParOperator RAP_G(std::move(grad), h1_fespace, nd_fespace, true);
+    // G = RAP_G.StealParallelAssemble();
   }
 
   // Initialize smoothers.
@@ -46,41 +48,55 @@ void DistRelaxationSmoother<OperType>::SetOperators(const OperType &op,
               "Invalid operator sizes for DistRelaxationSmoother!");
   A = &op;
   A_G = &op_G;
+  // if constexpr (std::is_same<OperType, ComplexOperator>::value)
+  // {
+  //   A = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op.Real()))
+  //            .ParallelAssemble();
+  //   A_G = &const_cast<ParOperator &>(dynamic_cast<const ParOperator &>(*op_G.Real()))
+  //              .ParallelAssemble();
+  // }
+  // else
+  // {
+  //   A = &const_cast<ParOperType &>(dynamic_cast<const ParOperType &>(op))
+  //            .ParallelAssemble();
+  //   A_G = &const_cast<ParOperType &>(dynamic_cast<const ParOperType &>(op_G))
+  //              .ParallelAssemble();
+  // }
 
   const auto *PtAP_G = dynamic_cast<const ParOperType *>(&op_G);
   MFEM_VERIFY(PtAP_G,
               "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!");
   dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs();
 
-  r.SetSize(A->Height());
-  x_G.SetSize(A_G->Height());
-  y_G.SetSize(A_G->Height());
+  r.SetSize(op.Height());
+  x_G.SetSize(op_G.Height());
+  y_G.SetSize(op_G.Height());
 
   // Set up smoothers for A and A_G.
-  B->SetOperator(*A);
-  B_G->SetOperator(*A_G);
+  B->SetOperator(op);
+  B_G->SetOperator(op_G);
 }
 
 namespace
 {
 
-inline void RealAddMult(Operator &op, const Vector &x, Vector &y)
+inline void RealAddMult(const Operator &op, const Vector &x, Vector &y)
 {
   op.AddMult(x, y, 1.0);
 }
 
-inline void RealAddMult(Operator &op, const ComplexVector &x, ComplexVector &y)
+inline void RealAddMult(const Operator &op, const ComplexVector &x, ComplexVector &y)
 {
   op.AddMult(x.Real(), y.Real(), 1.0);
   op.AddMult(x.Imag(), y.Imag(), 1.0);
 }
 
-inline void RealMultTranspose(Operator &op, const Vector &x, Vector &y)
+inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y)
 {
   op.MultTranspose(x, y);
 }
 
-inline void RealMultTranspose(Operator &op, const ComplexVector &x, ComplexVector &y)
+inline void RealMultTranspose(const Operator &op, const ComplexVector &x, ComplexVector &y)
 {
   op.MultTranspose(x.Real(), y.Real());
   op.MultTranspose(x.Imag(), y.Imag());
@@ -104,7 +120,7 @@ void DistRelaxationSmoother<OperType>::Mult(const VecType &x, VecType &y) const
     RealMultTranspose(*G, r, x_G);
     if (dbc_tdof_list_G)
     {
-      x_G.SetSubVector(*dbc_tdof_list_G, 0.0);
+      linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
     B_G->Mult(x_G, y_G);
     RealAddMult(*G, y_G, y);
@@ -132,7 +148,7 @@ void DistRelaxationSmoother<OperType>::MultTranspose(const VecType &x, VecType &
     }
     if (dbc_tdof_list_G)
     {
-      x_G.SetSubVector(*dbc_tdof_list_G, 0.0);
+      linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0);
     }
     B_G->MultTranspose(x_G, y_G);
     RealAddMult(*G, y_G, y);

diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp
@@ -59,7 +59,7 @@ class DivFreeSolver
     // Apply essential BC and solve the linear system.
     if (bdr_tdof_list_M)
     {
-      rhs.SetSubVector(*bdr_tdof_list_M, 0.0);
+      linalg::SetSubVector(rhs, *bdr_tdof_list_M, 0.0);
     }
     ksp->Mult(rhs, psi);