diff --git a/cmake/ExternalHYPRE.cmake b/cmake/ExternalHYPRE.cmake index 1e2d74090..96bfb8a38 100644 --- a/cmake/ExternalHYPRE.cmake +++ b/cmake/ExternalHYPRE.cmake @@ -141,7 +141,7 @@ if(PALACE_WITH_CUDA) "--with-cuda-home=${CUDA_DIR}" "--enable-curand" "--enable-cusparse" - "--enable-device-memory-pool" + # "--enable-device-memory-pool" ) if(NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") list(GET CMAKE_CUDA_ARCHITECTURES 0 HYPRE_CUDA_ARCH) diff --git a/cmake/ExternalMFEM.cmake b/cmake/ExternalMFEM.cmake index 0d5c3e380..1b3d3c3fd 100644 --- a/cmake/ExternalMFEM.cmake +++ b/cmake/ExternalMFEM.cmake @@ -364,6 +364,7 @@ set(MFEM_PATCH_FILES "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_par_tet_mesh_fix.diff" "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_mesh_prism_vtu_fix.diff" "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_workspace_vectors.diff" + "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_workspace_vectors_2.diff" "${CMAKE_SOURCE_DIR}/extern/patch/mfem/patch_hypre_runtime_compute_policy.diff" ) diff --git a/extern/patch/mfem/patch_workspace_vectors.diff b/extern/patch/mfem/patch_workspace_vectors.diff index 04522f66a..e375d3855 100644 --- a/extern/patch/mfem/patch_workspace_vectors.diff +++ b/extern/patch/mfem/patch_workspace_vectors.diff @@ -1068,10 +1068,10 @@ index 000000000..b5c0ebae8 + WorkspaceVector(WorkspaceVector &&other); + + /// No copy constructor. -+ WorkspaceVector(WorkspaceVector &other) = delete; ++ WorkspaceVector(const WorkspaceVector &other) = delete; + + /// Copy assignment: copy contents of vector, not metadata. -+ WorkspaceVector& operator=(WorkspaceVector &other) ++ WorkspaceVector& operator=(const WorkspaceVector &other) + { + Vector::operator=(other); + return *this; diff --git a/extern/patch/mfem/patch_workspace_vectors_2.diff b/extern/patch/mfem/patch_workspace_vectors_2.diff new file mode 100644 index 000000000..d93b0671f --- /dev/null +++ b/extern/patch/mfem/patch_workspace_vectors_2.diff @@ -0,0 +1,74 @@ +diff --git a/general/workspace.cpp b/general/workspace.cpp +index ad8210a61..3ec077dfa 100644 +--- a/general/workspace.cpp ++++ b/general/workspace.cpp +@@ -15,11 +15,11 @@ namespace mfem + { + + WorkspaceVector::WorkspaceVector( +- internal::WorkspaceChunk &chunk_, int offset_, int n) ++ internal::WorkspaceChunk &chunk_, int offset_, int n, int padding) + : Vector(chunk_.GetData(), chunk_.GetOffset(), n), + chunk(chunk_), + offset(offset_), +- original_size(n) ++ original_size(n + padding) + { + UseDevice(true); + } +@@ -48,8 +48,11 @@ WorkspaceChunk::WorkspaceChunk(int capacity) + WorkspaceVector WorkspaceChunk::NewVector(int n) + { + MFEM_ASSERT(HasCapacityFor(n), "Requested vector is too large."); +- WorkspaceVector vector(*this, offset, n); +- offset += n; ++ constexpr int alignment = 16; ++ const int s = (n * sizeof(double)) % alignment; ++ const int padding = (s > 0) ? (alignment - s) / sizeof(double) : 0; ++ WorkspaceVector vector(*this, offset, n, padding); ++ offset += n + padding; + vector_count += 1; + return vector; + } +diff --git a/general/workspace.hpp b/general/workspace.hpp +index b5c0ebae8..e41e71b11 100644 +--- a/general/workspace.hpp ++++ b/general/workspace.hpp +@@ -50,7 +50,8 @@ class WorkspaceVector : public Vector + bool moved_from = false; + + /// Private constructor, create with Workspace::NewVector() instead. +- WorkspaceVector(internal::WorkspaceChunk &chunk_, int offset_, int n); ++ WorkspaceVector(internal::WorkspaceChunk &chunk_, int offset_, int n, ++ int padding = 0); + + public: + /// @brief Move constructor. The moved-from WorkspaceVector has @a +@@ -104,6 +105,9 @@ public: + /// Create a WorkspaceChunk with the given @a capacity. + WorkspaceChunk(int capacity); + ++ /// Return the data offset. ++ int GetOffset() const { return offset; } ++ + /// @brief Return the available capacity (i.e. the largest vector that will + /// fit in this chunk). + int GetAvailableCapacity() const { return data.Size() - offset; } +@@ -115,15 +119,12 @@ public: + /// "original capacity" remains unchained. + int GetOriginalCapacity() const { return original_capacity; } + +- /// Return the data offset. +- int GetOffset() const { return offset; } ++ /// Returns true if this chunk can fit a new vector of size @a n. ++ bool HasCapacityFor(int n) const { return n <= GetAvailableCapacity(); } + + /// Sets whether the chunk is in the front of the list + void SetFront(bool front_) { front = front_; } + +- /// Returns true if this chunk can fit a new vector of size @a n. +- bool HasCapacityFor(int n) const { return n <= GetAvailableCapacity(); } +- + /// Returns true if this chunk is empty. + bool IsEmpty() const { return vector_count == 0; } + diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 241e99c0c..87f508241 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -19,6 +19,7 @@ #include "utils/communication.hpp" #include "utils/iodata.hpp" #include "utils/timer.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -177,7 +178,7 @@ EigenSolver::Solve(const std::vector> &mesh) const // projected appropriately. if (iodata.solver.eigenmode.init_v0) { - ComplexVector v0; + auto v0 = workspace::NewVector(E.Size()); if (iodata.solver.eigenmode.init_v0_const) { Mpi::Print(" Using constant starting vector\n"); @@ -196,8 +197,7 @@ EigenSolver::Solve(const std::vector> &mesh) const // Debug // const auto &Grad = spaceop.GetGradMatrix(); - // ComplexVector r0(Grad->Width()); - // r0.UseDevice(true); + // auto r0 = workspace::NewVector(Grad.Width()); // Grad.MultTranspose(v0.Real(), r0.Real()); // Grad.MultTranspose(v0.Imag(), r0.Imag()); // r0.Print(); diff --git a/palace/fem/fespace.hpp b/palace/fem/fespace.hpp index b5691d270..1dc355913 100644 --- a/palace/fem/fespace.hpp +++ b/palace/fem/fespace.hpp @@ -31,9 +31,6 @@ class FiniteElementSpace mutable ceed::CeedObjectMap basis; mutable ceed::CeedObjectMap restr, interp_restr, interp_range_restr; - // Temporary storage for operator applications. - mutable ComplexVector tx, lx, ly; - bool HasUniqueInterpRestriction(const mfem::FiniteElement &fe) const { // For interpolation operators and tensor-product elements, we need native (not @@ -63,9 +60,6 @@ class FiniteElementSpace : fespace(&mesh.Get(), std::forward(args)...), mesh(mesh) { ResetCeedObjects(); - tx.UseDevice(true); - lx.UseDevice(true); - ly.UseDevice(true); } virtual ~FiniteElementSpace() { ResetCeedObjects(); } @@ -129,48 +123,6 @@ class FiniteElementSpace mfem::Geometry::Type geom, const std::vector &indices, bool is_interp = false, bool is_interp_range = false); - template - auto &GetTVector() const - { - tx.SetSize(GetTrueVSize()); - if constexpr (std::is_same::value) - { - return tx; - } - else - { - return tx.Real(); - } - } - - template - auto &GetLVector() const - { - lx.SetSize(GetVSize()); - if constexpr (std::is_same::value) - { - return lx; - } - else - { - return lx.Real(); - } - } - - template - auto &GetLVector2() const - { - ly.SetSize(GetVSize()); - if constexpr (std::is_same::value) - { - return ly; - } - else - { - return ly.Real(); - } - } - // Get the associated MPI communicator. MPI_Comm GetComm() const { return fespace.GetComm(); } }; diff --git a/palace/fem/libceed/operator.cpp b/palace/fem/libceed/operator.cpp index 4fafb72d4..ad00f9e9f 100644 --- a/palace/fem/libceed/operator.cpp +++ b/palace/fem/libceed/operator.cpp @@ -10,6 +10,7 @@ #include "fem/fespace.hpp" #include "linalg/hypre.hpp" #include "utils/omp.hpp" +#include "utils/workspace.hpp" namespace palace::ceed { @@ -37,7 +38,6 @@ Operator::Operator(int h, int w) : palace::Operator(h, w) u[id] = loc_u; v[id] = loc_v; } - temp.UseDevice(true); } Operator::~Operator() @@ -174,15 +174,15 @@ void Operator::AddMult(const Vector &x, Vector &y, const double a) const MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!"); if (dof_multiplicity.Size() > 0) { - temp.SetSize(height); - temp = 0.0; - CeedAddMult(op, u, v, x, temp); + auto t = workspace::NewVector(height); + t = 0.0; + CeedAddMult(op, u, v, x, t); { const auto *d_dof_multiplicity = dof_multiplicity.Read(); - const auto *d_temp = temp.Read(); + const auto *d_t = t.Read(); auto *d_y = y.ReadWrite(); mfem::forall(height, [=] MFEM_HOST_DEVICE(int i) - { d_y[i] += d_dof_multiplicity[i] * d_temp[i]; }); + { d_y[i] += d_dof_multiplicity[i] * d_t[i]; }); } } else @@ -203,15 +203,15 @@ void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) cons "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!"); if (dof_multiplicity.Size() > 0) { - temp.SetSize(height); + auto t = workspace::NewVector(height); { const auto *d_dof_multiplicity = dof_multiplicity.Read(); const auto *d_x = x.Read(); - auto *d_temp = temp.Write(); + auto *d_t = t.Write(); mfem::forall(height, [=] MFEM_HOST_DEVICE(int i) - { d_temp[i] = d_dof_multiplicity[i] * d_x[i]; }); + { d_t[i] = d_dof_multiplicity[i] * d_x[i]; }); } - CeedAddMult(op_t, v, u, temp, y); + CeedAddMult(op_t, v, u, t, y); } else { diff --git a/palace/fem/libceed/operator.hpp b/palace/fem/libceed/operator.hpp index 72848fcea..060aac844 100644 --- a/palace/fem/libceed/operator.hpp +++ b/palace/fem/libceed/operator.hpp @@ -35,7 +35,6 @@ class Operator : public palace::Operator std::vector op, op_t; std::vector u, v; Vector dof_multiplicity; - mutable Vector temp; public: Operator(int h, int w); diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index 431ff5acc..fa0b53b7f 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -15,6 +15,7 @@ // clang-format on #include "linalg/divfree.hpp" #include "utils/communication.hpp" +#include "utils/workspace.hpp" namespace { @@ -475,13 +476,15 @@ double ArpackEigenvalueSolver::GetError(int i, EigenvalueSolver::ErrorType type) void ArpackEigenvalueSolver::RescaleEigenvectors(int num_eig) { + auto x = workspace::NewVector(n); + auto r = workspace::NewVector(n); res = std::make_unique(num_eig); xscale = std::make_unique(num_eig); for (int i = 0; i < num_eig; i++) { - x1.Set(V.get() + i * n, n, false); - xscale.get()[i] = 1.0 / GetEigenvectorNorm(x1, y1); - res.get()[i] = GetResidualNorm(eig.get()[i], x1, y1) / linalg::Norml2(comm, x1); + x.Set(V.get() + i * n, n, false); + xscale.get()[i] = 1.0 / GetEigenvectorNorm(x, r); + res.get()[i] = GetResidualNorm(eig.get()[i], x, r) / linalg::Norml2(comm, x); } } @@ -512,14 +515,6 @@ void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperat delta = 2.0 / normK; } } - - // Set up workspace. - x1.SetSize(opK->Height()); - y1.SetSize(opK->Height()); - z1.SetSize(opK->Height()); - x1.UseDevice(true); - y1.UseDevice(true); - z1.UseDevice(true); n = opK->Height(); } @@ -527,7 +522,8 @@ int ArpackEPSSolver::Solve() { // Set some defaults (default maximum iterations from SLEPc). CheckParameters(); - HYPRE_BigInt N = linalg::GlobalSize(comm, z1); + HYPRE_BigInt N = n; + Mpi::GlobalSum(1, &N, comm); if (ncv > N) { ncv = mfem::internal::to_int(N); @@ -580,37 +576,42 @@ void ArpackEPSSolver::ApplyOp(const std::complex *px, // Case 2: Shift-and-invert spectral transformation (opInv = (K - σ M)⁻¹) // y = (K - σ M)⁻¹ M x . // The input pointers are always to host memory (ARPACK runs on host). - x1.Set(px, n, false); + auto x = workspace::NewVector(n); + auto y = workspace::NewVector(n); + auto z = workspace::NewVector(n); + x.Set(px, n, false); if (!sinvert) { - opK->Mult(x1, z1); - opInv->Mult(z1, y1); - y1 *= 1.0 / gamma; + opK->Mult(x, z); + opInv->Mult(z, y); + y *= 1.0 / gamma; } else { - opM->Mult(x1, z1); - opInv->Mult(z1, y1); - y1 *= gamma; + opM->Mult(x, z); + opInv->Mult(z, y); + y *= gamma; } if (opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); - opProj->Mult(y1); - // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(comm, y1)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y)); + opProj->Mult(y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(comm, y)); } - y1.Get(py, n, false); + y.Get(py, n, false); } void ArpackEPSSolver::ApplyOpB(const std::complex *px, std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); - x1.Set(px, n, false); - opB->Mult(x1.Real(), y1.Real()); - opB->Mult(x1.Imag(), y1.Imag()); - y1 *= delta * gamma; - y1.Get(py, n, false); + auto x = workspace::NewVector(n); + auto y = workspace::NewVector(n); + x.Set(px, n, false); + opB->Mult(x.Real(), y.Real()); + opB->Mult(x.Imag(), y.Imag()); + y *= delta * gamma; + y.Get(py, n, false); } double ArpackEPSSolver::GetResidualNorm(std::complex l, const ComplexVector &x, @@ -668,18 +669,6 @@ void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperat delta = 2.0 / (normK + gamma * normC); } } - - // Set up workspace. - x1.SetSize(opK->Height()); - x2.SetSize(opK->Height()); - y1.SetSize(opK->Height()); - y2.SetSize(opK->Height()); - z1.SetSize(opK->Height()); - x1.UseDevice(true); - x2.UseDevice(true); - y1.UseDevice(true); - y2.UseDevice(true); - z1.UseDevice(true); n = opK->Height(); } @@ -688,7 +677,8 @@ int ArpackPEPSolver::Solve() // Set some defaults (from SLEPc ARPACK interface). The problem size is the size of the // 2x2 block linearized problem. CheckParameters(); - HYPRE_BigInt N = linalg::GlobalSize(comm, z1); + HYPRE_BigInt N = n; + Mpi::GlobalSum(1, &N, comm); if (ncv > 2 * N) { ncv = mfem::internal::to_int(2 * N); @@ -754,6 +744,11 @@ void ArpackPEPSolver::ApplyOp(const std::complex *px, // L₀ = [ -K 0 ] L₁ = [ C M ] // [ 0 M ] , [ M 0 ] . // The input pointers are always to host memory (ARPACK runs on host). + auto x1 = workspace::NewVector(n); + auto x2 = workspace::NewVector(n); + auto y1 = workspace::NewVector(n); + auto y2 = workspace::NewVector(n); + auto z = workspace::NewVector(n); x1.Set(px, n, false); x2.Set(px + n, n, false); if (!sinvert) @@ -765,10 +760,9 @@ void ArpackPEPSolver::ApplyOp(const std::complex *px, opProj->Mult(y1); // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); } - - opK->Mult(x1, z1); - opC->AddMult(x2, z1, std::complex(gamma, 0.0)); - opInv->Mult(z1, y2); + opK->Mult(x1, z); + opC->AddMult(x2, z, std::complex(gamma, 0.0)); + opInv->Mult(z, y2); y2 *= -1.0 / (gamma * gamma); if (opProj) { @@ -780,9 +774,9 @@ void ArpackPEPSolver::ApplyOp(const std::complex *px, else { y2.AXPBYPCZ(sigma, x1, gamma, x2, 0.0); // Just temporarily - opM->Mult(y2, z1); - opC->AddMult(x1, z1, std::complex(1.0, 0.0)); - opInv->Mult(z1, y1); + opM->Mult(y2, z); + opC->AddMult(x1, z, std::complex(1.0, 0.0)); + opInv->Mult(z, y1); y1 *= -gamma; if (opProj) { @@ -807,6 +801,10 @@ void ArpackPEPSolver::ApplyOpB(const std::complex *px, std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); + auto x1 = workspace::NewVector(n); + auto x2 = workspace::NewVector(n); + auto y1 = workspace::NewVector(n); + auto y2 = workspace::NewVector(n); x1.Set(px, n, false); x2.Set(px + n, n, false); opB->Mult(x1.Real(), y1.Real()); diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index 0f37cf944..7e80662ab 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -84,9 +84,6 @@ class ArpackEigenvalueSolver : public EigenvalueSolver // which case identity is used. const Operator *opB; - // Workspace vector for operator applications. - mutable ComplexVector x1, y1, z1; - // Perform the ARPACK RCI loop. int SolveInternal(int n, std::complex *r, std::complex *V, std::complex *eig, int *perm); @@ -218,9 +215,6 @@ class ArpackPEPSolver : public ArpackEigenvalueSolver // Operator norms for scaling. mutable double normK, normC, normM; - // Workspace vectors for operator applications. - mutable ComplexVector x2, y2; - protected: void ApplyOp(const std::complex *px, std::complex *py) const override; void ApplyOpB(const std::complex *px, std::complex *py) const override; diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 788796db5..9d59d4be1 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -4,6 +4,7 @@ #include "chebyshev.hpp" #include +#include "utils/workspace.hpp" namespace palace { @@ -65,22 +66,28 @@ inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVec } template -inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &d) +inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &d, + Vector &y) { - const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice(); + const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice() || y.UseDevice(); const int N = d.Size(); const auto *DI = dinv.Read(use_dev); const auto *R = r.Read(use_dev); auto *D = d.Write(use_dev); + auto *Y = y.ReadWrite(use_dev); mfem::forall_switch(use_dev, N, - [=] MFEM_HOST_DEVICE(int i) { D[i] = sr * DI[i] * R[i]; }); + [=] MFEM_HOST_DEVICE(int i) + { + D[i] = sr * DI[i] * R[i]; + Y[i] += D[i]; + }); } template inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const ComplexVector &r, - ComplexVector &d) + ComplexVector &d, ComplexVector &y) { - const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice(); + const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice() || y.UseDevice(); const int N = dinv.Size(); const auto *DIR = dinv.Real().Read(use_dev); const auto *DII = dinv.Imag().Read(use_dev); @@ -88,6 +95,8 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple const auto *RI = r.Imag().Read(use_dev); auto *DR = d.Real().Write(use_dev); auto *DI = d.Imag().Write(use_dev); + auto *YR = y.Real().ReadWrite(use_dev); + auto *YI = y.Imag().ReadWrite(use_dev); if constexpr (!Transpose) { mfem::forall_switch(use_dev, N, @@ -95,6 +104,8 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple { DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]); DI[i] = sr * (DII[i] * RR[i] + DIR[i] * RI[i]); + YR[i] += DR[i]; + YI[i] += DI[i]; }); } else @@ -104,28 +115,35 @@ inline void ApplyOrder0(const double sr, const ComplexVector &dinv, const Comple { DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]); DI[i] = sr * (-DII[i] * RR[i] + DIR[i] * RI[i]); + YR[i] += DR[i]; + YI[i] += DI[i]; }); } } template inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, - const Vector &r, Vector &d) + const Vector &r, Vector &d, Vector &y) { - const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice(); + const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice() || y.UseDevice(); const int N = dinv.Size(); const auto *DI = dinv.Read(use_dev); const auto *R = r.Read(use_dev); auto *D = d.ReadWrite(use_dev); - mfem::forall_switch( - use_dev, N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sd * D[i] + sr * DI[i] * R[i]; }); + auto *Y = y.ReadWrite(use_dev); + mfem::forall_switch(use_dev, N, + [=] MFEM_HOST_DEVICE(int i) + { + D[i] = sd * D[i] + sr * DI[i] * R[i]; + Y[i] += D[i]; + }); } template inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &dinv, - const ComplexVector &r, ComplexVector &d) + const ComplexVector &r, ComplexVector &d, ComplexVector &y) { - const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice(); + const bool use_dev = dinv.UseDevice() || r.UseDevice() || d.UseDevice() || y.UseDevice(); const int N = dinv.Size(); const auto *DIR = dinv.Real().Read(use_dev); const auto *DII = dinv.Imag().Read(use_dev); @@ -133,6 +151,8 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d const auto *RI = r.Imag().Read(use_dev); auto *DR = d.Real().ReadWrite(use_dev); auto *DI = d.Imag().ReadWrite(use_dev); + auto *YR = y.Real().ReadWrite(use_dev); + auto *YI = y.Imag().ReadWrite(use_dev); if constexpr (!Transpose) { mfem::forall_switch(use_dev, N, @@ -140,6 +160,8 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d { DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); DI[i] = sd * DI[i] + sr * (DII[i] * RR[i] + DIR[i] * RI[i]); + YR[i] += DR[i]; + YI[i] += DI[i]; }); } else @@ -149,6 +171,8 @@ inline void ApplyOrderK(const double sd, const double sr, const ComplexVector &d { DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]); DI[i] = sd * DI[i] + sr * (-DII[i] * RR[i] + DIR[i] * RI[i]); + YR[i] += DR[i]; + YI[i] += DI[i]; }); } } @@ -168,9 +192,7 @@ template void ChebyshevSmoother::SetOperator(const OperType &op) { A = &op; - d.SetSize(op.Height()); dinv.SetSize(op.Height()); - d.UseDevice(true); dinv.UseDevice(true); op.AssembleDiagonal(dinv); dinv.Reciprocal(); @@ -186,15 +208,16 @@ void ChebyshevSmoother::SetOperator(const OperType &op) } template -void ChebyshevSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const +void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) { + auto r = workspace::NewVector(this->height); if (this->initial_guess || it > 0) { ApplyOp(*A, y, r); - linalg::AXPBY(1.0, x, -1.0, r); + linalg::AXPBY(1.0, x, -1.0, r); } else { @@ -204,16 +227,15 @@ void ChebyshevSmoother::Mult2(const VecType &x, VecType &y, VecType &r // 4th-kind Chebyshev smoother, from Phillips and Fischer or Lottes (with k -> k + 1 // shift due to 1-based indexing). - ApplyOrder0(4.0 / (3.0 * lambda_max), dinv, r, d); + auto d = workspace::NewVector(this->height); + ApplyOrder0(4.0 / (3.0 * lambda_max), dinv, r, d, y); for (int k = 1; k < order; k++) { - y += d; ApplyOp(*A, d, r, -1.0); const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); - ApplyOrderK(sd, sr, dinv, r, d); + ApplyOrderK(sd, sr, dinv, r, d, y); } - y += d; } } @@ -231,9 +253,7 @@ template void ChebyshevSmoother1stKind::SetOperator(const OperType &op) { A = &op; - d.SetSize(op.Height()); dinv.SetSize(op.Height()); - d.UseDevice(true); dinv.UseDevice(true); op.AssembleDiagonal(dinv); dinv.Reciprocal(); @@ -256,16 +276,16 @@ void ChebyshevSmoother1stKind::SetOperator(const OperType &op) } template -void ChebyshevSmoother1stKind::Mult2(const VecType &x, VecType &y, - VecType &r) const +void ChebyshevSmoother1stKind::Mult(const VecType &x, VecType &y) const { // Apply smoother: y = y + p(A) (x - A y) . for (int it = 0; it < pc_it; it++) { + auto r = workspace::NewVector(this->height); if (this->initial_guess || it > 0) { ApplyOp(*A, y, r); - linalg::AXPBY(1.0, x, -1.0, r); + linalg::AXPBY(1.0, x, -1.0, r); } else { @@ -274,19 +294,18 @@ void ChebyshevSmoother1stKind::Mult2(const VecType &x, VecType &y, } // 1th-kind Chebyshev smoother, from Phillips and Fischer or Adams. - ApplyOrder0(1.0 / theta, dinv, r, d); + auto d = workspace::NewVector(this->height); + ApplyOrder0(1.0 / theta, dinv, r, d, y); double rhop = delta / theta; for (int k = 1; k < order; k++) { - y += d; ApplyOp(*A, d, r, -1.0); const double rho = 1.0 / (2.0 * theta / delta - rhop); const double sd = rho * rhop; const double sr = 2.0 * rho / delta; - ApplyOrderK(sd, sr, dinv, r, d); + ApplyOrderK(sd, sr, dinv, r, d, y); rhop = rho; } - y += d; } } diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index a2de1e340..7f36e8d4b 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -40,31 +40,16 @@ class ChebyshevSmoother : public Solver // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max, sf_max; - // Temporary vector for smoother application. - mutable VecType d; - public: ChebyshevSmoother(MPI_Comm comm, int smooth_it, int poly_order, double sf_max); void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override - { - MFEM_ABORT("ChebyshevSmoother implements Mult2 using an additional preallocated " - "temporary vector!"); - } + void Mult(const VecType &x, VecType &y) const override; void MultTranspose(const VecType &x, VecType &y) const override { - MFEM_ABORT("ChebyshevSmoother implements MultTranspose2 using an additional " - "preallocated temporary vector!"); - } - - void Mult2(const VecType &x, VecType &y, VecType &r) const override; - - void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override - { - Mult2(x, y, r); // Assumes operator symmetry + Mult(x, y); // Assumes operator symmetry } }; @@ -96,32 +81,17 @@ class ChebyshevSmoother1stKind : public Solver // polynomial smoothing. double theta, delta, sf_max, sf_min; - // Temporary vector for smoother application. - mutable VecType d; - public: ChebyshevSmoother1stKind(MPI_Comm comm, int smooth_it, int poly_order, double sf_max, double sf_min); void SetOperator(const OperType &op) override; - void Mult(const VecType &x, VecType &y) const override - { - MFEM_ABORT("ChebyshevSmoother1stKind implements Mult2 using an additional preallocated " - "temporary vector!"); - } + void Mult(const VecType &x, VecType &y) const override; void MultTranspose(const VecType &x, VecType &y) const override { - MFEM_ABORT("ChebyshevSmoother1stKind implements MultTranspose2 using an additional " - "preallocated temporary vector!"); - } - - void Mult2(const VecType &x, VecType &y, VecType &r) const override; - - void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override - { - Mult2(x, y, r); // Assumes operator symmetry + Mult(x, y); // Assumes operator symmetry } }; diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 48a1cfb3f..93694b66c 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -6,6 +6,7 @@ #include #include "linalg/chebyshev.hpp" #include "linalg/rap.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -48,12 +49,6 @@ void DistRelaxationSmoother::SetOperators(const OperType &op, "Invalid operator sizes for DistRelaxationSmoother!"); A = &op; A_G = &op_G; - x_G.SetSize(op_G.Height()); - y_G.SetSize(op_G.Height()); - r_G.SetSize(op_G.Height()); - x_G.UseDevice(true); - y_G.UseDevice(true); - r_G.UseDevice(true); const auto *PtAP_G = dynamic_cast(&op_G); MFEM_VERIFY(PtAP_G, @@ -96,57 +91,72 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple } // namespace template -void DistRelaxationSmoother::Mult2(const VecType &x, VecType &y, VecType &r) const +void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const { // Apply smoother. for (int it = 0; it < pc_it; it++) { // y = y + B (x - A y) B->SetInitialGuess(this->initial_guess || it > 0); - B->Mult2(x, y, r); + B->Mult(x, y); // y = y + G B_G Gᵀ (x - A y) - A->Mult(y, r); - linalg::AXPBY(1.0, x, -1.0, r); - RealMultTranspose(*G, r, x_G); - if (dbc_tdof_list_G) { - linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); + auto x_G = workspace::NewVector(B_G->Height()); + { + auto r = workspace::NewVector(this->height); + A->Mult(y, r); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); + } + if (dbc_tdof_list_G) + { + linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); + } + { + auto y_G = workspace::NewVector(B_G->Height()); + B_G->Mult(x_G, y_G); + RealAddMult(*G, y_G, y); + } } - B_G->Mult2(x_G, y_G, r_G); - RealAddMult(*G, y_G, y); } } template -void DistRelaxationSmoother::MultTranspose2(const VecType &x, VecType &y, - VecType &r) const +void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType &y) const { // Apply transpose. B->SetInitialGuess(true); for (int it = 0; it < pc_it; it++) { // y = y + G B_Gᵀ Gᵀ (x - A y) - if (this->initial_guess || it > 0) { - A->Mult(y, r); - linalg::AXPBY(1.0, x, -1.0, r); - RealMultTranspose(*G, r, x_G); + auto x_G = workspace::NewVector(B_G->Height()); + if (this->initial_guess || it > 0) + { + auto r = workspace::NewVector(this->height); + A->Mult(y, r); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); + } + else + { + y = 0.0; + RealMultTranspose(*G, x, x_G); + } + if (dbc_tdof_list_G) + { + linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); + } + { + auto y_G = workspace::NewVector(B_G->Height()); + B_G->MultTranspose(x_G, y_G); + RealAddMult(*G, y_G, y); + } } - else - { - y = 0.0; - RealMultTranspose(*G, x, x_G); - } - if (dbc_tdof_list_G) - { - linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); - } - B_G->MultTranspose2(x_G, y_G, r_G); - RealAddMult(*G, y_G, y); // y = y + Bᵀ (x - A y) - B->MultTranspose2(x, y, r); + B->MultTranspose(x, y); } } diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 77969ffe4..c6ee30ce0 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -46,9 +46,6 @@ class DistRelaxationSmoother : public Solver mutable std::unique_ptr> B; std::unique_ptr> B_G; - // Temporary vectors for smoother application. - mutable VecType x_G, y_G, r_G; - public: DistRelaxationSmoother(MPI_Comm comm, const Operator &G, int smooth_it, int cheby_smooth_it, int cheby_order, double cheby_sf_max, @@ -62,21 +59,9 @@ class DistRelaxationSmoother : public Solver void SetOperators(const OperType &op, const OperType &op_G); - void Mult(const VecType &x, VecType &y) const override - { - MFEM_ABORT("DistRelaxationSmoother implements Mult2 using an additional preallocated " - "temporary vector!"); - } - - void MultTranspose(const VecType &x, VecType &y) const override - { - MFEM_ABORT("DistRelaxationSmoother implements MultTranspose2 using an additional " - "preallocated temporary vector!"); - } - - void Mult2(const VecType &x, VecType &y, VecType &r) const override; + void Mult(const VecType &x, VecType &y) const override; - void MultTranspose2(const VecType &x, VecType &y, VecType &r) const override; + void MultTranspose(const VecType &x, VecType &y) const override; }; } // namespace palace diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 853a26d82..51daeb6a5 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -14,6 +14,7 @@ #include "linalg/rap.hpp" #include "models/materialoperator.hpp" #include "utils/timer.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -90,10 +91,9 @@ DivFreeSolver::DivFreeSolver( const int mg_smooth_order = std::max(h1_fespaces.GetFinestFESpace().GetMaxElementOrder(), 2); pc = std::make_unique>( - h1_fespaces.GetFinestFESpace().GetComm(), - - std::move(amg), h1_fespaces.GetProlongationOperators(), nullptr, 1, 1, - mg_smooth_order, 1.0, 0.0, true); + h1_fespaces.GetFinestFESpace().GetComm(), std::move(amg), + h1_fespaces.GetProlongationOperators(), nullptr, 1, 1, mg_smooth_order, 1.0, 0.0, + true); } else { @@ -109,11 +109,6 @@ DivFreeSolver::DivFreeSolver( ksp = std::make_unique>(std::move(pcg), std::move(pc)); ksp->SetOperators(*M, *M); - - psi.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - rhs.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - psi.UseDevice(true); - rhs.UseDevice(true); } template @@ -122,6 +117,7 @@ void DivFreeSolver::Mult(VecType &y) const BlockTimer bt(Timer::DIV_FREE); // Compute the divergence of y. + auto rhs = workspace::NewVector(M->Height()); if constexpr (std::is_same::value) { WeakDiv->Mult(y.Real(), rhs.Real()); @@ -135,8 +131,9 @@ void DivFreeSolver::Mult(VecType &y) const // Apply essential BC and solve the linear system. if (bdr_tdof_list_M) { - linalg::SetSubVector(rhs, *bdr_tdof_list_M, 0.0); + linalg::SetSubVector(rhs, *bdr_tdof_list_M, 0.0); } + auto psi = workspace::NewVector(M->Height()); ksp->Mult(rhs, psi); // Compute the irrotational portion of y and subtract. diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index 47b14a91e..0056c85d0 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -46,9 +46,6 @@ class DivFreeSolver // Linear solver for the projected linear system (Gᵀ M G) y = x. std::unique_ptr> ksp; - // Workspace objects for solver application. - mutable VecType psi, rhs; - public: DivFreeSolver(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace, AuxiliaryFiniteElementSpaceHierarchy &h1_fespaces, diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp index a1e001685..1c014748f 100644 --- a/palace/linalg/errorestimator.cpp +++ b/palace/linalg/errorestimator.cpp @@ -13,6 +13,7 @@ #include "utils/communication.hpp" #include "utils/omp.hpp" #include "utils/timer.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -74,12 +75,8 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op, std::make_unique(flux.PartialAssemble(), nd_fespace)); } M = WrapOperator(GetMassMatrix(nd_fespace)); - ksp = ConfigureLinearSolver(nd_fespace.GetComm(), tol, max_it, print); ksp->SetOperators(*M, *M); - - rhs.SetSize(nd_fespace.GetTrueVSize()); - rhs.UseDevice(true); } template @@ -99,19 +96,15 @@ FluxProjector::FluxProjector(const MaterialOperator &mat_op, flux.PartialAssemble(), h1_fespace, rt_fespace, false)); } M = WrapOperator(GetMassMatrix(rt_fespace)); - - ksp = ConfigureLinearSolver(h1_fespace.GetComm(), tol, max_it, print); + ksp = ConfigureLinearSolver(rt_fespace.GetComm(), tol, max_it, print); ksp->SetOperators(*M, *M); - - rhs.SetSize(rt_fespace.GetTrueVSize()); - rhs.UseDevice(true); } template void FluxProjector::Mult(const VecType &x, VecType &y) const { BlockTimer bt(Timer::SOLVE_ESTIMATOR); - MFEM_ASSERT(y.Size() == rhs.Size(), "Invalid vector dimensions for FluxProjector::Mult!"); + auto rhs = workspace::NewVector(y.Size()); Flux->Mult(x, rhs); // Mpi::Print(" Computing smooth flux projection for error estimation\n"); ksp->Mult(rhs, y); @@ -122,10 +115,8 @@ CurlFluxErrorEstimator::CurlFluxErrorEstimator(const MaterialOperator & FiniteElementSpace &nd_fespace, double tol, int max_it, int print) : mat_op(mat_op), nd_fespace(nd_fespace), - projector(mat_op, nd_fespace, tol, max_it, print), F(nd_fespace.GetTrueVSize()), - F_gf(nd_fespace.GetVSize()), U_gf(nd_fespace.GetVSize()) + projector(mat_op, nd_fespace, tol, max_it, print) { - F.UseDevice(true); } template @@ -135,24 +126,29 @@ void CurlFluxErrorEstimator::AddErrorIndicator(const VecType &U, // Compute the projection of the discontinuous flux onto the smooth finite element space // and populate the corresponding grid functions. BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); - if constexpr (std::is_same::value) + auto U_gf = workspace::NewVector(nd_fespace.GetVSize()); + auto F_gf = workspace::NewVector(nd_fespace.GetVSize()); { - nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real()); - nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag()); - nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real()); - nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag()); - U_gf.Real().HostRead(); - U_gf.Imag().HostRead(); - F_gf.Real().HostRead(); - F_gf.Imag().HostRead(); - } - else - { - nd_fespace.GetProlongationMatrix()->Mult(U, U_gf); - nd_fespace.GetProlongationMatrix()->Mult(F, F_gf); - U_gf.HostRead(); - F_gf.HostRead(); + auto F = workspace::NewVector(nd_fespace.GetTrueVSize()); + projector.Mult(U, F); + if constexpr (std::is_same::value) + { + nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real()); + nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag()); + nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real()); + nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag()); + U_gf.Real().HostRead(); + U_gf.Imag().HostRead(); + F_gf.Real().HostRead(); + F_gf.Imag().HostRead(); + } + else + { + nd_fespace.GetProlongationMatrix()->Mult(U, U_gf); + nd_fespace.GetProlongationMatrix()->Mult(F, F_gf); + U_gf.HostRead(); + F_gf.HostRead(); + } } // Loop over elements and accumulate the estimates from this component. The discontinuous @@ -251,10 +247,8 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op, rt_fec(std::make_unique(h1_fespace.GetFEColl().GetOrder() - 1, h1_fespace.SpaceDimension())), rt_fespace(std::make_unique(h1_fespace.GetMesh(), rt_fec.get())), - projector(mat_op, h1_fespace, *rt_fespace, tol, max_it, print), - F(rt_fespace->GetTrueVSize()), F_gf(rt_fespace->GetVSize()), U_gf(h1_fespace.GetVSize()) + projector(mat_op, h1_fespace, *rt_fespace, tol, max_it, print) { - F.UseDevice(true); } void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, @@ -263,11 +257,16 @@ void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U, // Compute the projection of the discontinuous flux onto the smooth finite element space // and populate the corresponding grid functions. BlockTimer bt(Timer::ESTIMATION); - projector.Mult(U, F); - h1_fespace.GetProlongationMatrix()->Mult(U, U_gf); - rt_fespace->GetProlongationMatrix()->Mult(F, F_gf); - U_gf.HostRead(); - F_gf.HostRead(); + auto U_gf = workspace::NewVector(h1_fespace.GetVSize()); + auto F_gf = workspace::NewVector(rt_fespace->GetVSize()); + { + auto F = workspace::NewVector(rt_fespace->GetTrueVSize()); + projector.Mult(U, F); + h1_fespace.GetProlongationMatrix()->Mult(U, U_gf); + rt_fespace->GetProlongationMatrix()->Mult(F, F_gf); + U_gf.HostRead(); + F_gf.HostRead(); + } // Loop over elements and accumulate the estimates from this component. The discontinuous // flux is ε ∇U. diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp index ab3c53711..7e89055c3 100644 --- a/palace/linalg/errorestimator.hpp +++ b/palace/linalg/errorestimator.hpp @@ -38,9 +38,6 @@ class FluxProjector // Linear solver and preconditioner for the projected linear system. std::unique_ptr> ksp; - // Workspace object for solver application. - mutable VecType rhs; - public: FluxProjector(const MaterialOperator &mat_op, const FiniteElementSpace &nd_fespace, double tol, int max_it, int print); @@ -64,9 +61,6 @@ class CurlFluxErrorEstimator // Global L2 projection solver. FluxProjector projector; - // Temporary vectors for error estimation. - mutable VecType F, F_gf, U_gf; - public: CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace, double tol, int max_it, int print); @@ -93,9 +87,6 @@ class GradFluxErrorEstimator // Global L2 projection solver. FluxProjector projector; - // Temporary vectors for error estimation. - mutable Vector F, F_gf, U_gf; - public: GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &rt_fespace, double tol, int max_it, int print); diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 646ad577f..c5967c053 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -8,6 +8,7 @@ #include "linalg/distrelaxation.hpp" #include "linalg/rap.hpp" #include "utils/timer.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -19,8 +20,7 @@ GeometricMultigridSolver::GeometricMultigridSolver( int cycle_it, int smooth_it, int cheby_order, double cheby_sf_max, double cheby_sf_min, bool cheby_4th_kind) : Solver(), pc_it(cycle_it), P(P.begin(), P.end()), A(P.size() + 1), - dbc_tdof_lists(P.size()), B(P.size() + 1), X(P.size() + 1), Y(P.size() + 1), - R(P.size() + 1), use_timer(false) + dbc_tdof_lists(P.size()), B(P.size() + 1), use_timer(false) { // Configure levels of geometric coarsening. Multigrid vectors will be configured at first // call to Mult. The multigrid operator size is set based on the finest space dimension. @@ -109,13 +109,6 @@ void GeometricMultigridSolver::SetOperator(const OperType &op) { B[l]->SetOperator(mg_op->GetOperatorAtLevel(l)); } - - X[l].SetSize(A[l]->Height()); - Y[l].SetSize(A[l]->Height()); - R[l].SetSize(A[l]->Height()); - X[l].UseDevice(true); - Y[l].UseDevice(true); - R[l].UseDevice(true); } this->height = op.Height(); @@ -133,26 +126,24 @@ void GeometricMultigridSolver::Mult(const VecType &x, VecType &y) cons "Single-level geometric multigrid will not work with multiple iterations!"); // Apply V-cycle. The initial guess for y is zero'd at the first pre-smooth iteration. - X.back() = x; for (int it = 0; it < pc_it; it++) { - VCycle(n_levels - 1, (it > 0)); + VCycle(x, y, n_levels - 1, (it > 0)); } - y = Y.back(); } namespace { -inline void RealMult(const Operator &op, const Vector &x, Vector &y) +inline void RealAddMult(const Operator &op, const Vector &x, Vector &y) { - op.Mult(x, y); + op.AddMult(x, y, 1.0); } -inline void RealMult(const Operator &op, const ComplexVector &x, ComplexVector &y) +inline void RealAddMult(const Operator &op, const ComplexVector &x, ComplexVector &y) { - op.Mult(x.Real(), y.Real()); - op.Mult(x.Imag(), y.Imag()); + op.AddMult(x.Real(), y.Real(), 1.0); + op.AddMult(x.Imag(), y.Imag(), 1.0); } inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y) @@ -169,39 +160,44 @@ inline void RealMultTranspose(const Operator &op, const ComplexVector &x, Comple } // namespace template -void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const +void GeometricMultigridSolver::VCycle(const VecType &x, VecType &y, int l, + bool initial_guess) const { - // Pre-smooth, with zero initial guess (Y = 0 set inside). This is the coarse solve at + // Pre-smooth, with zero initial guess (y = 0 set inside). This is the coarse solve at // level 0. Important to note that the smoothers must respect the initial guess flag - // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . + // correctly (given x, y, compute y <- y + B (x - A y)) . B[l]->SetInitialGuess(initial_guess); if (l == 0) { BlockTimer bt(Timer::KSP_COARSE_SOLVE, use_timer); - B[l]->Mult(X[l], Y[l]); + B[l]->Mult(x, y); return; } - B[l]->Mult2(X[l], Y[l], R[l]); - - // Compute residual. - A[l]->Mult(Y[l], R[l]); - linalg::AXPBY(1.0, X[l], -1.0, R[l]); + B[l]->Mult(x, y); - // Coarse grid correction. - RealMultTranspose(*P[l - 1], R[l], X[l - 1]); - if (dbc_tdof_lists[l - 1]) { - linalg::SetSubVector(X[l - 1], *dbc_tdof_lists[l - 1], 0.0); - } - VCycle(l - 1, false); + // Compute residual and restrict. + auto xr = workspace::NewVector(A[l - 1]->Height()); + { + auto r = workspace::NewVector(A[l]->Height()); + A[l]->Mult(y, r); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*P[l - 1], r, xr); + } - // Prolongate and add. - RealMult(*P[l - 1], Y[l - 1], R[l]); - Y[l] += R[l]; + // Coarse grid correction. + if (dbc_tdof_lists[l - 1]) + { + linalg::SetSubVector(xr, *dbc_tdof_lists[l - 1], 0.0); + } + auto yr = workspace::NewVector(A[l - 1]->Height()); + VCycle(xr, yr, l - 1, false); + RealAddMult(*P[l - 1], yr, y); + } // Post-smooth, with nonzero initial guess. B[l]->SetInitialGuess(true); - B[l]->MultTranspose2(X[l], Y[l], R[l]); + B[l]->MultTranspose(x, y); } template class GeometricMultigridSolver; diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index 092ead72a..9906e9de0 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -46,15 +46,11 @@ class GeometricMultigridSolver : public Solver // Smoothers for each level. Coarse-level solver is B[0]. mutable std::vector>> B; - // Temporary vectors for preconditioner application. The type of these is dictated by the - // MFEM Operator interface for multiple RHS. - mutable std::vector X, Y, R; - // Enable timer contribution for Timer::KSP_COARSE_SOLVE. bool use_timer; // Internal function to perform a single V-cycle iteration. - void VCycle(int l, bool initial_guess) const; + void VCycle(const VecType &x, VecType &y, int l, bool initial_guess) const; public: GeometricMultigridSolver(MPI_Comm comm, std::unique_ptr> &&coarse_solver, diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp index 79f5f27f5..7715ba125 100644 --- a/palace/linalg/iterative.cpp +++ b/palace/linalg/iterative.cpp @@ -10,6 +10,7 @@ #include "linalg/orthog.hpp" #include "utils/communication.hpp" #include "utils/timer.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -367,18 +368,15 @@ void CgSolver::Mult(const VecType &b, VecType &x) const MFEM_VERIFY(A, "Operator must be set for CgSolver::Mult!"); MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), "Size mismatch for CgSolver::Mult!"); - r.SetSize(A->Height()); - z.SetSize(A->Height()); - p.SetSize(A->Height()); - r.UseDevice(true); - z.UseDevice(true); - p.UseDevice(true); + auto r = workspace::NewVector(A->Height()); + auto z = workspace::NewVector(A->Height()); + auto p = workspace::NewVector(A->Height()); // Initialize. if (this->initial_guess) { A->Mult(x, r); - linalg::AXPBY(1.0, b, -1.0, r); + linalg::AXPBY(1.0, b, -1.0, r); } else { @@ -387,7 +385,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } if (B) { - ApplyB(B, r, z, this->use_timer); + ApplyB(B, r, z, this->use_timer); } else { @@ -398,8 +396,8 @@ void CgSolver::Mult(const VecType &b, VecType &x) const res = std::sqrt(std::abs(beta)); if (this->initial_guess && B) { - ApplyB(B, b, p, this->use_timer); - auto beta_rhs = linalg::Dot(comm, p, b); + ApplyB(B, b, p, this->use_timer); + auto beta_rhs = linalg::Dot(comm, p, b); CheckDot(beta_rhs, "PCG preconditioner is not positive definite: (Bb, b) = "); initial_res = std::sqrt(std::abs(beta_rhs)); } @@ -430,7 +428,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } else { - linalg::AXPBY(ScalarType(1.0), z, beta / beta_prev, p); + linalg::AXPBY(ScalarType(1.0), z, beta / beta_prev, p); } A->Mult(p, z); @@ -444,7 +442,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const beta_prev = beta; if (B) { - ApplyB(B, r, z, this->use_timer); + ApplyB(B, r, z, this->use_timer); } else { @@ -500,10 +498,6 @@ void GmresSolver::Initialize() const V[j].SetSize(A->Height()); V[j].UseDevice(true); } - H.resize((max_dim + 1) * max_dim); - s.resize(max_dim + 1); - cs.resize(max_dim + 1); - sn.resize(max_dim + 1); } template @@ -526,9 +520,11 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const MFEM_VERIFY(A, "Operator must be set for GmresSolver::Mult!"); MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), "Size mismatch for GmresSolver::Mult!"); - r.SetSize(A->Height()); - r.UseDevice(true); Initialize(); + std::vector H((max_dim + 1) * max_dim); + std::vector s(max_dim + 1), sn(max_dim + 1); + std::vector cs(max_dim + 1); + auto r = workspace::NewVector(A->Height()); // Begin iterations. converged = false; @@ -541,8 +537,9 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const for (; it < max_it; restart++) { // Initialize. - InitialResidual(pc_side, A, B, b, x, r, V[0], (this->initial_guess || restart > 0), - this->use_timer); + InitialResidual::PrecSide, OperType, VecType>( + pc_side, A, B, b, x, r, V[0], (this->initial_guess || restart > 0), + this->use_timer); true_beta = linalg::Norml2(comm, r); CheckDot(true_beta, "GMRES residual norm is not valid: beta = "); if (it == 0) @@ -552,7 +549,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const RealType beta_rhs; if (B && pc_side == PrecSide::LEFT) { - ApplyB(B, b, V[0], this->use_timer); + ApplyB(B, b, V[0], this->use_timer); beta_rhs = linalg::Norml2(comm, V[0]); } else // !B || pc_side == PrecSide::RIGHT @@ -602,7 +599,8 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const { Update(j); } - ApplyBA(pc_side, A, B, V[j], w, r, this->use_timer); + ApplyBA::PrecSide, OperType, VecType>(pc_side, A, B, V[j], w, r, + this->use_timer); ScalarType *Hj = H.data() + j * (max_dim + 1); OrthogonalizeIteration(orthog_type, comm, V, w, Hj, j); @@ -651,7 +649,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const { r.Add(s[k], V[k]); } - ApplyB(B, r, V[0], this->use_timer); + ApplyB(B, r, V[0], this->use_timer); x += V[0]; } if (converged) @@ -717,6 +715,9 @@ void FgmresSolver::Mult(const VecType &b, VecType &x) const MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), "Size mismatch for FgmresSolver::Mult!"); Initialize(); + std::vector H((max_dim + 1) * max_dim); + std::vector s(max_dim + 1), sn(max_dim + 1); + std::vector cs(max_dim + 1); // Begin iterations. converged = false; diff --git a/palace/linalg/iterative.hpp b/palace/linalg/iterative.hpp index 72cda6764..6652e4b54 100644 --- a/palace/linalg/iterative.hpp +++ b/palace/linalg/iterative.hpp @@ -133,9 +133,6 @@ class CgSolver : public IterativeSolver using IterativeSolver::final_res; using IterativeSolver::final_it; - // Temporary workspace for solve. - mutable VecType r, z, p; - public: CgSolver(MPI_Comm comm, int print) : IterativeSolver(comm, print) {} @@ -183,9 +180,6 @@ class GmresSolver : public IterativeSolver using IterativeSolver::final_res; using IterativeSolver::final_it; - // Maximum subspace dimension for restarted GMRES. - mutable int max_dim; - // Orthogonalization method for orthonormalizing a newly computed vector against a basis // at each iteration. OrthogType orthog_type; @@ -193,12 +187,11 @@ class GmresSolver : public IterativeSolver // Use left or right preconditioning. PrecSide pc_side; + // Maximum subspace dimension for restarted GMRES. + mutable int max_dim; + // Temporary workspace for solve. mutable std::vector V; - mutable VecType r; - mutable std::vector H; - mutable std::vector s, sn; - mutable std::vector cs; // Allocate storage for solve. virtual void Initialize() const; @@ -206,20 +199,20 @@ class GmresSolver : public IterativeSolver public: GmresSolver(MPI_Comm comm, int print) - : IterativeSolver(comm, print), max_dim(-1), orthog_type(OrthogType::MGS), - pc_side(PrecSide::LEFT) + : IterativeSolver(comm, print), orthog_type(OrthogType::MGS), + pc_side(PrecSide::LEFT), max_dim(-1) { } - // Set the dimension for restart. - void SetRestartDim(int dim) { max_dim = dim; } - // Set the orthogonalization method. void SetOrthogonalization(OrthogType type) { orthog_type = type; } // Set the side for preconditioning. virtual void SetPrecSide(PrecSide side) { pc_side = side; } + // Set the dimension for restart. + void SetRestartDim(int dim) { max_dim = dim; } + void Mult(const VecType &b, VecType &x) const override; }; @@ -254,14 +247,10 @@ class FgmresSolver : public GmresSolver using GmresSolver::final_res; using GmresSolver::final_it; - using GmresSolver::max_dim; using GmresSolver::orthog_type; using GmresSolver::pc_side; + using GmresSolver::max_dim; using GmresSolver::V; - using GmresSolver::H; - using GmresSolver::s; - using GmresSolver::sn; - using GmresSolver::cs; // Temporary workspace for solve. mutable std::vector Z; diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp index bf852c5d6..47783c134 100644 --- a/palace/linalg/operator.cpp +++ b/palace/linalg/operator.cpp @@ -65,8 +65,6 @@ ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&dAr, MFEM_VERIFY(Ar || Ai, "Cannot construct ComplexWrapperOperator from an empty matrix!"); MFEM_VERIFY((!Ar || !Ai) || (Ar->Height() == Ai->Height() && Ar->Width() == Ai->Width()), "Mismatch in dimension of real and imaginary matrix parts!"); - tx.UseDevice(true); - ty.UseDevice(true); height = Ar ? Ar->Height() : Ai->Height(); width = Ar ? Ar->Width() : Ai->Width(); } @@ -221,7 +219,7 @@ void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 0.0) { - ty.SetSize(height); + auto ty = workspace::NewVector(height); Mult(x, ty); y.AXPY(a, ty); } @@ -288,9 +286,9 @@ void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVec Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 0.0) { - tx.SetSize(width); - MultTranspose(x, tx); - y.AXPY(a, tx); + auto ty = workspace::NewVector(width); + MultTranspose(x, ty); + y.AXPY(a, ty); } else if (a.real() != 0.0) { @@ -356,9 +354,9 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, Vector &yi = y.Imag(); if (a.real() != 0.0 && a.imag() != 0.0) { - tx.SetSize(width); - MultHermitianTranspose(x, tx); - y.AXPY(a, tx); + auto ty = workspace::NewVector(width); + MultHermitianTranspose(x, ty); + y.AXPY(a, ty); } else if (a.real() != 0.0) { @@ -415,7 +413,6 @@ void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, SumOperator::SumOperator(const Operator &op, double a) : Operator(op.Height(), op.Width()) { AddOperator(op, a); - z.UseDevice(true); } void SumOperator::AddOperator(const Operator &op, double a) @@ -434,10 +431,12 @@ void SumOperator::Mult(const Vector &x, Vector &y) const { y *= ops.front().second; } - return; } - y = 0.0; - AddMult(x, y); + else + { + y = 0.0; + AddMult(x, y); + } } void SumOperator::MultTranspose(const Vector &x, Vector &y) const @@ -449,15 +448,17 @@ void SumOperator::MultTranspose(const Vector &x, Vector &y) const { y *= ops.front().second; } - return; } - y = 0.0; - AddMultTranspose(x, y); + else + { + y = 0.0; + AddMultTranspose(x, y); + } } void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const { - z.SetSize(y.Size()); + auto z = workspace::NewVector(height); for (const auto &[op, c] : ops) { op->Mult(x, z); @@ -467,7 +468,7 @@ void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const { - z.SetSize(y.Size()); + auto z = workspace::NewVector(height); for (const auto &[op, c] : ops) { op->MultTranspose(x, z); @@ -635,10 +636,9 @@ double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double t int it = 0; double res = 0.0; double l = 0.0, l0 = 0.0; - ComplexVector u(A.Height()), v(A.Height()); - u.UseDevice(true); - v.UseDevice(true); - SetRandom(comm, u); + auto u = workspace::NewVector(A.Height()); + auto v = workspace::NewVector(A.Height()); + SetRandom(comm, u); Normalize(comm, u); while (it < max_it) { diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp index f0512a769..d34856c0d 100644 --- a/palace/linalg/operator.hpp +++ b/palace/linalg/operator.hpp @@ -10,6 +10,7 @@ #include #include #include "linalg/vector.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -77,9 +78,6 @@ class ComplexWrapperOperator : public ComplexOperator std::unique_ptr data_Ar, data_Ai; const Operator *Ar, *Ai; - // Temporary storage for operator application. - mutable ComplexVector tx, ty; - ComplexWrapperOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi); @@ -117,11 +115,9 @@ class SumOperator : public Operator { private: std::vector> ops; - mutable Vector z; public: - SumOperator(int s) : Operator(s) { z.UseDevice(true); } - SumOperator(int h, int w) : Operator(h, w) { z.UseDevice(true); } + SumOperator(int h, int w) : Operator(h, w) {} SumOperator(const Operator &op, double a = 1.0); void AddOperator(const Operator &op, double a = 1.0); @@ -159,7 +155,7 @@ class ProductOperatorHelper : public ComplexOp { const ComplexOperator &A = static_cast(this)->A; const ComplexOperator &B = static_cast(this)->B; - ComplexVector &z = static_cast(this)->z; + auto z = workspace::NewVector(A.Width()); A.MultHermitianTranspose(x, z); B.MultHermitianTranspose(z, y); } @@ -169,7 +165,7 @@ class ProductOperatorHelper : public ComplexOp { const ComplexOperator &A = static_cast(this)->A; const ComplexOperator &B = static_cast(this)->B; - ComplexVector &z = static_cast(this)->z; + auto z = workspace::NewVector(A.Width()); A.MultHermitianTranspose(x, z); B.AddMultHermitianTranspose(z, y, a); } @@ -189,30 +185,31 @@ class BaseProductOperator private: const OperType &A, &B; - mutable VecType z; public: BaseProductOperator(const OperType &A, const OperType &B) : ProductOperatorHelper, OperType>(A.Height(), B.Width()), - A(A), B(B), z(B.Height()) + A(A), B(B) { - z.UseDevice(true); } void Mult(const VecType &x, VecType &y) const override { + auto z = workspace::NewVector(B.Height()); B.Mult(x, z); A.Mult(z, y); } void MultTranspose(const VecType &x, VecType &y) const override { + auto z = workspace::NewVector(A.Width()); A.MultTranspose(x, z); B.MultTranspose(z, y); } void AddMult(const VecType &x, VecType &y, const ScalarType a = 1.0) const override { + auto z = workspace::NewVector(A.Width()); B.Mult(x, z); A.AddMult(z, y, a); } @@ -220,6 +217,7 @@ class BaseProductOperator void AddMultTranspose(const VecType &x, VecType &y, const ScalarType a = 1.0) const override { + auto z = workspace::NewVector(A.Width()); A.MultTranspose(x, z); B.AddMultTranspose(z, y, a); } diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp index 070358e2f..152d1e360 100644 --- a/palace/linalg/rap.cpp +++ b/palace/linalg/rap.cpp @@ -5,6 +5,7 @@ #include "fem/bilinearform.hpp" #include "linalg/hypre.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -49,19 +50,19 @@ void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, void ParOperator::EliminateRHS(const Vector &x, Vector &b) const { MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); { - auto &tx = trial_fespace.GetTVector(); + auto tx = workspace::NewVector(trial_fespace.GetTrueVSize()); tx = 0.0; - linalg::SetSubVector(tx, dbc_tdof_list, x); + linalg::SetSubVector(tx, dbc_tdof_list, x); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } // Apply the unconstrained operator. + auto ly = workspace::NewVector(test_fespace.GetVSize()); A->Mult(lx, ly); - auto &ty = test_fespace.GetTVector(); + auto ty = workspace::NewVector(test_fespace.GetTrueVSize()); RestrictionMatrixMult(ly, ty); b.Add(-1.0, ty); if (diag_policy == DiagonalPolicy::DIAG_ONE) @@ -157,10 +158,10 @@ void ParOperator::AssembleDiagonal(Vector &diag) const // entry-wise absolute values of the conforming prolongation operator. MFEM_VERIFY(&trial_fespace == &test_fespace, "Diagonal assembly is only available for square ParOperator!"); - auto &lx = trial_fespace.GetLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); A->AssembleDiagonal(lx); - // Parallel assemble and eliminate essential true dofs. + // Parallel assembly. const Operator *P = test_fespace.GetProlongationMatrix(); if (const auto *hP = dynamic_cast(P)) { @@ -195,13 +196,12 @@ void ParOperator::Mult(const Vector &x, Vector &y) const return; } - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &tx = trial_fespace.GetTVector(); + auto tx = workspace::NewVector(trial_fespace.GetTrueVSize()); tx = x; - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else @@ -210,6 +210,7 @@ void ParOperator::Mult(const Vector &x, Vector &y) const } // Apply the operator on the L-vector. + auto ly = workspace::NewVector(test_fespace.GetVSize()); A->Mult(lx, ly); RestrictionMatrixMult(ly, y); @@ -236,24 +237,24 @@ void ParOperator::MultTranspose(const Vector &x, Vector &y) const return; } - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultTranspose(lx, ly); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly, y); if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) @@ -277,13 +278,12 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const return; } - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &tx = trial_fespace.GetTVector(); + auto tx = workspace::NewVector(trial_fespace.GetTrueVSize()); tx = x; - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx, lx); } else @@ -292,19 +292,20 @@ void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const } // Apply the operator on the L-vector. + auto ly = workspace::NewVector(test_fespace.GetVSize()); A->Mult(lx, ly); - auto &ty = test_fespace.GetTVector(); + auto ty = workspace::NewVector(test_fespace.GetTrueVSize()); RestrictionMatrixMult(ly, ty); if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } y.Add(a, ty); @@ -320,37 +321,37 @@ void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) c return; } - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultTranspose(lx, ly); - auto &tx = trial_fespace.GetTVector(); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx, tx); + auto ty = workspace::NewVector(trial_fespace.GetTrueVSize()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly, ty); if (dbc_tdof_list.Size()) { if (diag_policy == DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } - y.Add(a, tx); + y.Add(a, ty); } void ParOperator::RestrictionMatrixMult(const Vector &ly, Vector &ty) const @@ -377,12 +378,6 @@ void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) c } } -Vector &ParOperator::GetTestLVector() const -{ - return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() - : test_fespace.GetLVector(); -} - ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, @@ -469,13 +464,12 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::Mult!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &tx = trial_fespace.GetTVector(); + auto tx = workspace::NewVector(trial_fespace.GetTrueVSize()); tx = x; - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } @@ -486,6 +480,7 @@ void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const } // Apply the operator on the L-vector. + auto ly = workspace::NewVector(test_fespace.GetVSize()); A->Mult(lx, ly); RestrictionMatrixMult(ly, y); @@ -507,25 +502,25 @@ void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::MultTranspose!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultTranspose(lx, ly); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), y.Imag()); if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) @@ -545,25 +540,25 @@ void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultHermitianTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultHermitianTranspose(lx, ly); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), y.Imag()); if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) @@ -583,13 +578,12 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, MFEM_ASSERT(x.Size() == width && y.Size() == height, "Incompatible dimensions for ComplexParOperator::AddMult!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(trial_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &tx = trial_fespace.GetTVector(); + auto tx = workspace::NewVector(trial_fespace.GetTrueVSize()); tx = x; - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); trial_fespace.GetProlongationMatrix()->Mult(tx.Real(), lx.Real()); trial_fespace.GetProlongationMatrix()->Mult(tx.Imag(), lx.Imag()); } @@ -600,19 +594,20 @@ void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, } // Apply the operator on the L-vector. + auto ly = workspace::NewVector(test_fespace.GetVSize()); A->Mult(lx, ly); - auto &ty = test_fespace.GetTVector(); + auto ty = workspace::NewVector(test_fespace.GetTrueVSize()); RestrictionMatrixMult(ly, ty); if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(ty, dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } y.AXPY(a, ty); @@ -624,38 +619,38 @@ void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultTranspose(lx, ly); - auto &tx = trial_fespace.GetTVector(); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); + auto ty = workspace::NewVector(trial_fespace.GetTrueVSize()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), ty.Imag()); if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } - y.AXPY(a, tx); + y.AXPY(a, ty); } void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, @@ -664,38 +659,38 @@ void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, Compl MFEM_ASSERT(x.Size() == height && y.Size() == width, "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); - auto &lx = trial_fespace.GetLVector(); - auto &ly = GetTestLVector(); + auto lx = workspace::NewVector(test_fespace.GetVSize()); if (dbc_tdof_list.Size()) { - auto &ty = test_fespace.GetTVector(); - ty = x; - linalg::SetSubVector(ty, dbc_tdof_list, 0.0); - RestrictionMatrixMultTranspose(ty, ly); + auto tx = workspace::NewVector(test_fespace.GetTrueVSize()); + tx = x; + linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(tx, lx); } else { - RestrictionMatrixMultTranspose(x, ly); + RestrictionMatrixMultTranspose(x, lx); } // Apply the operator on the L-vector. - A->MultHermitianTranspose(ly, lx); + auto ly = workspace::NewVector(trial_fespace.GetVSize()); + A->MultHermitianTranspose(lx, ly); - auto &tx = trial_fespace.GetTVector(); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), tx.Real()); - trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), tx.Imag()); + auto ty = workspace::NewVector(trial_fespace.GetTrueVSize()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), ty.Imag()); if (dbc_tdof_list.Size()) { if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) { - linalg::SetSubVector(tx, dbc_tdof_list, x); + linalg::SetSubVector(ty, dbc_tdof_list, x); } else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) { - linalg::SetSubVector(tx, dbc_tdof_list, 0.0); + linalg::SetSubVector(ty, dbc_tdof_list, 0.0); } } - y.AXPY(a, tx); + y.AXPY(a, ty); } void ComplexParOperator::RestrictionMatrixMult(const ComplexVector &ly, @@ -728,10 +723,4 @@ void ComplexParOperator::RestrictionMatrixMultTranspose(const ComplexVector &ty, } } -ComplexVector &ComplexParOperator::GetTestLVector() const -{ - return (&trial_fespace == &test_fespace) ? trial_fespace.GetLVector2() - : test_fespace.GetLVector(); -} - } // namespace palace diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp index 1be52fa20..fb322f1c6 100644 --- a/palace/linalg/rap.hpp +++ b/palace/linalg/rap.hpp @@ -44,7 +44,6 @@ class ParOperator : public Operator // Helper methods for operator application. void RestrictionMatrixMult(const Vector &ly, Vector &ty) const; void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const; - Vector &GetTestLVector() const; ParOperator(std::unique_ptr &&dA, const Operator *pA, const FiniteElementSpace &trial_fespace, @@ -133,7 +132,6 @@ class ComplexParOperator : public ComplexOperator // Helper methods for operator application. void RestrictionMatrixMult(const ComplexVector &ly, ComplexVector &ty) const; void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const; - ComplexVector &GetTestLVector() const; ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, const Operator *pAr, const Operator *pAi, diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index 3b8e4abdc..f86fd9b81 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -11,6 +11,7 @@ #include #include "linalg/divfree.hpp" #include "utils/communication.hpp" +#include "utils/workspace.hpp" static PetscErrorCode __mat_apply_EPS_A0(Mat, Vec, Vec); static PetscErrorCode __mat_apply_EPS_A1(Mat, Vec, Vec); @@ -136,22 +137,20 @@ inline VecType PetscVecType() return VECSTANDARD; } -struct MatShellContext -{ - const ComplexOperator &A; - ComplexVector &x, &y; -}; - PetscErrorCode __mat_apply_shell(Mat A, Vec x, Vec y) { PetscFunctionBeginUser; - MatShellContext *ctx; + ComplexOperator *ctx; PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x)); - ctx->A.Mult(ctx->x, ctx->y); - PetscCall(ToPetscVec(ctx->y, y)); + PetscInt m, n; + PalacePetscCall(MatGetLocalSize(A, &m, &n)); + auto x1 = workspace::NewVector(n); + auto y1 = workspace::NewVector(m); + PetscCall(FromPetscVec(x, x1)); + ctx->Mult(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -159,13 +158,17 @@ PetscErrorCode __mat_apply_shell(Mat A, Vec x, Vec y) PetscErrorCode __mat_apply_transpose_shell(Mat A, Vec x, Vec y) { PetscFunctionBeginUser; - MatShellContext *ctx; + ComplexOperator *ctx; PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x)); - ctx->A.MultTranspose(ctx->x, ctx->y); - PetscCall(ToPetscVec(ctx->y, y)); + PetscInt m, n; + PalacePetscCall(MatGetLocalSize(A, &m, &n)); + auto x1 = workspace::NewVector(m); + auto y1 = workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->MultTranspose(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -173,13 +176,17 @@ PetscErrorCode __mat_apply_transpose_shell(Mat A, Vec x, Vec y) PetscErrorCode __mat_apply_hermitian_transpose_shell(Mat A, Vec x, Vec y) { PetscFunctionBeginUser; - MatShellContext *ctx; + ComplexOperator *ctx; PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x)); - ctx->A.MultHermitianTranspose(ctx->x, ctx->y); - PetscCall(ToPetscVec(ctx->y, y)); + PetscInt m, n; + PalacePetscCall(MatGetLocalSize(A, &m, &n)); + auto x1 = workspace::NewVector(m); + auto y1 = workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->MultHermitianTranspose(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); }; @@ -240,14 +247,9 @@ PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm // or SVD solvers, namely MATOP_MULT and MATOP_MULT_HERMITIAN_TRANSPOSE (if the matrix // is not Hermitian). MFEM_VERIFY(A.Height() == A.Width(), "Spectral norm requires a square matrix!"); - const PetscInt n = A.Height(); - ComplexVector x(n), y(n); - x.UseDevice(true); - y.UseDevice(true); - MatShellContext ctx = {A, x, y}; Mat A0; - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&ctx, &A0)); + const PetscInt n = A.Height(); + PalacePetscCall(MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&A, &A0)); PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_shell)); PalacePetscCall(MatShellSetVecType(A0, PetscVecType())); if (herm) @@ -486,15 +488,16 @@ PetscReal SlepcEigenvalueSolver::GetError(int i, EigenvalueSolver::ErrorType typ void SlepcEigenvalueSolver::RescaleEigenvectors(int num_eig) { + auto x = workspace::NewVector(Size()); + auto r = workspace::NewVector(Size()); res = std::make_unique(num_eig); xscale = std::make_unique(num_eig); for (int i = 0; i < num_eig; i++) { xscale.get()[i] = 0.0; - GetEigenvector(i, x1); - xscale.get()[i] = 1.0 / GetEigenvectorNorm(x1, y1); - res.get()[i] = - GetResidualNorm(GetEigenvalue(i), x1, y1) / linalg::Norml2(GetComm(), x1); + GetEigenvector(i, x); + xscale.get()[i] = 1.0 / GetEigenvectorNorm(x, r); + res.get()[i] = GetResidualNorm(GetEigenvalue(i), x, r) / linalg::Norml2(GetComm(), x); } } @@ -715,6 +718,17 @@ void SlepcEPSSolverBase::GetEigenvector(int i, ComplexVector &x) const } } +PetscInt SlepcEPSSolverBase::Size() const +{ + if (!A0) + { + return 0; + } + PetscInt n; + PalacePetscCall(MatGetLocalSize(A0, &n, nullptr)); + return n; +} + BV SlepcEPSSolverBase::GetBV() const { BV bv; @@ -785,10 +799,6 @@ void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperato { PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - x1.SetSize(opK->Height()); - y1.SetSize(opK->Height()); - x1.UseDevice(true); - y1.UseDevice(true); // Configure linear solver for generalized problem or spectral transformation. This also // allows use of the divergence-free projector as a linear solve side-effect. @@ -890,14 +900,6 @@ void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexO { PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - x1.SetSize(opK->Height()); - x2.SetSize(opK->Height()); - y1.SetSize(opK->Height()); - y2.SetSize(opK->Height()); - x1.UseDevice(true); - x2.UseDevice(true); - y1.UseDevice(true); - y2.UseDevice(true); // Configure linear solver. if (first) @@ -950,6 +952,17 @@ void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) const } } +PetscInt SlepcPEPLinearSolver::Size() const +{ + if (!A0) + { + return 0; + } + PetscInt n; + PalacePetscCall(MatGetLocalSize(A0, &n, nullptr)); + return n / 2; +} + PetscReal SlepcPEPLinearSolver::GetResidualNorm(PetscScalar l, const ComplexVector &x, ComplexVector &r) const { @@ -1193,6 +1206,17 @@ void SlepcPEPSolverBase::GetEigenvector(int i, ComplexVector &x) const } } +PetscInt SlepcPEPSolverBase::Size() const +{ + if (!A0) + { + return 0; + } + PetscInt n; + PalacePetscCall(MatGetLocalSize(A0, &n, nullptr)); + return n; +} + BV SlepcPEPSolverBase::GetBV() const { BV bv; @@ -1273,8 +1297,6 @@ void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperato { PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - x1.SetSize(opK->Height()); - y1.SetSize(opK->Height()); // Configure linear solver. if (first) @@ -1337,10 +1359,13 @@ PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opK->Mult(ctx->x1, ctx->y1); - ctx->y1 *= ctx->delta; - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opK->Mult(x1, y1); + y1 *= ctx->delta; + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1352,10 +1377,13 @@ PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opM->Mult(ctx->x1, ctx->y1); - ctx->y1 *= ctx->delta * ctx->gamma; - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opM->Mult(x1, y1); + y1 *= ctx->delta * ctx->gamma; + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1367,11 +1395,14 @@ PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real()); - ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag()); - ctx->y1 *= ctx->delta * ctx->gamma; - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opB->Mult(x1.Real(), y1.Real()); + ctx->opB->Mult(x1.Imag(), y1.Imag()); + y1 *= ctx->delta * ctx->gamma; + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1386,23 +1417,26 @@ PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y) PetscCall(PCShellGetContext(pc, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opInv->Mult(ctx->x1, ctx->y1); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opInv->Mult(x1, y1); if (!ctx->sinvert) { - ctx->y1 *= 1.0 / (ctx->delta * ctx->gamma); + y1 *= 1.0 / (ctx->delta * ctx->gamma); } else { - ctx->y1 *= 1.0 / ctx->delta; + y1 *= 1.0 / ctx->delta; } if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); - ctx->opProj->Mult(ctx->y1); - // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); + ctx->opProj->Mult(y1); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); } - PetscCall(ToPetscVec(ctx->y1, y)); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1416,13 +1450,18 @@ PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1, ctx->x2)); - ctx->y1 = ctx->x2; - ctx->opC->Mult(ctx->x2, ctx->y2); - ctx->y2 *= ctx->gamma; - ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex(1.0, 0.0)); - ctx->y2 *= -ctx->delta; - PetscCall(ToPetscVec(ctx->y1, ctx->y2, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto x2 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + auto y2 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1, x2)); + y1 = x2; + ctx->opC->Mult(x2, y2); + y2 *= ctx->gamma; + ctx->opK->AddMult(x1, y2, std::complex(1.0, 0.0)); + y2 *= -ctx->delta; + PetscCall(ToPetscVec(y1, y2, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1436,11 +1475,16 @@ PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1, ctx->x2)); - ctx->y1 = ctx->x1; - ctx->opM->Mult(ctx->x2, ctx->y2); - ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; - PetscCall(ToPetscVec(ctx->y1, ctx->y2, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto x2 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + auto y2 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1, x2)); + y1 = x1; + ctx->opM->Mult(x2, y2); + y2 *= ctx->delta * ctx->gamma * ctx->gamma; + PetscCall(ToPetscVec(y1, y2, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1452,14 +1496,19 @@ PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1, ctx->x2)); - ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real()); - ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag()); - ctx->opB->Mult(ctx->x2.Real(), ctx->y2.Real()); - ctx->opB->Mult(ctx->x2.Imag(), ctx->y2.Imag()); - ctx->y1 *= ctx->delta * ctx->gamma * ctx->gamma; - ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; - PetscCall(ToPetscVec(ctx->y1, ctx->y2, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto x2 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + auto y2 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1, x2)); + ctx->opB->Mult(x1.Real(), y1.Real()); + ctx->opB->Mult(x1.Imag(), y1.Imag()); + ctx->opB->Mult(x2.Real(), y2.Real()); + ctx->opB->Mult(x2.Imag(), y2.Imag()); + y1 *= ctx->delta * ctx->gamma * ctx->gamma; + y2 *= ctx->delta * ctx->gamma * ctx->gamma; + PetscCall(ToPetscVec(y1, y2, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1477,48 +1526,52 @@ PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y) PetscCall(PCShellGetContext(pc, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1, ctx->x2)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto x2 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + auto y2 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1, x2)); if (!ctx->sinvert) { - ctx->y1 = ctx->x1; + y1 = x1; if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); - ctx->opProj->Mult(ctx->y1); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); + ctx->opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); } - ctx->opInv->Mult(ctx->x2, ctx->y2); - ctx->y2 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); + ctx->opInv->Mult(x2, y2); + y2 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); - ctx->opProj->Mult(ctx->y2); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y2)); + ctx->opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y2)); } } else { - ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0); // Temporarily - ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex(1.0, 0.0)); - ctx->opInv->Mult(ctx->y1, ctx->y2); + y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), x2, 0.0); // Temporarily + ctx->opK->AddMult(x1, y1, std::complex(1.0, 0.0)); + ctx->opInv->Mult(y1, y2); if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); - ctx->opProj->Mult(ctx->y2); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y2)); + ctx->opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y2)); } - ctx->y1.AXPBYPCZ(ctx->gamma / ctx->sigma, ctx->y2, -ctx->gamma / ctx->sigma, ctx->x1, - 0.0); + y1.AXPBYPCZ(ctx->gamma / ctx->sigma, y2, -ctx->gamma / ctx->sigma, x1, 0.0); if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); - ctx->opProj->Mult(ctx->y1); - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); + ctx->opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); } } - PetscCall(ToPetscVec(ctx->y1, ctx->y2, y)); + PetscCall(ToPetscVec(y1, y2, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1530,9 +1583,12 @@ PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opK->Mult(ctx->x1, ctx->y1); - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opK->Mult(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1544,9 +1600,12 @@ PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opC->Mult(ctx->x1, ctx->y1); - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opC->Mult(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1558,9 +1617,12 @@ PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opM->Mult(ctx->x1, ctx->y1); - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opM->Mult(x1, y1); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1572,11 +1634,14 @@ PetscErrorCode __mat_apply_PEP_B(Mat A, Vec x, Vec y) PetscCall(MatShellGetContext(A, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real()); - ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag()); - ctx->y1 *= ctx->delta * ctx->gamma; - PetscCall(ToPetscVec(ctx->y1, y)); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opB->Mult(x1.Real(), y1.Real()); + ctx->opB->Mult(x1.Imag(), y1.Imag()); + y1 *= ctx->delta * ctx->gamma; + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } @@ -1591,23 +1656,26 @@ PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y) PetscCall(PCShellGetContext(pc, (void **)&ctx)); MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); - PetscCall(FromPetscVec(x, ctx->x1)); - ctx->opInv->Mult(ctx->x1, ctx->y1); + const auto n = ctx->Size(); + auto x1 = palace::workspace::NewVector(n); + auto y1 = palace::workspace::NewVector(n); + PetscCall(FromPetscVec(x, x1)); + ctx->opInv->Mult(x1, y1); if (!ctx->sinvert) { - ctx->y1 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); + y1 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); } else { - ctx->y1 *= 1.0 / ctx->delta; + y1 *= 1.0 / ctx->delta; } if (ctx->opProj) { - // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); - ctx->opProj->Mult(ctx->y1); - // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); + ctx->opProj->Mult(y1); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), y1)); } - PetscCall(ToPetscVec(ctx->y1, y)); + PetscCall(ToPetscVec(y1, y)); PetscFunctionReturn(PETSC_SUCCESS); } diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index bd4fbc201..142eed9e3 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -74,9 +74,6 @@ class SlepcEigenvalueSolver : public EigenvalueSolver JD }; - // Workspace vector for operator applications. - mutable ComplexVector x1, y1; - protected: // Control print level for debugging. int print; @@ -172,6 +169,9 @@ class SlepcEigenvalueSolver : public EigenvalueSolver // the new matrix, only normalization. void RescaleEigenvectors(int num_eig) override; + // Get the (local) size of the eigenvalue problem. + virtual PetscInt Size() const = 0; + // Get the basis vectors object. virtual BV GetBV() const = 0; @@ -230,6 +230,8 @@ class SlepcEPSSolverBase : public SlepcEigenvalueSolver void GetEigenvector(int i, ComplexVector &x) const override; + PetscInt Size() const override; + BV GetBV() const override; ST GetST() const override; @@ -296,9 +298,6 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase // (not owned). const ComplexOperator *opK, *opC, *opM; - // Workspace vectors for operator applications. - mutable ComplexVector x2, y2; - private: // Operator norms for scaling. mutable PetscReal normK, normC, normM; @@ -321,6 +320,8 @@ class SlepcPEPLinearSolver : public SlepcEPSSolverBase void SetInitialSpace(const ComplexVector &v) override; void GetEigenvector(int i, ComplexVector &x) const override; + + PetscInt Size() const override; }; // Base class for SLEPc's PEP problem type. @@ -365,6 +366,8 @@ class SlepcPEPSolverBase : public SlepcEigenvalueSolver void GetEigenvector(int i, ComplexVector &x) const override; + PetscInt Size() const override; + BV GetBV() const override; ST GetST() const override; diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp index 1a47a64bc..33f3c2ed5 100644 --- a/palace/linalg/solver.hpp +++ b/palace/linalg/solver.hpp @@ -47,21 +47,6 @@ class Solver : public OperType { MFEM_ABORT("MultTranspose() is not implemented for base class Solver!"); } - - // Apply the solver with a preallocated temporary storage vector. - virtual void Mult2(const VecType &x, VecType &y, VecType &r) const - { - MFEM_ABORT("Mult2() with temporary storage vector is not implemented for base class " - "Solver!"); - } - - // Apply the solver for the transpose problem with a preallocated temporary storage - // vector. - virtual void MultTranspose2(const VecType &x, VecType &y, VecType &r) const - { - MFEM_ABORT("MultTranspose2() with temporary storage vector is not implemented for base " - "class Solver!"); - } }; // This solver wraps a real-valued mfem::Solver for application to complex-valued problems diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp index 16f5fe88c..147ce70cd 100644 --- a/palace/linalg/vector.cpp +++ b/palace/linalg/vector.cpp @@ -8,6 +8,7 @@ #include #include "linalg/hypre.hpp" #include "utils/omp.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -34,11 +35,6 @@ ComplexVector::ComplexVector(const std::complex *py, int size, bool on_d Set(py, size, on_dev); } -ComplexVector::ComplexVector(Vector &y, int offset, int size) -{ - MakeRef(y, offset, size); -} - void ComplexVector::UseDevice(bool use_dev) { xr.UseDevice(use_dev); @@ -47,17 +43,13 @@ void ComplexVector::UseDevice(bool use_dev) void ComplexVector::SetSize(int size) { - xr.SetSize(size); - xi.SetSize(size); -} - -void ComplexVector::MakeRef(Vector &y, int offset, int size) -{ - MFEM_ASSERT(y.Size() >= offset + 2 * size, - "Insufficient storage for ComplexVector alias reference of the given size!"); - y.ReadWrite(); // Ensure memory is allocated on device before aliasing - xr.MakeRef(y, offset, size); - xi.MakeRef(y, offset + size, size); + if (size != Size()) + { + MFEM_VERIFY(Size() == 0 || (xr.OwnsData() && xi.OwnsData()), + "Alias vectors should not be resized!"); + xr.SetSize(size); + xi.SetSize(size); + } } void ComplexVector::Set(const ComplexVector &y) @@ -101,8 +93,7 @@ void ComplexVector::Set(const std::complex *py, int size, bool on_dev) else if (!on_dev) { // Need copy from host to device (host pointer but using device). - Vector y(2 * size); - y.UseDevice(true); + auto y = workspace::NewVector(2 * size); { auto *Y = y.HostWrite(); PalacePragmaOmp(parallel for schedule(static)) diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp index c40de1f0c..bb9742f7f 100644 --- a/palace/linalg/vector.hpp +++ b/palace/linalg/vector.hpp @@ -21,7 +21,7 @@ using Vector = mfem::Vector; // A complex-valued vector represented as two real vectors, one for each component. class ComplexVector { -private: +protected: Vector xr, xi; public: @@ -37,10 +37,6 @@ class ComplexVector // Copy constructor from an array of complex values. ComplexVector(const std::complex *py, int size, bool on_dev); - // Create a vector referencing the memory of another vector, at the given base offset and - // size. - ComplexVector(Vector &y, int offset, int size); - // Flag for runtime execution on the mfem::Device. See the documentation for mfem::Vector. void UseDevice(bool use_dev); bool UseDevice() const { return xr.UseDevice(); } @@ -52,10 +48,6 @@ class ComplexVector // where the new size is less than or greater than Size() or Capacity(). void SetSize(int size); - // Set this vector to reference the memory of another vector, at the given base offset and - // size. - void MakeRef(Vector &y, int offset, int size); - // Get access to the real and imaginary vector parts. const Vector &Real() const { return xr; } Vector &Real() { return xr; } diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp index aa3100fa6..9acf191e4 100644 --- a/palace/models/domainpostoperator.cpp +++ b/palace/models/domainpostoperator.cpp @@ -11,6 +11,7 @@ #include "models/materialoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -35,8 +36,6 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera BilinearForm m(nd_fespace); m.AddDomainIntegrator(epsilon_func); M_elec = m.PartialAssemble(); - D.SetSize(M_elec->Height()); - D.UseDevice(true); } { // Construct RT mass matrix to compute the magnetic field energy integral as: @@ -46,8 +45,6 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera BilinearForm m(rt_fespace); m.AddDomainIntegrator(muinv_func); M_mag = m.PartialAssemble(); - H.SetSize(M_mag->Height()); - H.UseDevice(true); } // Use the provided domain postprocessing indices for postprocessing the electric and @@ -88,8 +85,6 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera BilinearForm m(fespace); m.AddDomainIntegrator(epsilon_func); M_elec = m.PartialAssemble(); - D.SetSize(M_elec->Height()); - D.UseDevice(true); } for (const auto &[idx, data] : iodata.domains.postpro.energy) @@ -115,8 +110,6 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera BilinearForm m(fespace); m.AddDomainIntegrator(muinv_func); M_mag = m.PartialAssemble(); - H.SetSize(M_mag->Height()); - H.UseDevice(true); } for (const auto &[idx, data] : iodata.domains.postpro.energy) @@ -143,6 +136,7 @@ double DomainPostOperator::GetElectricFieldEnergy(const GridFunction &E) const { if (M_elec) { + auto D = workspace::NewVector(M_elec->Height()); M_elec->Mult(E.Real(), D); double dot = linalg::LocalDot(E.Real(), D); if (E.HasImag()) @@ -162,6 +156,7 @@ double DomainPostOperator::GetMagneticFieldEnergy(const GridFunction &B) const { if (M_mag) { + auto H = workspace::NewVector(M_mag->Height()); M_mag->Mult(B.Real(), H); double dot = linalg::LocalDot(B.Real(), H); if (B.HasImag()) @@ -188,6 +183,7 @@ double DomainPostOperator::GetDomainElectricFieldEnergy(int idx, { return 0.0; } + auto D = workspace::NewVector(it->second.first->Height()); it->second.first->Mult(E.Real(), D); double dot = linalg::LocalDot(E.Real(), D); if (E.HasImag()) @@ -210,6 +206,7 @@ double DomainPostOperator::GetDomainMagneticFieldEnergy(int idx, { return 0.0; } + auto H = workspace::NewVector(it->second.second->Height()); it->second.second->Mult(B.Real(), H); double dot = linalg::LocalDot(B.Real(), H); if (B.HasImag()) diff --git a/palace/models/domainpostoperator.hpp b/palace/models/domainpostoperator.hpp index 19bb284ca..1a1ca5a4c 100644 --- a/palace/models/domainpostoperator.hpp +++ b/palace/models/domainpostoperator.hpp @@ -31,9 +31,6 @@ class DomainPostOperator std::unique_ptr M_elec, M_mag; std::map, std::unique_ptr>> M_i; - // Temporary vectors for inner product calculations. - mutable Vector D, H; - public: DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, const FiniteElementSpace &nd_fespace, diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 7231d6f87..48068c055 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -11,6 +11,7 @@ #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" +#include "utils/workspace.hpp" namespace palace { @@ -36,7 +37,6 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera double a0_, a1_; std::unique_ptr kspM, kspA; std::unique_ptr A, B; - mutable Vector RHS; // Bindings to SpaceOperator functions to get the system matrix and preconditioner, and // construct the linear solver. @@ -60,8 +60,6 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Set up RHS vector for the current source term: -g'(t) J, where g(t) handles the time // dependence. spaceop.GetExcitationVector(NegJ); - RHS.SetSize(NegJ.Size()); - RHS.UseDevice(true); // Set up linear solvers. { @@ -115,6 +113,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera // Operators have already been set in constructor. ddu = 0.0; } + auto RHS = workspace::NewVector(NegJ.Size()); FormRHS(u, du, RHS); kspM->Mult(RHS, ddu); } @@ -135,6 +134,7 @@ class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOpera k = 0.0; } Mpi::Print("\n"); + auto RHS = workspace::NewVector(NegJ.Size()); FormRHS(u, du, RHS); kspA->Mult(RHS, k); } diff --git a/palace/utils/workspace.hpp b/palace/utils/workspace.hpp new file mode 100644 index 000000000..4dfe51b76 --- /dev/null +++ b/palace/utils/workspace.hpp @@ -0,0 +1,65 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_UTILS_WORKSPACE_HPP +#define PALACE_UTILS_WORKSPACE_HPP + +#include +#include "linalg/vector.hpp" + +namespace palace::workspace +{ + +// +// Utility class and functions for complex-valued temporary workspace vectors. See +// mfem::Workspace and mfem::WorkspaceVector. +// + +class ComplexWorkspaceVector : public ComplexVector +{ +private: + mfem::WorkspaceVector data_xr, data_xi; + +public: + ComplexWorkspaceVector(int size) + : data_xr(mfem::Workspace::NewVector(size)), data_xi(mfem::Workspace::NewVector(size)) + { + xr.MakeRef(data_xr, 0, size); + xi.MakeRef(data_xi, 0, size); + } + + // No copy constructor. + ComplexWorkspaceVector(const ComplexWorkspaceVector &other) = delete; + + // Copy assignment: copy contents of vector, not metadata. + ComplexWorkspaceVector &operator=(const ComplexWorkspaceVector &other) + { + ComplexVector::operator=(other); + return *this; + } + + // Cannot move to an existing ComplexWorkspaceVector. + ComplexWorkspaceVector &operator=(ComplexWorkspaceVector &&other) = delete; + + // All other operator=, inherit from ComplexVector. + using ComplexVector::operator=; +}; + +template +auto NewVector(int size); + +template <> +inline auto NewVector(int size) +{ + return mfem::Workspace::NewVector(size); +} + +template <> +inline auto NewVector(int size) +{ + return ComplexWorkspaceVector(size); +} + +} // namespace palace::workspace + +#endif // PALACE_UTILS_WORKSPACE_HPP