From b802a9813a8a0ae8be32bfa05d0b5338cc804a55 Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 19 Feb 2024 10:41:28 -0800
Subject: [PATCH] Fix HostRead bug for error estimator

---
 palace/linalg/errorestimator.cpp | 43 ++++++++++++++++++--------------
 palace/linalg/errorestimator.hpp | 10 ++------
 2 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 39b0d1fe1..238f3ba8a 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -137,6 +137,7 @@ void FluxProjector<VecType>::Mult(const VecType &x, VecType &y) const
   {
     if constexpr (std::is_same<VecType, Vector>::value)
     {
+      y.Write();  // Ensure memory is allocated on device before aliasing
       for (int i = 0; i < vdim; i++)
       {
         // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for "
@@ -162,7 +163,7 @@ CurlFluxErrorEstimator<VecType>::CurlFluxErrorEstimator(const MaterialOperator &
                                                         double tol, int max_it, int print)
   : mat_op(mat_op), nd_fespace(nd_fespace),
     projector(mat_op, nd_fespace, tol, max_it, print), F(nd_fespace.GetTrueVSize()),
-    F_gf(&nd_fespace.Get()), U_gf(&nd_fespace.Get())
+    F_gf(nd_fespace.GetVSize()), U_gf(nd_fespace.GetVSize())
 {
   F.UseDevice(true);
 }
@@ -177,18 +178,22 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
   projector.Mult(U, F);
   if constexpr (std::is_same<VecType, ComplexVector>::value)
   {
-    F_gf.real().SetFromTrueDofs(F.Real());
-    F_gf.imag().SetFromTrueDofs(F.Imag());
-    U_gf.real().SetFromTrueDofs(U.Real());
-    U_gf.imag().SetFromTrueDofs(U.Imag());
+    nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real());
+    nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag());
+    nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real());
+    nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag());
+    U_gf.Real().HostRead();
+    U_gf.Imag().HostRead();
+    F_gf.Real().HostRead();
+    F_gf.Imag().HostRead();
   }
   else
   {
-    F_gf.SetFromTrueDofs(F);
-    U_gf.SetFromTrueDofs(U);
+    nd_fespace.GetProlongationMatrix()->Mult(U, U_gf);
+    nd_fespace.GetProlongationMatrix()->Mult(F, F_gf);
+    U_gf.HostRead();
+    F_gf.HostRead();
   }
-  F_gf.HostRead();
-  U_gf.HostRead();
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is μ⁻¹ ∇ × U.
@@ -228,8 +233,7 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
         fe.CalcCurlShape(ip, Curl);
         const double w = ip.weight * T.Weight();
 
-        auto AccumulateError =
-            [&](const mfem::ParGridFunction &U_gf_, const mfem::ParGridFunction &F_gf_)
+        auto AccumulateError = [&](const Vector &U_gf_, const Vector &F_gf_)
         {
           // μ⁻¹ ∇ × U
           U_gf_.GetSubVector(dofs, loc_gf);
@@ -253,16 +257,16 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
 
           V_smooth -= V_ip;
           elem_err += w * (V_smooth * V_smooth);
-          loc_norm2 += w * (V_ip * V_ip);
+          return w * (V_ip * V_ip);
         };
         if constexpr (std::is_same<VecType, ComplexVector>::value)
         {
-          AccumulateError(U_gf.real(), F_gf.real());
-          AccumulateError(U_gf.imag(), F_gf.imag());
+          loc_norm2 += AccumulateError(U_gf.Real(), F_gf.Real());
+          loc_norm2 += AccumulateError(U_gf.Imag(), F_gf.Imag());
         }
         else
         {
-          AccumulateError(U_gf, F_gf);
+          loc_norm2 += AccumulateError(U_gf, F_gf);
         }
       }
       h_estimates[e] = std::sqrt(elem_err);
@@ -288,7 +292,8 @@ GradFluxErrorEstimator::GradFluxErrorEstimator(const MaterialOperator &mat_op,
         h1_fespace.GetMesh(), &h1_fespace.GetFEColl(), h1_fespace.SpaceDimension(),
         mfem::Ordering::byNODES)),
     projector(mat_op, h1_fespace, *h1d_fespace, tol, max_it, print),
-    F(h1d_fespace->GetTrueVSize()), F_gf(&h1d_fespace->Get()), U_gf(&h1_fespace.Get())
+    F(h1d_fespace->GetTrueVSize()), F_gf(h1d_fespace->GetVSize()),
+    U_gf(h1_fespace.GetVSize())
 {
   F.UseDevice(true);
 }
@@ -300,10 +305,10 @@ void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U,
   // and populate the corresponding grid functions.
   BlockTimer bt(Timer::ESTIMATION);
   projector.Mult(U, F);
-  F_gf.SetFromTrueDofs(F);
-  U_gf.SetFromTrueDofs(U);
-  F_gf.HostRead();
+  h1_fespace.GetProlongationMatrix()->Mult(U, U_gf);
+  h1d_fespace->GetProlongationMatrix()->Mult(F, F_gf);
   U_gf.HostRead();
+  F_gf.HostRead();
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is ε ∇U.
diff --git a/palace/linalg/errorestimator.hpp b/palace/linalg/errorestimator.hpp
index 23c8924e2..745ba48c5 100644
--- a/palace/linalg/errorestimator.hpp
+++ b/palace/linalg/errorestimator.hpp
@@ -55,10 +55,6 @@ class FluxProjector
 template <typename VecType>
 class CurlFluxErrorEstimator
 {
-  using GridFunctionType =
-      typename std::conditional<std::is_same<VecType, ComplexVector>::value,
-                                mfem::ParComplexGridFunction, mfem::ParGridFunction>::type;
-
   // Reference to material property data (not owned).
   const MaterialOperator &mat_op;
 
@@ -69,8 +65,7 @@ class CurlFluxErrorEstimator
   FluxProjector<VecType> projector;
 
   // Temporary vectors for error estimation.
-  mutable VecType F;
-  mutable GridFunctionType F_gf, U_gf;
+  mutable VecType F, F_gf, U_gf;
 
 public:
   CurlFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &nd_fespace,
@@ -98,8 +93,7 @@ class GradFluxErrorEstimator
   FluxProjector<Vector> projector;
 
   // Temporary vectors for error estimation.
-  mutable Vector F;
-  mutable mfem::ParGridFunction F_gf, U_gf;
+  mutable Vector F, F_gf, U_gf;
 
 public:
   GradFluxErrorEstimator(const MaterialOperator &mat_op, FiniteElementSpace &h1_fespace,