Updates for patch release v4.2.1

LLNL · Aug 31, 2023 · dab0113 · dab0113
1 parent daca3cd
commit dab0113
Show file tree

Hide file tree

Showing 6 changed files with 147 additions and 19 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(SAMRAI C CXX Fortran)
 
 set(SAMRAI_VERSION_MAJOR 4)
 set(SAMRAI_VERSION_MINOR 2)
-set(SAMRAI_VERSION_PATCHLEVEL 0)
+set(SAMRAI_VERSION_PATCHLEVEL 1)
 set(SAMRAI_VERSION
   "${SAMRAI_VERSION_MAJOR}.${SAMRAI_VERSION_MINOR}.${SAMRAI_VERSION_PATCHLEVEL}")
 

diff --git a/INSTALL-NOTES b/INSTALL-NOTES
@@ -296,9 +296,9 @@ doxygen	        1.6.1
 
 graphviz	2.26
 
-Umpire          v2022.03.1
+Umpire          v2023.06.0
 
-RAJA            v2022.03.1
+RAJA            v2023.06.0
 
 
 Visualization Tools:

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
@@ -4,7 +4,7 @@
   All rights reserved.
 *****************************************************************************
 
-                     Release Notes for SAMRAI v4.2.0
+                     Release Notes for SAMRAI v4.2.1
 
      (notes for previous releases may be found in /SAMRAI/docs/release)
 
@@ -22,6 +22,14 @@ https://github.com/LLNL/SAMRAI
 
 *****************************************************************************
 
+VERSION 4.2.1
+
+Version 4.2.1 is a minor patch release to make SAMRAI compatible with 
+Umpire v2023.06.0 and RAJA v2023.06.0.  The content of the release notes
+file for SAMRAI version 4.2.0 is preserved here, except for notes marked
+as version 4.2.1.
+
+
 VERSION 4.2.0
 
 Version 4.2.0 is considered a beta release due to the introduction of
@@ -53,12 +61,14 @@ GPU kernels to be accumulated for a single kernel launch.
 2) A minimum_patch_load parameter has been added to CascadePartitioner and
 TreeLoadBalancer as an option to change how small patches are treated during
 load balancing.
- 
+
 -----------------------------------------------------------------------------
                 Summary of what's changed
 -----------------------------------------------------------------------------
 
-1)
+1) VERSION 4.2.1 Minor changes have been made for compatibility with RAJA
+v2023.06.0 and Umpire v2023.06.0, and those are the recommended releases
+for those libraries for use with this SAMRAI release.
 
 
 *****************************************************************************
@@ -108,6 +118,10 @@ count may be more desired than uniformity in cell count.
                 Details about what's changed
 ----------------------------------------------------------------------------
 
+1) VERSION 4.2.1 Minor changes have been made for compatibility with RAJA
+v2023.06.0 and Umpire v2023.06.0, and those are the recommended releases
+for those libraries for use with this SAMRAI release.
+
 
 =============================================================================
 =============================================================================
diff --git a/docs/release/version-4.2.0 b/docs/release/version-4.2.0
@@ -0,0 +1,113 @@
+*****************************************************************************
+  Copyright 1997-2023
+  Lawrence Livermore National Security, LLC.
+  All rights reserved.
+*****************************************************************************
+
+                     Release Notes for SAMRAI v4.2.0
+
+     (notes for previous releases may be found in /SAMRAI/docs/release)
+
+*****************************************************************************
+
+
+Where to report Bugs
+--------------------
+
+If a bug is found in the SAMRAI library, we ask that you kindly report
+it to us so that we may fix it.
+
+Please send email to samrai@llnl.gov or post an issue on github.
+https://github.com/LLNL/SAMRAI
+
+*****************************************************************************
+
+VERSION 4.2.0
+
+Version 4.2.0 is considered a beta release due to the introduction of
+kernel fusion features which may see notable changes in future releases.
+
+
+*****************************************************************************
+
+----------------------------------------------------------------------------
+                Significant bug fixes
+----------------------------------------------------------------------------
+
+1) A bug in the internal computation of connector widths in
+TimeRefinementIntegrator was fixed.  There was an incorrect computation of
+connector width between adjacent levels when a large tag buffer size was
+provided to the input.  This computation has been fixed.
+
+*****************************************************************************
+
+
+
+----------------------------------------------------------------------------
+                Summary of what's new
+-----------------------------------------------------------------------------
+
+1) RAJA-based kernel fusion features have been added, allowing for independent
+GPU kernels to be accumulated for a single kernel launch.
+
+2) A minimum_patch_load parameter has been added to CascadePartitioner and
+TreeLoadBalancer as an option to change how small patches are treated during
+load balancing.
+
+-----------------------------------------------------------------------------
+                Summary of what's changed
+-----------------------------------------------------------------------------
+
+1)
+
+
+*****************************************************************************
+
+-----------------------------------------------------------------------------
+                Details about what's new
+-----------------------------------------------------------------------------
+
+1) RAJA-based kernel fusion features have been added, allowing for independent
+GPU kernels to be accumulated for a single kernel launch.
+
+The core of these features is in the new tbox::KernelFuser class, which uses
+RAJA WorkGroup features to enqueue a set of independent kernels that have been
+defined as lambda functions in RAJA for_alls.  Rather than executing each
+kernel on the GPU device when it is reached in the code, the kernels are
+stored until the KernelFuser object makes a launch() call, at which time all
+kernels are executed concurrently.  The intent of this is to reduce the
+overhead from launching each kernel separately.
+
+To support usage of kernel fusion, a new abstract base class
+tbox::ScheduleOpsStrategy has been added, with methods that are called from
+tbox::Schedule at places where it could be useful for applications to make
+calls for kernel fusion operations in their codes.  In particular, the calls
+to postPack(), postCopy(), and postUnpack() are provided so that
+applications can implement calls to kernel fusion launches after the original
+calls to data packing, copy, and unpacking operations have enqueued rather
+than launched the kernels that do those operations.  ScheduleOpsStrategy
+is defined generally without reference to kernel fusion, as applications
+could choose to implement other things for their codes to do before and after
+Schedule operations.  A pointer to a ScheduleOpsStrategy can be provided
+using set methods available in RefineSchedule, CoarsenSchedule, or Schedule.
+
+2) A minimum_patch_load parameter has been added to CascadePartitioner and
+TreeLoadBalancer as an option to change how small patches are treated during
+load balancing.
+
+The value given for minimum_patch_load is used to "fool" the load balancing
+algorithm into treating small patches with a cell count below the given value
+as if they were the size of the given value.  This can reduce the likelihood
+that the load balancers will accumulate a large number of small patches on
+a single rank, with a known side effect being that the decomposition will
+have less uniformity in total cell count on each processor.  This is provided
+as an option for users running in environments where uniformity in patch
+count may be more desired than uniformity in cell count.
+
+-----------------------------------------------------------------------------
+                Details about what's changed
+----------------------------------------------------------------------------
+
+
+=============================================================================
+=============================================================================
diff --git a/source/SAMRAI/tbox/AllocatorDatabase.cpp b/source/SAMRAI/tbox/AllocatorDatabase.cpp
@@ -70,14 +70,14 @@ AllocatorDatabase::initialize()
   if (!rm.isAllocator("samrai::data_allocator")) {
 #if defined(HAVE_CUDA)
     // Internal pool for allocations
-#if 1
+#if 0
     auto allocator = rm.makeAllocator<umpire::strategy::AllocationAdvisor>(
         "internal::samrai::um_allocation_advisor",
         rm.getAllocator(umpire::resource::Unified),
         // Set preferred location to GPU
-        "PREFERRED_LOCATION");
+        "SET_PREFERRED_LOCATION");
 #endif
-    //auto allocator = rm.getAllocator(umpire::resource::Pinned);
+    auto allocator = rm.getAllocator(umpire::resource::Pinned);
 #else
     auto allocator = rm.getAllocator(umpire::resource::Host);
 #endif

diff --git a/source/SAMRAI/tbox/CommGraphWriter.h b/source/SAMRAI/tbox/CommGraphWriter.h
@@ -114,16 +114,6 @@ class CommGraphWriter
       size_t record_number,
       std::ostream& os) const;
 
-private:
-   // Unimplemented copy constructor.
-   CommGraphWriter(
-      const CommGraphWriter& other);
-
-   // Unimplemented assignment operator.
-   CommGraphWriter&
-   operator = (
-      const CommGraphWriter& rhs);
-
    struct Edge {
       Edge():d_value(0.0),
          d_dir(TO),
@@ -135,6 +125,17 @@ class CommGraphWriter
       std::string d_label;
    };
 
+
+private:
+   // Unimplemented copy constructor.
+   CommGraphWriter(
+      const CommGraphWriter& other);
+
+   // Unimplemented assignment operator.
+   CommGraphWriter&
+   operator = (
+      const CommGraphWriter& rhs);
+
    struct NodeValue {
       NodeValue():d_value(0.0) {
       }