From 8506a344c7f10f068c51030b590e2bfa537324fc Mon Sep 17 00:00:00 2001 From: wrtobin Date: Thu, 21 Dec 2023 15:26:37 -0500 Subject: [PATCH 1/2] compress trivially copyable --- host-configs/ORNL/crusher-cce@13.0.1.cmake | 41 ++++++++++++++++++++++ host-configs/ORNL/spock-cce@12.0.3.cmake | 39 ++++++++++++++++++++ src/ArrayOfArraysView.hpp | 40 +++++++++++++++++---- src/bufferManipulation.hpp | 15 ++++++++ 4 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 host-configs/ORNL/crusher-cce@13.0.1.cmake create mode 100644 host-configs/ORNL/spock-cce@12.0.3.cmake diff --git a/host-configs/ORNL/crusher-cce@13.0.1.cmake b/host-configs/ORNL/crusher-cce@13.0.1.cmake new file mode 100644 index 00000000..2a359fd5 --- /dev/null +++ b/host-configs/ORNL/crusher-cce@13.0.1.cmake @@ -0,0 +1,41 @@ +set(CONFIG_NAME "crusher-cce@13.0.1" CACHE PATH "") + +# Set up the tpls +set(GEOSX_TPL_ROOT_DIR "/gpfs/alpine/geo127/world-shared/cray-sles15-zen2/cce-13.0.1" CACHE PATH "") +set(GEOSX_TPL_DIR ${GEOSX_TPL_ROOT_DIR} CACHE PATH "") + +set(BLT_DIR "${GEOSX_TPL_DIR}/blt-0.4.1-3zz2mkf2wvglevvl4ozepe4tzhwtchoa/" CACHE PATH "" ) +set(CAMP_DIR "${GEOSX_TPL_DIR}/camp-0.2.2-3qpdz6h2dzvfm5t7uabpz2ykiheza5b4/" CACHE PATH "" ) + +set(UMPIRE_DIR "${GEOSX_TPL_DIR}/umpire-6.0.0-aeczo5gctizktwwt5x7xlmuyoarwipag/" CACHE PATH "" ) +set(RAJA_DIR "${GEOSX_TPL_DIR}/raja-0.14.0-twro7k3cfsmp7s6mkiugsqncivj6w327/" CACHE PATH "" ) +set(CHAI_DIR "${GEOSX_TPL_DIR}/chai-2.4.0-yubforuougga3ujwwpfz3tmybqhroczp/" CACHE PATH "" ) + +set(METIS_DIR "${GEOSX_TPL_DIR}/metis-5.1.0-zcfkawg5ifqpzcihrc3i6cdrrijusc2p/" CACHE PATH "" ) +set(PARMETIS_DIR "${GEOSX_TPL_DIR}/parmetis-4.0.3-t2amifl5hh7yewre24gn2x3mlrz7qkl5/" CACHE PATH "" ) + +# C++ options +set(CMAKE_C_COMPILER "/opt/cray/pe/craype/2.7.13/bin/cc" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/opt/cray/pe/craype/2.7.13/bin/CC" CACHE PATH "") +set(CMAKE_Fortran_COMPILER "/opt/cray/pe/craype/2.7.13/bin/ftn" CACHE PATH "") + +set(CMAKE_CXX_STANDARD 14 CACHE 
STRING "") + +set( ENABLE_MPI ON CACHE BOOL "" FORCE ) +set( ENABLE_FIND_MPI ON CACHE BOOL "" FORCE ) + +# HIP Options +set( ENABLE_HIP ON CACHE BOOL "" FORCE ) +set( HIP_ROOT "/opt/rocm-4.5.2" CACHE PATH "" ) +set( HIP_VERSION_STRING "4.5.2" CACHE STRING "" ) +set( CMAKE_HIP_ARCHITECTURES "gfx90a" CACHE STRING "" FORCE ) +set( AMDGPU_TARGETS "${CMAKE_HIP_ARCHITECTURES}" CACHE STRING "" FORCE ) +#set( CMAKE_CXX_FLAGS "-std=c++14 -D__HIP_ARCH_GFX90A__=1" CACHE STRING "" FORCE ) + +set( HIP_HIPCC_INCLUDE_ARGS "$<$<BOOL:/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0/include>:-I/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0/include>" CACHE STRING "" FORCE ) +set( HIP_HIPCC_FLAGS "-std=c++14" CACHE STRING "" FORCE ) +set( CMAKE_EXE_LINKER_FLAGS "-L/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0/lib -lmpi -L/opt/cray/pe/mpich/8.1.12/gtl/lib -lmpi_gtl_hsa" CACHE STRING "" FORCE ) + +# GTEST options +set(ENABLE_GTEST_DEATH_TESTS OFF CACHE BOOL "") +set(gtest_disable_pthreads ON CACHE BOOL "") diff --git a/host-configs/ORNL/spock-cce@12.0.3.cmake b/host-configs/ORNL/spock-cce@12.0.3.cmake new file mode 100644 index 00000000..f0764c32 --- /dev/null +++ b/host-configs/ORNL/spock-cce@12.0.3.cmake @@ -0,0 +1,39 @@ +set(CONFIG_NAME "spock-cce@12.0.3" CACHE PATH "") + +# Set up the tpls +set(GEOSX_TPL_ROOT_DIR "/gpfs/alpine/geo127/scratch/tobin6/spack/opt/spack/cray-sles15-zen2/cce-12.0.3" CACHE PATH "") +set(GEOSX_TPL_DIR ${GEOSX_TPL_ROOT_DIR} CACHE PATH "") + +set(BLT_DIR "${GEOSX_TPL_DIR}/blt-0.4.1-qpmhf6p7n5sarmks55hgjnzff3ncs7jd/" CACHE PATH "" ) +set(CAMP_DIR "${GEOSX_TPL_DIR}/camp-0.2.2-frggdmwjevbxy4a6kw7ctgrhyv7erfhr/" CACHE PATH "" ) + +set(UMPIRE_DIR "${GEOSX_TPL_DIR}/umpire-6.0.0-nkdetdg5tjyzzf5yjzo32jxwkmwfjjqn/" CACHE PATH "" ) +set(RAJA_DIR "${GEOSX_TPL_DIR}/raja-0.14.0-wun25mr5qf7vo6x2vblhzh2ivs7vr4g6/" CACHE PATH "" ) +set(CHAI_DIR "${GEOSX_TPL_DIR}/chai-2.4.0-a5ponjo23u7smy7w4a4jj7im47shrsxk/" CACHE PATH "" ) + +set(METIS_DIR 
"/sw/spock/spack-envs/base/opt/cray-sles15-zen2/cce-12.0.3/metis-5.1.0-rbblqiymq6eoursordyaq2ghimzpd22v/" CACHE PATH "" ) +set(PARMETIS_DIR "/sw/spock/spack-envs/base/opt/cray-sles15-zen2/cce-12.0.3/parmetis-4.0.3-mliemgo6vxrahsz4f6u5agdqyfpk2yd2/" CACHE PATH "" ) + +# C++ options +#set(CMAKE_C_COMPILER "/opt/cray/pe/cce/12.0.3/bin/craycc" CACHE PATH "") +#set(CMAKE_CXX_COMPILER "/opt/cray/pe/cce/12.0.3/bin/crayCC" CACHE PATH "") +#set(CMAKE_Fortran_COMPILER "/opt/cray/pe/cce/12.0.3/bin/crayftn" CACHE PATH "") + +set(CMAKE_C_COMPILER "/opt/cray/pe/craype/2.7.11/bin/cc" CACHE PATH "") +set(CMAKE_CXX_COMPILER "/opt/cray/pe/craype/2.7.11/bin/CC" CACHE PATH "") +set(CMAKE_Fortran_COMPILER "/opt/cray/pe/craype/2.7.11/bin/ftn" CACHE PATH "") + +set(CMAKE_CXX_STANDARD 14 CACHE STRING "") + +set( ENABLE_MPI ON CACHE BOOL "" FORCE ) +set( ENABLE_FIND_MPI OFF CACHE BOOL "" FORCE ) + +# HIP Options +set( ENABLE_HIP ON CACHE BOOL "" FORCE ) +set( HIP_ROOT "/opt/rocm-4.2.0" CACHE PATH "" ) +set( HIP_VERSION_STRING "4.2.0" CACHE STRING "" ) +set( CMAKE_HIP_ARCHITECTURES "gfx908" CACHE STRING "" FORCE ) + +# GTEST options +set(ENABLE_GTEST_DEATH_TESTS OFF CACHE BOOL "") +set(gtest_disable_pthreads ON CACHE BOOL "") diff --git a/src/ArrayOfArraysView.hpp b/src/ArrayOfArraysView.hpp index ebaeff82..1bea47e4 100644 --- a/src/ArrayOfArraysView.hpp +++ b/src/ArrayOfArraysView.hpp @@ -680,7 +680,12 @@ class ArrayOfArraysView typeManipulation::forEachArg( [sizeOfNextArray, nextOffset, shiftAmount] ( auto & buffer ) { - arrayManipulation::uninitializedShiftDown( &buffer[ nextOffset ], sizeOfNextArray, shiftAmount ); + // arrayManipulation::uninitializedShiftDown( &buffer[ nextOffset ], sizeOfNextArray, shiftAmount ); + T * const LVARRAY_RESTRICT ptr = &buffer[ nextOffset ]; + for (std::ptrdiff_t j = 0; j < sizeOfNextArray; ++j) + { + ptr[j - shiftAmount] = std::move(ptr[j]); + } }, m_values, buffers ... ); // And update the offsets. 
@@ -728,11 +733,34 @@ class ArrayOfArraysView { auto const fillOffsets = [&]() { - m_offsets[ 0 ] = 0; - RAJA::inclusive_scan< POLICY >( RAJA::make_span< INDEX_TYPE const * >( capacities, numSubArrays ), - RAJA::make_span< INDEX_TYPE * >( m_offsets.data()+1, numSubArrays ) ); - }; - resizeFromOffsetsImpl( numSubArrays, fillOffsets, buffers ... ); + LVARRAY_ERROR_IF_LT( capacities[ i ], 0 ); + } + #endif + + destroyValues( 0, m_numArrays, buffers ... ); + + bufferManipulation::reserve( m_sizes, m_numArrays, MemorySpace::host, numSubArrays ); + std::fill_n( m_sizes.data(), numSubArrays, 0 ); + + INDEX_TYPE const offsetsSize = ( m_numArrays == 0 ) ? 0 : m_numArrays + 1; + bufferManipulation::reserve( m_offsets, offsetsSize, MemorySpace::host, numSubArrays + 1 ); + + m_offsets[ 0 ] = 0; + // RAJA::inclusive_scan fails on empty input range + if( numSubArrays > 0 ) + { + bufferManipulation::ContainerShim< INDEX_TYPE const > capacitiesShim( capacities, capacities + numSubArrays ); + bufferManipulation::ContainerShim< INDEX_TYPE > offsetsShim( m_offsets.data() + 1, m_offsets.data() + numSubArrays ); + // const_cast needed until for RAJA bug. + RAJA::inclusive_scan< POLICY >( capacitiesShim, offsetsShim ); + } + + m_numArrays = numSubArrays; + INDEX_TYPE const maxOffset = m_offsets[ m_numArrays ]; + typeManipulation::forEachArg( [ maxOffset] ( auto & buffer ) + { + bufferManipulation::reserve( buffer, 0, MemorySpace::host, maxOffset ); + }, m_values, buffers ... 
); } ///@} diff --git a/src/bufferManipulation.hpp b/src/bufferManipulation.hpp index 83e5a00e..548cfe2b 100644 --- a/src/bufferManipulation.hpp +++ b/src/bufferManipulation.hpp @@ -69,6 +69,21 @@ namespace bufferManipulation */ HAS_MEMBER_FUNCTION_NO_RTYPE( move, MemorySpace::host, true ); + +template < typename T > +struct ContainerShim +{ + ContainerShim( T * begin, T * end ) + : m_begin( begin ) + , m_end( end ) + {} + T * begin() const { return m_begin; } + T * end() const { return m_end; } + T * m_begin; + T * m_end; +}; + + /** * @class VoidBuffer * @brief This class implements the default behavior for the Buffer methods related From 1760f07f34cf4a869d66ddcf0fb53af0fda29165 Mon Sep 17 00:00:00 2001 From: wrtobin Date: Thu, 21 Dec 2023 15:29:48 -0500 Subject: [PATCH 2/2] merge issue --- src/ArrayOfArraysView.hpp | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/src/ArrayOfArraysView.hpp b/src/ArrayOfArraysView.hpp index 1bea47e4..8b4ba781 100644 --- a/src/ArrayOfArraysView.hpp +++ b/src/ArrayOfArraysView.hpp @@ -733,34 +733,11 @@ class ArrayOfArraysView { auto const fillOffsets = [&]() { - LVARRAY_ERROR_IF_LT( capacities[ i ], 0 ); - } - #endif - - destroyValues( 0, m_numArrays, buffers ... ); - - bufferManipulation::reserve( m_sizes, m_numArrays, MemorySpace::host, numSubArrays ); - std::fill_n( m_sizes.data(), numSubArrays, 0 ); - - INDEX_TYPE const offsetsSize = ( m_numArrays == 0 ) ? 0 : m_numArrays + 1; - bufferManipulation::reserve( m_offsets, offsetsSize, MemorySpace::host, numSubArrays + 1 ); - - m_offsets[ 0 ] = 0; - // RAJA::inclusive_scan fails on empty input range - if( numSubArrays > 0 ) - { - bufferManipulation::ContainerShim< INDEX_TYPE const > capacitiesShim( capacities, capacities + numSubArrays ); - bufferManipulation::ContainerShim< INDEX_TYPE > offsetsShim( m_offsets.data() + 1, m_offsets.data() + numSubArrays ); - // const_cast needed until for RAJA bug. 
- RAJA::inclusive_scan< POLICY >( capacitiesShim, offsetsShim ); - } - - m_numArrays = numSubArrays; - INDEX_TYPE const maxOffset = m_offsets[ m_numArrays ]; - typeManipulation::forEachArg( [ maxOffset] ( auto & buffer ) - { - bufferManipulation::reserve( buffer, 0, MemorySpace::host, maxOffset ); - }, m_values, buffers ... ); + m_offsets[ 0 ] = 0; + RAJA::inclusive_scan< POLICY >( RAJA::make_span< INDEX_TYPE const * >( capacities, numSubArrays ), + RAJA::make_span< INDEX_TYPE * >( m_offsets.data()+1, numSubArrays ) ); + }; + resizeFromOffsetsImpl( numSubArrays, fillOffsets, buffers ... ); } ///@}