Skip to content

Commit

Permalink
Require BML_USE_POSIX_MEMALIGN to use posix_memalign()
Browse files Browse the repository at this point in the history
Using the HAVE_POSIX_MEMALIGN code path results in a ~1.7x slowdown of an MD code that frequently allocates arrays.

Require a new macro, BML_USE_POSIX_MEMALIGN, to be defined in order to use this code path. The macro is defined by default when posix_memalign() is available.
  • Loading branch information
mewall authored and nicolasbock committed Jul 2, 2024
1 parent 6a24bfc commit 0df37a8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 5 deletions.
11 changes: 8 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -671,9 +671,14 @@ endif()

add_definitions(-D_POSIX_C_SOURCE=200112L)

check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
if(HAVE_POSIX_MEMALIGN)
add_definitions(-DHAVE_POSIX_MEMALIGN)
option(BML_POSIX_MEMALIGN "Use posix_memalign() for allocations if available" ON)

if(BML_POSIX_MEMALIGN)
check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
if(HAVE_POSIX_MEMALIGN)
add_definitions(-DBML_USE_POSIX_MEMALIGN)
message(STATUS "Will use posix_memalign() for allocations")
endif()
endif()

set(INTEL_OPT FALSE
Expand Down
3 changes: 3 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ EOF
echo "EXTRA_FFLAGS Extra fortran flags (default is '${EXTRA_FFLAGS}')"
echo "EXTRA_LINK_FLAGS Add extra link flags (default is '${EXTRA_LINK_FLAGS}')"
echo "BML_OMP_OFFLOAD {yes,no} (default is ${BML_OMP_OFFLOAD})"
echo "BML_POSIX_MEMALIGN Use if available {yes,no} (default is ${BML_POSIX_MEMALIGN})"
echo "BML_OFFLOAD_ARCH {NVIDIA, AMD} (default is ${BML_OFFLOAD_ARCH})"
echo "GPU_ARCH GPU architecture (default is '${GPU_ARCH}')"
echo "BML_CUDA Build with CUDA (default is ${BML_CUDA})"
Expand Down Expand Up @@ -139,6 +140,7 @@ set_defaults() {
: ${FORTRAN_FLAGS:=}
: ${EXTRA_LINK_FLAGS:=}
: ${BML_OMP_OFFLOAD:=no}
: ${BML_POSIX_MEMALIGN:=yes}
: ${BML_OFFLOAD_ARCH:=NVIDIA}
: ${GPU_ARCH:=}
: ${BML_CUDA:=no}
Expand Down Expand Up @@ -232,6 +234,7 @@ configure() {
-DSCALAPACK_LIBRARIES="${SCALAPACK_LIBRARIES}" \
-DBML_ELPA="${BML_ELPA}" \
-DBML_OPENMP="${BML_OPENMP}" \
-DBML_POSIX_MEMALIGN="${BML_POSIX_MEMALIGN}" \
-DMKL_GPU="${MKL_GPU}" \
-DBML_MPI="${BML_MPI}" \
-DBML_MPIEXEC_EXECUTABLE="${BML_MPIEXEC_EXECUTABLE}" \
Expand Down
4 changes: 2 additions & 2 deletions src/C-interface/bml_allocate.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ bml_allocate_memory(
__assume_aligned(ptr, MALLOC_ALIGNMENT);
ptr[i] = 0;
}
#elif defined(HAVE_POSIX_MEMALIGN)
#elif defined(BML_USE_POSIX_MEMALIGN)
char *ptr;
posix_memalign((void **) &ptr, MALLOC_ALIGNMENT, size);
#pragma omp simd
Expand Down Expand Up @@ -86,7 +86,7 @@ bml_noinit_allocate_memory(
{
#if defined(INTEL_OPT)
void *ptr = _mm_malloc(size, MALLOC_ALIGNMENT);
#elif defined(HAVE_POSIX_MEMALIGN)
#elif defined(BML_USE_POSIX_MEMALIGN)
void *ptr;
posix_memalign(&ptr, MALLOC_ALIGNMENT, size);
#else
Expand Down

0 comments on commit 0df37a8

Please sign in to comment.