Skip to content

Commit

Permalink
ACC Setdevice reordering (#595)
Browse files Browse the repository at this point in the history
* Reorder set_device within acc_init
* Exclude cusmm test, see #427
* Adjusted PACKAGE dependency.
  • Loading branch information
alazzaro authored Mar 25, 2022
1 parent 4f13428 commit b8c0255
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/acc/PACKAGE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description": "Generic accelerator API",
"archive": "libdbcsr",
"requires": ["../base", "cuda", "hip", "opencl", "libsmm_acc"]
"requires": ["../base", "../core", "cuda", "hip", "opencl", "libsmm_acc"]
}
14 changes: 13 additions & 1 deletion src/acc/dbcsr_acc_init.F
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ MODULE dbcsr_acc_init
#if defined (__DBCSR_ACC)
USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_PTR, C_NULL_PTR, C_NULL_CHAR, C_ASSOCIATED
#endif
USE dbcsr_acc_device, ONLY: dbcsr_acc_set_active_device
USE dbcsr_config, ONLY: get_accdrv_active_device_id
#include "base/dbcsr_base_uses.f90"

IMPLICIT NONE
Expand Down Expand Up @@ -47,9 +49,15 @@ SUBROUTINE acc_init()
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
INTEGER :: istat
! Set active device first
CALL dbcsr_acc_set_active_device(get_accdrv_active_device_id())
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat)
!$OMP MASTER
istat = acc_interface_drv_init()
IF (istat /= 0) &
DBCSR_ABORT("acc_init failed")
!$OMP END MASTER
!$OMP END PARALLEL
#endif
END SUBROUTINE acc_init

Expand All @@ -58,10 +66,14 @@ SUBROUTINE acc_finalize()
#if ! defined (__DBCSR_ACC)
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
INTEGER :: istat
INTEGER :: istat
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat)
!$OMP MASTER
istat = acc_interface_drv_finalize()
IF (istat /= 0) &
DBCSR_ABORT("acc_finalize failed")
!$OMP END MASTER
!$OMP END PARALLEL
#endif
END SUBROUTINE acc_finalize

Expand Down
31 changes: 10 additions & 21 deletions src/core/dbcsr_lib.F
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ MODULE dbcsr_lib

!! Routines that affect the DBCSR library as a whole
USE dbcsr_acc_init, ONLY: acc_finalize, acc_init
USE dbcsr_acc_device, ONLY: dbcsr_acc_get_ndevices, dbcsr_acc_set_active_device
USE dbcsr_config, ONLY: get_accdrv_active_device_id, &
set_accdrv_active_device_id, &
USE dbcsr_acc_device, ONLY: dbcsr_acc_get_ndevices
USE dbcsr_config, ONLY: set_accdrv_active_device_id, &
reset_accdrv_active_device_id, &
dbcsr_set_config, &
has_acc
Expand Down Expand Up @@ -204,24 +203,18 @@ SUBROUTINE dbcsr_init_lib_pre(mp_comm, io_unit, accdrv_active_device_id)

! Initialize Acc and set active device
IF (has_acc) THEN
!$OMP PARALLEL
!$OMP MASTER
CALL acc_init()
!$OMP END MASTER
!$OMP END PARALLEL
IF (dbcsr_acc_get_ndevices() > 0) THEN
IF (PRESENT(accdrv_active_device_id)) THEN
CALL set_accdrv_active_device_id(accdrv_active_device_id)
ELSE
! Use round-robin assignment per rank
CALL set_accdrv_active_device_id(MOD(mynode, dbcsr_acc_get_ndevices()))
END IF
IF (PRESENT(accdrv_active_device_id)) THEN
CALL set_accdrv_active_device_id(accdrv_active_device_id)
ELSEIF (dbcsr_acc_get_ndevices() > 0) THEN
! Use round-robin assignment per rank
CALL set_accdrv_active_device_id(MOD(mynode, dbcsr_acc_get_ndevices()))
ELSE
DBCSR_ABORT("dbcsr_init_lib: No recongnized GPU devices")
END IF
CALL acc_init()
END IF

#if defined(__DBCSR_ACC)
CALL dbcsr_acc_set_active_device(get_accdrv_active_device_id())

! Checks related to DBCSR's GPU backend: check consistency in threading level
libsmm_acc_thread_safe = libsmm_acc_is_thread_safe() ! 0: not threaded, 1: threaded
dbcsr_thread_safe = 0 ! not threaded
Expand Down Expand Up @@ -310,11 +303,7 @@ SUBROUTINE dbcsr_finalize_lib()
! Reset Acc ID
CALL reset_accdrv_active_device_id()
IF (has_acc) THEN
!$OMP PARALLEL
!$OMP MASTER
CALL acc_finalize()
!$OMP END MASTER
!$OMP END PARALLEL
END IF
! Check the number of communicators
Expand Down
16 changes: 8 additions & 8 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,14 @@ if (USE_ACCEL MATCHES "cuda|hip")
$<$<STREQUAL:${USE_ACCEL},cuda>:CUDA::cuda_driver> libsmm_acc)
endforeach ()

add_test(NAME libsmm_acc_unittest_multiply
COMMAND libsmm_acc_unittest_multiply)
add_test(NAME libsmm_acc_unittest_transpose
COMMAND libsmm_acc_unittest_transpose)
add_test(NAME libsmm_acc_timer_multiply-autotuned
COMMAND libsmm_acc_timer_multiply autotuned)
add_test(NAME libsmm_acc_timer_multiply-predicted
COMMAND libsmm_acc_timer_multiply predicted)
# Disabled for the moment: these tests are not parallelized and are very slow.
# See issue https://github.com/cp2k/dbcsr/issues/427
# add_test(NAME libsmm_acc_unittest_multiply
#          COMMAND libsmm_acc_unittest_multiply)
# add_test(NAME libsmm_acc_unittest_transpose
#          COMMAND libsmm_acc_unittest_transpose)
# add_test(NAME libsmm_acc_timer_multiply-autotuned
#          COMMAND libsmm_acc_timer_multiply autotuned)
# add_test(NAME libsmm_acc_timer_multiply-predicted
#          COMMAND libsmm_acc_timer_multiply predicted)

endif ()

Expand Down

0 comments on commit b8c0255

Please sign in to comment.