diff --git a/src/acc/acc_libsmm.h b/src/acc/acc_libsmm.h index 06957d74074..94428d598e5 100644 --- a/src/acc/acc_libsmm.h +++ b/src/acc/acc_libsmm.h @@ -15,11 +15,11 @@ #define DBCSR_TYPE_double dbcsr_type_real_8 #define DBCSR_TYPE_float dbcsr_type_real_4 -#define LIBSMM_ACC_TRANSPOSE_ROUTINE_NAME_STRPTR ((const char**)&libsmm_acc_transpose_routine_name_ptr) +#define LIBSMM_ACC_TRANSPOSE_ROUTINE_NAME_STRPTR ((const char**)((uintptr_t)&libsmm_acc_transpose_routine_name_str)) #define LIBSMM_ACC_TRANSPOSE_ROUTINE_NAME_LENPTR (&libsmm_acc_transpose_routine_name_len) #define LIBSMM_ACC_TRANSPOSE_ROUTINE_NAME_STR (libsmm_acc_transpose_routine_name_str) -#define LIBSMM_ACC_PROCESS_ROUTINE_NAME_STRPTR ((const char**)&libsmm_acc_process_routine_name_ptr) +#define LIBSMM_ACC_PROCESS_ROUTINE_NAME_STRPTR ((const char**)((uintptr_t)&libsmm_acc_process_routine_name_str)) #define LIBSMM_ACC_PROCESS_ROUTINE_NAME_LENPTR (&libsmm_acc_process_routine_name_len) #define LIBSMM_ACC_PROCESS_ROUTINE_NAME_STR (libsmm_acc_process_routine_name_str) @@ -49,11 +49,9 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack, int c_calculate_norms(const double* mat, int nblks, const int* offsets, const int* nelems, float* norms, void* stream_ptr); static const char libsmm_acc_transpose_routine_name_str[] = "jit_kernel_transpose"; -static const char* const libsmm_acc_transpose_routine_name_ptr = libsmm_acc_transpose_routine_name_str; static const int libsmm_acc_transpose_routine_name_len = (int)sizeof(libsmm_acc_transpose_routine_name_str) - 1; static const char libsmm_acc_process_routine_name_str[] = "jit_kernel_multiply"; -static const char* const libsmm_acc_process_routine_name_ptr = libsmm_acc_process_routine_name_str; static const int libsmm_acc_process_routine_name_len = (int)sizeof(libsmm_acc_process_routine_name_str) - 1; #if defined(__cplusplus) diff --git a/src/acc/cuda/Makefile b/src/acc/cuda/Makefile index 2aedadeb979..6f6c66b2369 100644 --- a/src/acc/cuda/Makefile +++ b/src/acc/cuda/Makefile @@ -103,23 +103,15 @@ ifneq (,$(ELEM_TYPE)) DFLAGS += -DELEM_TYPE=$(ELEM_TYPE) endif -ifeq (1,$(INTEL)) - CXX := icpc - CC := icc - AR := xiar -else ifneq (0,$(INTEL)) - CXX := icpx - CC := icx - AR := xiar -else ifneq (0,$(GNU)) - override CXX := g++ - override CC := gcc - ifneq (Darwin,$(UNAME)) - override AR := gcc-ar +ifneq (0,$(INTEL)) + ifneq (1,$(INTEL)) + CXX := icpx + CC := icx else - override AR := ar + CXX := icpc + CC := icc endif - #override LD_LIBRARY_DIRS := $(NULL) + AR := $(if $(call which,xiar),xiar,ar) else CXX := g++ CC := gcc diff --git a/src/acc/opencl/Makefile b/src/acc/opencl/Makefile index 0cbefd97d69..244a7b2692f 100644 --- a/src/acc/opencl/Makefile +++ b/src/acc/opencl/Makefile @@ -73,23 +73,15 @@ ifneq (,$(ELEM_TYPE)) CFLAGS += -DELEM_TYPE=$(ELEM_TYPE) endif -ifeq (1,$(INTEL)) - CXX := icpc - CC := icc - AR := xiar -else ifneq (0,$(INTEL)) - CXX := icpx - CC := icx - AR := xiar -else ifneq (0,$(GNU)) - override CXX := g++ - override CC := gcc - ifneq (Darwin,$(UNAME)) - override AR := gcc-ar +ifneq (0,$(INTEL)) + ifneq (1,$(INTEL)) + CXX := icpx + CC := icx else - override AR := ar + CXX := icpc + CC := icc endif - #override LD_LIBRARY_DIRS := $(NULL) + AR := $(if $(call which,xiar),xiar,ar) else CXX := g++ CC := gcc @@ -109,13 +101,14 @@ ifneq (0,$(DEV)) CFLAGS += -D__DBCSR_ACC CFLAGS += -Wno-deprecated -Werror ifneq (2,$(DEV)) + $(info DEBUG: $(CC) $(CXX)) ifneq (,$(findstring clang,$(CC) $(CXX))) override CC := clang++ --analyze else - CC := $(CXX) -xc++ + override CC := $(CXX) -xc++ endif else - CC := $(CXX) -xc++ + override CC := $(CXX) -xc++ endif $(info CC: $(shell $(CC) --version | head -n1)) OMP := 0 diff --git a/src/acc/opencl/acc_opencl.c b/src/acc/opencl/acc_opencl.c index 57e1f050f0b..cab7e358cfb 100644 --- a/src/acc/opencl/acc_opencl.c +++ b/src/acc/opencl/acc_opencl.c @@ -380,8 +380,8 @@ int c_dbcsr_acc_init(void) { assert(EXIT_SUCCESS == result); # endif if (~(1 + 2) & c_dbcsr_acc_opencl_config.wa) { /* environment is populated before touching the compute runtime */ - static char* key_value[] = {"NEOReadDebugKeys=1", "ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE", "EnableRecoverablePageFaults=0", - "DirectSubmissionOverrideBlitterSupport=0"}; + static char a[] = "NEOReadDebugKeys=1", b[] = "ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE", c[] = "EnableRecoverablePageFaults=0"; + static char d[] = "DirectSubmissionOverrideBlitterSupport=0", *key_value[] = {a, b, c, d}; if (NULL == env_neo) ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[0])); if ((4 & c_dbcsr_acc_opencl_config.wa) && NULL == getenv("ZE_FLAT_DEVICE_HIERARCHY")) { ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[1])); diff --git a/src/acc/opencl/acc_opencl.h b/src/acc/opencl/acc_opencl.h index fadcf20f976..ddd1834051f 100644 --- a/src/acc/opencl/acc_opencl.h +++ b/src/acc/opencl/acc_opencl.h @@ -124,7 +124,7 @@ # define ACC_OPENCL_ACTIVATE 0 #endif /* Use DBCSR's profile for detailed timings */ -#if !defined(ACC_OPENCL_PROFILE) && 0 +#if !defined(ACC_OPENCL_PROFILE) && (defined(__OFFLOAD_PROFILING) || 0) # define ACC_OPENCL_PROFILE #endif @@ -362,7 +362,7 @@ typedef struct c_dbcsr_acc_opencl_config_t { extern c_dbcsr_acc_opencl_config_t c_dbcsr_acc_opencl_config; /** Determines host-pointer registration for modification. */ -c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_hostptr(void* memory); +c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_hostptr(const void* memory); /** Determines device-pointer registration for modification (internal). */ c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_devptr_modify( ACC_OPENCL_LOCKTYPE* lock, void* memory, size_t elsize, const size_t* amount, size_t* offset); diff --git a/src/acc/opencl/acc_opencl_mem.c b/src/acc/opencl/acc_opencl_mem.c index 8a218634057..7d9bd86a4d0 100644 --- a/src/acc/opencl/acc_opencl_mem.c +++ b/src/acc/opencl/acc_opencl_mem.c @@ -60,7 +60,7 @@ void c_dbcsr_acc_opencl_pfree(ACC_OPENCL_LOCKTYPE* lock, const void* pointer, vo } -c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_hostptr(void* memory) { +c_dbcsr_acc_opencl_info_memptr_t* c_dbcsr_acc_opencl_info_hostptr(const void* memory) { assert(NULL == memory || sizeof(c_dbcsr_acc_opencl_info_memptr_t) <= (uintptr_t)memory); return (NULL != memory ? (c_dbcsr_acc_opencl_info_memptr_t*)((uintptr_t)memory - sizeof(c_dbcsr_acc_opencl_info_memptr_t)) : (c_dbcsr_acc_opencl_info_memptr_t*)NULL); @@ -291,6 +291,7 @@ int c_dbcsr_acc_opencl_memcpy_d2h( int result = EXIT_SUCCESS; # if defined(ACC_OPENCL_MEM_DEVPTR) if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result = c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL(queue, finish, host_mem, dev_mem, nbytes, 0, NULL, NULL); } else @@ -302,6 +303,7 @@ int c_dbcsr_acc_opencl_memcpy_d2h( int result_sync = EXIT_SUCCESS; # if defined(ACC_OPENCL_MEM_DEVPTR) if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result_sync = c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL(queue, CL_TRUE, host_mem, dev_mem, nbytes, 0, NULL, NULL); } else @@ -343,6 +345,7 @@ int c_dbcsr_acc_dev_mem_allocate(void** dev_mem, size_t nbytes) { assert(NULL != dev_mem && NULL != context); # if defined(ACC_OPENCL_MEM_DEVPTR) if (NULL != c_dbcsr_acc_opencl_config.device.clDeviceMemAllocINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); *dev_mem = memptr = c_dbcsr_acc_opencl_config.device.clDeviceMemAllocINTEL( context, c_dbcsr_acc_opencl_config.device.id, NULL /*properties*/, nbytes, 0 /*alignment*/, &result); if (EXIT_SUCCESS != result) *dev_mem = NULL; @@ -436,6 +439,7 @@ int c_dbcsr_acc_dev_mem_deallocate(void* dev_mem) { # else assert(NULL != c_dbcsr_acc_opencl_config.device.context); if (NULL != c_dbcsr_acc_opencl_config.device.clMemFreeINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result = c_dbcsr_acc_opencl_config.device.clMemFreeINTEL(c_dbcsr_acc_opencl_config.device.context, dev_mem); } else { @@ -513,6 +517,7 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v assert(NULL != str && NULL != str->queue); # if defined(ACC_OPENCL_MEM_DEVPTR) if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result = c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL(str->queue, finish, dev_mem, host_mem, nbytes, 0, NULL, NULL); } else @@ -592,6 +597,7 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt # if defined(ACC_OPENCL_MEM_DEVPTR) assert(NULL != c_dbcsr_acc_opencl_config.device.context); if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result = c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL( str->queue, CL_FALSE /*blocking*/, devmem_dst, devmem_src, nbytes, 0, NULL, &event); } @@ -642,6 +648,7 @@ int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nb # if defined(ACC_OPENCL_MEM_DEVPTR) assert(NULL != c_dbcsr_acc_opencl_config.device.context); if (NULL != c_dbcsr_acc_opencl_config.device.clEnqueueMemFillINTEL) { + assert(0 == c_dbcsr_acc_opencl_config.device.unified); result = c_dbcsr_acc_opencl_config.device.clEnqueueMemFillINTEL( str->queue, (char*)dev_mem + offset, &value, size_of_value, nbytes, 0, NULL, &event); }