From 0124bbd5d7768e2bab550c7bbebf17e77200b7c0 Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Tue, 7 Nov 2023 12:19:22 +0000 Subject: [PATCH] TOOLS: use separate cuda alloc in perftest --- tools/perf/ucc_pt_coll.cc | 24 ++++++++++++++++++++++++ tools/perf/ucc_pt_cuda.cc | 3 +++ tools/perf/ucc_pt_cuda.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/tools/perf/ucc_pt_coll.cc b/tools/perf/ucc_pt_coll.cc index a561ea73b4..31e6af22bc 100644 --- a/tools/perf/ucc_pt_coll.cc +++ b/tools/perf/ucc_pt_coll.cc @@ -5,11 +5,29 @@ */ #include "ucc_pt_coll.h" +#include "ucc_pt_cuda.h" ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len, ucc_memory_type_t mem_type) { ucc_status_t status; + int cuda_st; + + if (mem_type == UCC_MEMORY_TYPE_CUDA) { + *h_ptr = new ucc_mc_buffer_header_t; + (*h_ptr)->mt = UCC_MEMORY_TYPE_CUDA; + cuda_st = ucc_pt_cudaMalloc(&((*h_ptr)->addr), len); + if (cuda_st != 0) { + return UCC_ERR_NO_RESOURCE; + } + cuda_st = ucc_pt_cudaMemset((*h_ptr)->addr, 0, len); + if (cuda_st != 0) { + ucc_pt_cudaFree((*h_ptr)->addr); + delete *h_ptr; + return UCC_ERR_NO_RESOURCE; + } + return UCC_OK; + } status = ucc_mc_alloc(h_ptr, len, mem_type); if (status != UCC_OK) { @@ -26,6 +44,12 @@ ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len, ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr) { + if (h_ptr->mt == UCC_MEMORY_TYPE_CUDA) { + ucc_pt_cudaFree(h_ptr->addr); + delete h_ptr; + return UCC_OK; + } + return ucc_mc_free(h_ptr); } diff --git a/tools/perf/ucc_pt_cuda.cc b/tools/perf/ucc_pt_cuda.cc index bcadabc955..f0807f88a9 100644 --- a/tools/perf/ucc_pt_cuda.cc +++ b/tools/perf/ucc_pt_cuda.cc @@ -36,6 +36,9 @@ void ucc_pt_cuda_init(void) LOAD_CUDA_SYM("cudaGetErrorString", getErrorString); LOAD_CUDA_SYM("cudaStreamCreateWithFlags", streamCreateWithFlags); LOAD_CUDA_SYM("cudaStreamDestroy", streamDestroy); + LOAD_CUDA_SYM("cudaMalloc", cudaMalloc); + LOAD_CUDA_SYM("cudaFree", cudaFree); + LOAD_CUDA_SYM("cudaMemset", cudaMemset); ucc_pt_cuda_iface.available = 1; } diff --git a/tools/perf/ucc_pt_cuda.h b/tools/perf/ucc_pt_cuda.h index 1a5844c1bc..05c1fbbbf8 100644 --- a/tools/perf/ucc_pt_cuda.h +++ b/tools/perf/ucc_pt_cuda.h @@ -31,6 +31,9 @@ typedef struct ucc_pt_cuda_iface { int (*streamCreateWithFlags)(cudaStream_t *stream, unsigned int flags); int (*streamDestroy)(cudaStream_t stream); char* (*getErrorString)(int err); + int (*cudaMalloc)(void **devptr, size_t size); + int (*cudaFree)(void *devptr); + int (*cudaMemset)(void *devptr, int value, size_t count); } ucc_pt_cuda_iface_t; extern ucc_pt_cuda_iface_t ucc_pt_cuda_iface; @@ -74,4 +77,31 @@ static inline int ucc_pt_cudaStreamDestroy(cudaStream_t stream) return 0; } +static inline int ucc_pt_cudaMalloc(void **devptr, size_t size) +{ + if (!ucc_pt_cuda_iface.available) { + return 1; + } + CUDA_CHECK(ucc_pt_cuda_iface.cudaMalloc(devptr, size)); + return 0; +} + +static inline int ucc_pt_cudaFree(void *devptr) +{ + if (!ucc_pt_cuda_iface.available) { + return 1; + } + CUDA_CHECK(ucc_pt_cuda_iface.cudaFree(devptr)); + return 0; +} + +static inline int ucc_pt_cudaMemset(void *devptr, int value, size_t count) +{ + if (!ucc_pt_cuda_iface.available) { + return 1; + } + CUDA_CHECK(ucc_pt_cuda_iface.cudaMemset(devptr, value, count)); + return 0; +} + #endif