From c319e2572def875fb6cc297c83d3bba8807a4cad Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Thu, 16 Mar 2023 11:51:51 +0400 Subject: [PATCH] MC: add memset function (#740) --- .azure/azure-pipelines-pr.yml | 2 +- src/components/mc/base/ucc_mc_base.h | 3 ++- src/components/mc/cpu/mc_cpu.c | 9 ++++++- src/components/mc/cuda/mc_cuda.c | 31 +++++++++++++++++++++-- src/components/mc/rocm/mc_rocm.c | 27 ++++++++++++++++++++ src/components/mc/ucc_mc.c | 7 +++++ src/components/mc/ucc_mc.h | 4 ++- tools/perf/ucc_pt_coll.cc | 29 +++++++++++++++++++++ tools/perf/ucc_pt_coll.h | 7 ++++- tools/perf/ucc_pt_coll_allgather.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_allgatherv.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_allreduce.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_alltoall.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_alltoallv.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_bcast.cc | 10 ++++++-- tools/perf/ucc_pt_coll_gather.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_gatherv.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_reduce.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_reduce_scatter.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_reduce_scatterv.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_scatter.cc | 16 ++++++++---- tools/perf/ucc_pt_coll_scatterv.cc | 16 ++++++++---- tools/perf/ucc_pt_op_memcpy.cc | 16 ++++++++---- tools/perf/ucc_pt_op_reduce.cc | 16 ++++++++---- tools/perf/ucc_pt_op_reduce_strided.cc | 16 ++++++++---- 25 files changed, 285 insertions(+), 84 deletions(-) diff --git a/.azure/azure-pipelines-pr.yml b/.azure/azure-pipelines-pr.yml index ab09b644f6..cd708e3fc5 100644 --- a/.azure/azure-pipelines-pr.yml +++ b/.azure/azure-pipelines-pr.yml @@ -101,4 +101,4 @@ stages: cd build make gtest displayName: Launch Gtest - timeoutInMinutes: 40 + timeoutInMinutes: 120 diff --git a/src/components/mc/base/ucc_mc_base.h b/src/components/mc/base/ucc_mc_base.h index beb1d1c889..0f2a8c31bf 100644 --- a/src/components/mc/base/ucc_mc_base.h +++ b/src/components/mc/base/ucc_mc_base.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -105,6 +105,7 @@ typedef struct ucc_mc_ops { ucc_status_t (*memcpy)(void *dst, const void *src, size_t len, ucc_memory_type_t dst_mem, ucc_memory_type_t src_mem); + ucc_status_t (*memset)(void *dst, int value, size_t len); ucc_status_t (*flush)(); } ucc_mc_ops_t; diff --git a/src/components/mc/cpu/mc_cpu.c b/src/components/mc/cpu/mc_cpu.c index 629bcd7094..c7f431efb0 100644 --- a/src/components/mc/cpu/mc_cpu.c +++ b/src/components/mc/cpu/mc_cpu.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -167,6 +167,12 @@ static ucc_status_t ucc_mc_cpu_memcpy(void *dst, const void *src, size_t len, return UCC_OK; } +static ucc_status_t ucc_mc_cpu_memset(void *ptr, int value, size_t len) +{ + memset(ptr, value, len); + return UCC_OK; +} + static ucc_status_t ucc_mc_cpu_mem_query(const void *ptr, //NOLINT ucc_mem_attr_t *mem_attr) //NOLINT { @@ -199,6 +205,7 @@ ucc_mc_cpu_t ucc_mc_cpu = { .super.ops.mem_alloc = ucc_mc_cpu_mem_pool_alloc_with_init, .super.ops.mem_free = ucc_mc_cpu_mem_pool_free, .super.ops.memcpy = ucc_mc_cpu_memcpy, + .super.ops.memset = ucc_mc_cpu_memset, .super.ops.flush = NULL, .super.config_table = { diff --git a/src/components/mc/cuda/mc_cuda.c b/src/components/mc/cuda/mc_cuda.c index 3954e21f04..97f4ef2276 100644 --- a/src/components/mc/cuda/mc_cuda.c +++ b/src/components/mc/cuda/mc_cuda.c @@ -277,7 +277,7 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len, ucc_memory_type_t dst_mem, ucc_memory_type_t src_mem) { - cudaError_t st; + cudaError_t st; ucc_assert(dst_mem == UCC_MEMORY_TYPE_CUDA || src_mem == UCC_MEMORY_TYPE_CUDA); @@ -286,7 +286,7 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len, if (ucc_unlikely(st != cudaSuccess)) { cudaGetLastError(); mc_error(&ucc_mc_cuda.super, - "failed to launch cudaMemcpyAsync, dst %p, src %p, len %zd " + "failed to launch cudaMemcpyAsync, dst %p, src %p, len %zd " "cuda error %d(%s)", dst, src, len, st, cudaGetErrorString(st)); return UCC_ERR_NO_MESSAGE; @@ -303,6 +303,32 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len, return UCC_OK; } +static ucc_status_t ucc_mc_cuda_memset(void *ptr, int val, size_t len) +{ + cudaError_t st; + + UCC_MC_CUDA_INIT_STREAM(); + st = cudaMemsetAsync(ptr, val, len, ucc_mc_cuda.stream); + if (ucc_unlikely(st != cudaSuccess)) { + cudaGetLastError(); + mc_error(&ucc_mc_cuda.super, + "failed to launch cudaMemsetAsync, dst %p, len %zd " + "cuda error %d(%s)", + ptr, len, st, cudaGetErrorString(st)); + return UCC_ERR_NO_MESSAGE; + } + st = cudaStreamSynchronize(ucc_mc_cuda.stream); + if (ucc_unlikely(st != cudaSuccess)) { + cudaGetLastError(); + mc_error(&ucc_mc_cuda.super, + "failed to synchronize mc_cuda.stream " + "cuda error %d(%s)", + st, cudaGetErrorString(st)); + return UCC_ERR_NO_MESSAGE; + } + return UCC_OK; +} + static ucc_status_t ucc_mc_cuda_mem_query(const void *ptr, ucc_mem_attr_t *mem_attr) { @@ -399,6 +425,7 @@ ucc_mc_cuda_t ucc_mc_cuda = { .super.ops.mem_alloc = ucc_mc_cuda_mem_pool_alloc_with_init, .super.ops.mem_free = ucc_mc_cuda_mem_pool_free, .super.ops.memcpy = ucc_mc_cuda_memcpy, + .super.ops.memset = ucc_mc_cuda_memset, .super.ops.flush = ucc_mc_cuda_flush_not_supported, .super.config_table = { diff --git a/src/components/mc/rocm/mc_rocm.c b/src/components/mc/rocm/mc_rocm.c index 3c377cdcbb..29df74de5b 100644 --- a/src/components/mc/rocm/mc_rocm.c +++ b/src/components/mc/rocm/mc_rocm.c @@ -251,6 +251,32 @@ static ucc_status_t ucc_mc_rocm_memcpy(void *dst, const void *src, size_t len, return UCC_OK; } +static ucc_status_t ucc_mc_rocm_memset(void *ptr, int val, size_t len) +{ + hipError_t st; + + UCC_MC_ROCM_INIT_STREAM(); + st = hipMemsetAsync(ptr, val, len, ucc_mc_rocm.stream); + if (ucc_unlikely(st != hipSuccess)) { + hipGetLastError(); + mc_error(&ucc_mc_rocm.super, + "failed to launch hipMemsetAsync, dst %p, len %zd " + "hip error %d(%s)", + ptr, len, st, hipGetErrorString(st)); + return hip_error_to_ucc_status(st); + } + st = hipStreamSynchronize(ucc_mc_rocm.stream); + if (ucc_unlikely(st != hipSuccess)) { + hipGetLastError(); + mc_error(&ucc_mc_rocm.super, + "failed to synchronize mc_rocm.stream " + "hip error %d(%s)", + st, hipGetErrorString(st)); + return hip_error_to_ucc_status(st); + } + return UCC_OK; +} + static ucc_status_t ucc_mc_rocm_mem_query(const void *ptr, ucc_mem_attr_t *mem_attr) { @@ -330,6 +356,7 @@ ucc_mc_rocm_t ucc_mc_rocm = { .super.ops.mem_alloc = ucc_mc_rocm_mem_pool_alloc_with_init, .super.ops.mem_free = ucc_mc_rocm_mem_pool_free, .super.ops.memcpy = ucc_mc_rocm_memcpy, + .super.ops.memset = ucc_mc_rocm_memset, .super.config_table = { .name = "ROCM memory component", diff --git a/src/components/mc/ucc_mc.c b/src/components/mc/ucc_mc.c index 0393dbb886..9dbfc81c1d 100644 --- a/src/components/mc/ucc_mc.c +++ b/src/components/mc/ucc_mc.c @@ -155,6 +155,13 @@ UCC_MC_PROFILE_FUNC(ucc_status_t, ucc_mc_memcpy, return mc_ops[mt]->memcpy(dst, src, len, dst_mem, src_mem); } +ucc_status_t ucc_mc_memset(void *ptr, int value, size_t size, + ucc_memory_type_t mem_type) +{ + UCC_CHECK_MC_AVAILABLE(mem_type); + return mc_ops[mem_type]->memset(ptr, value, size); +} + ucc_status_t ucc_mc_flush(ucc_memory_type_t mem_type) { UCC_CHECK_MC_AVAILABLE(mem_type); diff --git a/src/components/mc/ucc_mc.h b/src/components/mc/ucc_mc.h index 599a4df000..e0ce1030c8 100644 --- a/src/components/mc/ucc_mc.h +++ b/src/components/mc/ucc_mc.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * See file LICENSE for terms. */ @@ -35,5 +35,7 @@ ucc_status_t ucc_mc_memcpy(void *dst, const void *src, size_t len, ucc_memory_type_t dst_mem, ucc_memory_type_t src_mem); +ucc_status_t ucc_mc_memset(void *ptr, int value, size_t size, + ucc_memory_type_t mem_type); #endif diff --git a/tools/perf/ucc_pt_coll.cc b/tools/perf/ucc_pt_coll.cc index 25877d5a2b..a561ea73b4 100644 --- a/tools/perf/ucc_pt_coll.cc +++ b/tools/perf/ucc_pt_coll.cc @@ -1,5 +1,34 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" +ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len, + ucc_memory_type_t mem_type) +{ + ucc_status_t status; + + status = ucc_mc_alloc(h_ptr, len, mem_type); + if (status != UCC_OK) { + return status; + } + + status = ucc_mc_memset((*h_ptr)->addr, 0, len, mem_type); + if (status != UCC_OK) { + ucc_mc_free(*h_ptr); + return status; + } + return UCC_OK; +} + +ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr) +{ + return ucc_mc_free(h_ptr); +} + bool ucc_pt_coll::has_reduction() { return has_reduction_; diff --git a/tools/perf/ucc_pt_coll.h b/tools/perf/ucc_pt_coll.h index 60ce24d635..cec5c7e18b 100644 --- a/tools/perf/ucc_pt_coll.h +++ b/tools/perf/ucc_pt_coll.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -14,6 +14,11 @@ extern "C" { #include } +ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len, + ucc_memory_type_t mem_type); + +ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr); + typedef union { ucc_coll_args_t coll_args; ucc_ee_executor_task_args_t executor_args; diff --git a/tools/perf/ucc_pt_coll_allgather.cc b/tools/perf/ucc_pt_coll_allgather.cc index f294c411ed..c59871b050 100644 --- a/tools/perf/ucc_pt_coll_allgather.cc +++ b/tools/perf/ucc_pt_coll_allgather.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -37,19 +43,19 @@ ucc_status_t ucc_pt_coll_allgather::init_args(size_t single_rank_count, args = coll_args; args.dst.info.count = single_rank_count * comm->get_size(); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info.mem_type), exit, st); args.dst.info.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { args.src.info.count = single_rank_count; UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -70,7 +76,7 @@ void ucc_pt_coll_allgather::free_args(ucc_pt_test_args_t &test_args) ucc_coll_args_t &args = test_args.coll_args; if (!UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } diff --git a/tools/perf/ucc_pt_coll_allgatherv.cc b/tools/perf/ucc_pt_coll_allgatherv.cc index e6fc40bbc4..cee75c6597 100644 --- a/tools/perf/ucc_pt_coll_allgatherv.cc +++ b/tools/perf/ucc_pt_coll_allgatherv.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -40,13 +46,13 @@ ucc_status_t ucc_pt_coll_allgatherv::init_args(size_t count, UCC_MALLOC_CHECK_GOTO(args.dst.info_v.counts, exit, st); args.dst.info_v.displacements = (ucc_aint_t *) ucc_malloc(comm_size * sizeof(uint32_t), "displacements buf"); UCC_MALLOC_CHECK_GOTO(args.dst.info_v.displacements, free_count, st); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info_v.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info_v.mem_type), free_displ, st); args.dst.info_v.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { args.src.info.count = count; UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } @@ -56,7 +62,7 @@ ucc_status_t ucc_pt_coll_allgatherv::init_args(size_t count, } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); free_displ: ucc_free(args.dst.info_v.displacements); free_count: @@ -70,9 +76,9 @@ void ucc_pt_coll_allgatherv::free_args(ucc_pt_test_args_t &test_args) ucc_coll_args_t &args = test_args.coll_args; if (!UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); ucc_free(args.dst.info_v.counts); ucc_free(args.dst.info_v.displacements); } diff --git a/tools/perf/ucc_pt_coll_allreduce.cc b/tools/perf/ucc_pt_coll_allreduce.cc index 4d206012f5..007e8f2b47 100644 --- a/tools/perf/ucc_pt_coll_allreduce.cc +++ b/tools/perf/ucc_pt_coll_allreduce.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -38,17 +44,17 @@ ucc_status_t ucc_pt_coll_allreduce::init_args(size_t count, args = coll_args; args.src.info.count = count; args.dst.info.count = count; - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info.mem_type), exit, + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info.mem_type), exit, st); args.dst.info.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -58,9 +64,9 @@ void ucc_pt_coll_allreduce::free_args(ucc_pt_test_args_t &test_args) ucc_coll_args_t &args = test_args.coll_args; if (!UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } float ucc_pt_coll_allreduce::get_bw(float time_ms, int grsize, diff --git a/tools/perf/ucc_pt_coll_alltoall.cc b/tools/perf/ucc_pt_coll_alltoall.cc index cf651df7a0..22a7f2e7e9 100644 --- a/tools/perf/ucc_pt_coll_alltoall.cc +++ b/tools/perf/ucc_pt_coll_alltoall.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -36,18 +42,18 @@ ucc_status_t ucc_pt_coll_alltoall::init_args(size_t single_rank_count, args = coll_args; args.dst.info.count = single_rank_count * comm_size; - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info.mem_type), exit, + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info.mem_type), exit, st); args.dst.info.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { args.src.info.count = single_rank_count * comm_size; - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -57,9 +63,9 @@ void ucc_pt_coll_alltoall::free_args(ucc_pt_test_args_t &test_args) ucc_coll_args_t &args = test_args.coll_args; if (!UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } float ucc_pt_coll_alltoall::get_bw(float time_ms, int grsize, diff --git a/tools/perf/ucc_pt_coll_alltoallv.cc b/tools/perf/ucc_pt_coll_alltoallv.cc index 3492380839..785d5b286a 100644 --- a/tools/perf/ucc_pt_coll_alltoallv.cc +++ b/tools/perf/ucc_pt_coll_alltoallv.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -44,11 +50,11 @@ ucc_status_t ucc_pt_coll_alltoallv::init_args(size_t count, UCC_MALLOC_CHECK_GOTO(args.dst.info_v.counts, free_src_displ, st); args.dst.info_v.displacements = (ucc_aint_t *) ucc_malloc(comm_size * sizeof(uint32_t), "displacements buf"); UCC_MALLOC_CHECK_GOTO(args.dst.info_v.displacements, free_dst_count, st); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info_v.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info_v.mem_type), free_dst_displ, st); args.dst.info_v.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info_v.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info_v.mem_type), free_dst, st); args.src.info_v.buffer = src_header->addr; } @@ -60,7 +66,7 @@ ucc_status_t ucc_pt_coll_alltoallv::init_args(size_t count, } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); free_dst_displ: ucc_free(args.dst.info_v.displacements); free_dst_count: @@ -78,9 +84,9 @@ void ucc_pt_coll_alltoallv::free_args(ucc_pt_test_args_t &test_args) ucc_coll_args_t &args = test_args.coll_args; if (!UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); ucc_free(args.dst.info_v.counts); ucc_free(args.dst.info_v.displacements); ucc_free(args.src.info_v.counts); diff --git a/tools/perf/ucc_pt_coll_bcast.cc b/tools/perf/ucc_pt_coll_bcast.cc index a239b25d31..45613c8237 100644 --- a/tools/perf/ucc_pt_coll_bcast.cc +++ b/tools/perf/ucc_pt_coll_bcast.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -29,7 +35,7 @@ ucc_status_t ucc_pt_coll_bcast::init_args(size_t count, args = coll_args; args.src.info.count = count; - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type), exit, + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type), exit, st); args.src.info.buffer = src_header->addr; exit: @@ -38,7 +44,7 @@ ucc_status_t ucc_pt_coll_bcast::init_args(size_t count, void ucc_pt_coll_bcast::free_args(ucc_pt_test_args_t &test_args) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } float ucc_pt_coll_bcast::get_bw(float time_ms, int grsize, diff --git a/tools/perf/ucc_pt_coll_gather.cc b/tools/perf/ucc_pt_coll_gather.cc index b650f6bcab..08cbcf510f 100644 --- a/tools/perf/ucc_pt_coll_gather.cc +++ b/tools/perf/ucc_pt_coll_gather.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -41,21 +47,21 @@ ucc_status_t ucc_pt_coll_gather::init_args(size_t single_rank_count, args.src.info.count = single_rank_count; is_root = (comm->get_rank() == args.root); if (is_root) { - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info.mem_type), exit, st_dst); args.dst.info.buffer = dst_header->addr; } if (!is_root || !UCC_IS_INPLACE(args)) { UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type), free_dst, st_src); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: if (is_root && st_dst == UCC_OK) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } return st_src; exit: @@ -79,9 +85,9 @@ void ucc_pt_coll_gather::free_args(ucc_pt_test_args_t &test_args) bool is_root = (comm->get_rank() == args.root); if (!is_root || !UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } if (is_root) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } } diff --git a/tools/perf/ucc_pt_coll_gatherv.cc b/tools/perf/ucc_pt_coll_gatherv.cc index 875c08f9aa..fb80925e27 100644 --- a/tools/perf/ucc_pt_coll_gatherv.cc +++ b/tools/perf/ucc_pt_coll_gatherv.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -46,7 +52,7 @@ ucc_status_t ucc_pt_coll_gatherv::init_args(size_t count, args.dst.info_v.displacements = (ucc_aint_t *) ucc_malloc(comm_size * sizeof(uint32_t), "displacements buf"); UCC_MALLOC_CHECK_GOTO(args.dst.info_v.displacements, free_count, st); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info_v.mem_type), free_displ, st); args.dst.info_v.buffer = dst_header->addr; for (int i = 0; i < comm->get_size(); i++) { @@ -57,7 +63,7 @@ ucc_status_t ucc_pt_coll_gatherv::init_args(size_t count, if (!is_root || !UCC_IS_INPLACE(args)) { args.src.info.count = count; - st = ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type); + st = ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type); if (UCC_OK != st) { std::cerr << "UCC perftest error: " << ucc_status_string(st) << " in " << STR(_call) << "\n"; @@ -71,7 +77,7 @@ ucc_status_t ucc_pt_coll_gatherv::init_args(size_t count, } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); free_displ: ucc_free(args.dst.info_v.displacements); free_count: @@ -86,10 +92,10 @@ void ucc_pt_coll_gatherv::free_args(ucc_pt_test_args_t &test_args) bool is_root = (comm->get_rank() == args.root); if (!is_root || !UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } if (is_root) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); ucc_free(args.dst.info_v.displacements); ucc_free(args.dst.info_v.counts); } diff --git a/tools/perf/ucc_pt_coll_reduce.cc b/tools/perf/ucc_pt_coll_reduce.cc index 8616334e2f..5d42aa14b2 100644 --- a/tools/perf/ucc_pt_coll_reduce.cc +++ b/tools/perf/ucc_pt_coll_reduce.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -41,19 +47,19 @@ ucc_status_t ucc_pt_coll_reduce::init_args(size_t count, args.dst.info.count = count; bool is_root = (comm->get_rank() == args.root); if (is_root) { - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info.mem_type), exit, st_dst); args.dst.info.buffer = dst_header->addr; } if (!is_root || !UCC_IS_INPLACE(args)) { - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type), free_dst, st_src); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: if (is_root && st_dst == UCC_OK) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } return st_src; exit: @@ -66,10 +72,10 @@ void ucc_pt_coll_reduce::free_args(ucc_pt_test_args_t &test_args) bool is_root = (comm->get_rank() == args.root); if (!is_root || !UCC_IS_INPLACE(args)) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } if (is_root) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } } diff --git a/tools/perf/ucc_pt_coll_reduce_scatter.cc b/tools/perf/ucc_pt_coll_reduce_scatter.cc index 398a3350cc..03dd9c557d 100644 --- a/tools/perf/ucc_pt_coll_reduce_scatter.cc +++ b/tools/perf/ucc_pt_coll_reduce_scatter.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -47,18 +53,18 @@ ucc_status_t ucc_pt_coll_reduce_scatter::init_args(size_t count, } size = args.dst.info.count * ucc_dt_size(args.dst.info.datatype); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info.mem_type), exit, st); args.dst.info.buffer = dst_header->addr; if (args.src.info.count != 0) { size = args.src.info.count * ucc_dt_size(args.src.info.datatype); - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -66,11 +72,11 @@ ucc_status_t ucc_pt_coll_reduce_scatter::init_args(size_t count, void ucc_pt_coll_reduce_scatter::free_args(ucc_pt_test_args_t &test_args) { if (dst_header) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); dst_header = nullptr; } if (src_header) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); src_header = nullptr; } } diff --git a/tools/perf/ucc_pt_coll_reduce_scatterv.cc b/tools/perf/ucc_pt_coll_reduce_scatterv.cc index 68fb66cd2c..71b97a5e39 100644 --- a/tools/perf/ucc_pt_coll_reduce_scatterv.cc +++ b/tools/perf/ucc_pt_coll_reduce_scatterv.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -59,13 +65,13 @@ ucc_status_t ucc_pt_coll_reduce_scatterv::init_args(size_t count, displs = (ucc_aint_t*)ucc_malloc(tsize * sizeof(uint32_t), "displ buf"); UCC_MALLOC_CHECK_GOTO(displs, free_counts, st); - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info_v.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info_v.mem_type), free_displs, st); args.dst.info_v.buffer = dst_header->addr; if (!UCC_IS_INPLACE(args)) { args.src.info.count = count * tsize; UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type), free_dst, st); args.src.info.buffer = src_header->addr; } @@ -81,7 +87,7 @@ ucc_status_t ucc_pt_coll_reduce_scatterv::init_args(size_t count, return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); free_displs: ucc_free(displs); free_counts: @@ -105,11 +111,11 @@ void ucc_pt_coll_reduce_scatterv::free_args(ucc_pt_test_args_t &test_args) } if (dst_header) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); dst_header = nullptr; } if (src_header) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); src_header = nullptr; } } diff --git a/tools/perf/ucc_pt_coll_scatter.cc b/tools/perf/ucc_pt_coll_scatter.cc index ab1bd4db93..d27ef50de2 100644 --- a/tools/perf/ucc_pt_coll_scatter.cc +++ b/tools/perf/ucc_pt_coll_scatter.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -42,19 +48,19 @@ ucc_status_t ucc_pt_coll_scatter::init_args(size_t single_rank_count, if (is_root) { args.src.info.count = single_rank_count * comm->get_size(); UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type), exit, st_src); args.src.info.buffer = src_header->addr; } if (!is_root || !UCC_IS_INPLACE(args)) { - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info.mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info.mem_type), free_src, st_dst); args.dst.info.buffer = dst_header->addr; return UCC_OK; } free_src: if (is_root && st_src == UCC_OK) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } return st_dst; exit: @@ -77,9 +83,9 @@ void ucc_pt_coll_scatter::free_args(ucc_pt_test_args_t &test_args) bool is_root = (comm->get_rank() == args.root); if (!is_root || !UCC_IS_INPLACE(args)) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } if (is_root) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); } } diff --git a/tools/perf/ucc_pt_coll_scatterv.cc b/tools/perf/ucc_pt_coll_scatterv.cc index 6b12d30a9e..b2f92bd669 100644 --- a/tools/perf/ucc_pt_coll_scatterv.cc +++ b/tools/perf/ucc_pt_coll_scatterv.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -47,7 +53,7 @@ ucc_status_t ucc_pt_coll_scatterv::init_args(size_t count, ucc_malloc(comm_size * sizeof(uint32_t), "displacements buf"); UCC_MALLOC_CHECK_GOTO(args.src.info_v.displacements, free_count, st); UCCCHECK_GOTO( - ucc_mc_alloc(&src_header, size_src, args.src.info_v.mem_type), + ucc_pt_alloc(&src_header, size_src, args.src.info_v.mem_type), free_displ, st); args.src.info_v.buffer = src_header->addr; for (int i = 0; i < comm->get_size(); i++) { @@ -57,7 +63,7 @@ ucc_status_t ucc_pt_coll_scatterv::init_args(size_t count, } if (!is_root || !UCC_IS_INPLACE(args)) { args.dst.info.count = count; - st = ucc_mc_alloc(&dst_header, size_dst, args.dst.info.mem_type); + st = ucc_pt_alloc(&dst_header, size_dst, args.dst.info.mem_type); if (UCC_OK != st) { std::cerr << "UCC perftest error: " << ucc_status_string(st) << " in " << STR(_call) << "\n"; @@ -71,7 +77,7 @@ ucc_status_t ucc_pt_coll_scatterv::init_args(size_t count, return UCC_OK; } free_src: - ucc_mc_free(src_header); + ucc_pt_free(src_header); free_displ: ucc_free(args.src.info_v.displacements); free_count: @@ -86,10 +92,10 @@ void ucc_pt_coll_scatterv::free_args(ucc_pt_test_args_t &test_args) bool is_root = (comm->get_rank() == args.root); if (!is_root || !UCC_IS_INPLACE(args)) { - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); } if (is_root) { - ucc_mc_free(src_header); + ucc_pt_free(src_header); ucc_free(args.src.info_v.displacements); ucc_free(args.src.info_v.counts); } diff --git a/tools/perf/ucc_pt_op_memcpy.cc b/tools/perf/ucc_pt_op_memcpy.cc index 1b01dba343..fd2084f31e 100644 --- a/tools/perf/ucc_pt_op_memcpy.cc +++ b/tools/perf/ucc_pt_op_memcpy.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -24,8 +30,8 @@ ucc_status_t ucc_pt_op_memcpy::init_args(size_t count, size_t size = count * ucc_dt_size(data_type); ucc_status_t st; - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, mem_type), exit, st); - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, mem_type), free_dst, st); + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, mem_type), exit, st); + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, mem_type), free_dst, st); args.task_type = UCC_EE_EXECUTOR_TASK_COPY; args.copy.dst = dst_header->addr; @@ -34,7 +40,7 @@ ucc_status_t ucc_pt_op_memcpy::init_args(size_t count, return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -50,6 +56,6 @@ float ucc_pt_op_memcpy::get_bw(float time_ms, int grsize, void ucc_pt_op_memcpy::free_args(ucc_pt_test_args_t &test_args) { - ucc_mc_free(src_header); - ucc_mc_free(dst_header); + ucc_pt_free(src_header); + ucc_pt_free(dst_header); } diff --git a/tools/perf/ucc_pt_op_reduce.cc b/tools/perf/ucc_pt_op_reduce.cc index b3525ecec4..9e5225428e 100644 --- a/tools/perf/ucc_pt_op_reduce.cc +++ b/tools/perf/ucc_pt_op_reduce.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -38,8 +44,8 @@ ucc_status_t ucc_pt_op_reduce::init_args(size_t count, ucc_status_t st; int i; - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, mem_type), exit, st); - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size * num_bufs, mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, mem_type), exit, st); + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size * num_bufs, mem_type), free_dst, st); args.task_type = UCC_EE_EXECUTOR_TASK_REDUCE; @@ -55,7 +61,7 @@ ucc_status_t ucc_pt_op_reduce::init_args(size_t count, return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -71,6 +77,6 @@ float ucc_pt_op_reduce::get_bw(float time_ms, int grsize, void ucc_pt_op_reduce::free_args(ucc_pt_test_args_t &test_args) { - ucc_mc_free(src_header); - ucc_mc_free(dst_header); + ucc_pt_free(src_header); + ucc_pt_free(dst_header); } diff --git a/tools/perf/ucc_pt_op_reduce_strided.cc b/tools/perf/ucc_pt_op_reduce_strided.cc index 532cfb7a57..f3e9f66171 100644 --- a/tools/perf/ucc_pt_op_reduce_strided.cc +++ b/tools/perf/ucc_pt_op_reduce_strided.cc @@ -1,3 +1,9 @@ +/** + * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + #include "ucc_pt_coll.h" #include "ucc_perftest.h" #include @@ -34,8 +40,8 @@ ucc_status_t ucc_pt_op_reduce_strided::init_args(size_t count, size_t stride = count * ucc_dt_size(data_type); ucc_status_t st; - UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, mem_type), exit, st); - UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size * num_bufs, mem_type), + UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, mem_type), exit, st); + UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size * num_bufs, mem_type), free_dst, st); args.task_type = UCC_EE_EXECUTOR_TASK_REDUCE_STRIDED; @@ -51,7 +57,7 @@ ucc_status_t ucc_pt_op_reduce_strided::init_args(size_t count, return UCC_OK; free_dst: - ucc_mc_free(dst_header); + ucc_pt_free(dst_header); exit: return st; } @@ -68,6 +74,6 @@ float ucc_pt_op_reduce_strided::get_bw(float time_ms, int grsize, void ucc_pt_op_reduce_strided::free_args(ucc_pt_test_args_t &test_args) { - ucc_mc_free(src_header); - ucc_mc_free(dst_header); + ucc_pt_free(src_header); + ucc_pt_free(dst_header); }