Skip to content

Commit

Permalink
MC: add memset function (#740)
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev authored Mar 16, 2023
1 parent b837e87 commit c319e25
Show file tree
Hide file tree
Showing 25 changed files with 285 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .azure/azure-pipelines-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,4 @@ stages:
cd build
make gtest
displayName: Launch Gtest
timeoutInMinutes: 40
timeoutInMinutes: 120
3 changes: 2 additions & 1 deletion src/components/mc/base/ucc_mc_base.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -105,6 +105,7 @@ typedef struct ucc_mc_ops {
ucc_status_t (*memcpy)(void *dst, const void *src, size_t len,
ucc_memory_type_t dst_mem,
ucc_memory_type_t src_mem);
ucc_status_t (*memset)(void *dst, int value, size_t len);
ucc_status_t (*flush)();
} ucc_mc_ops_t;

Expand Down
9 changes: 8 additions & 1 deletion src/components/mc/cpu/mc_cpu.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -167,6 +167,12 @@ static ucc_status_t ucc_mc_cpu_memcpy(void *dst, const void *src, size_t len,
return UCC_OK;
}

static ucc_status_t ucc_mc_cpu_memset(void *ptr, int value, size_t len)
{
memset(ptr, value, len);
return UCC_OK;
}

static ucc_status_t ucc_mc_cpu_mem_query(const void *ptr, //NOLINT
ucc_mem_attr_t *mem_attr) //NOLINT
{
Expand Down Expand Up @@ -199,6 +205,7 @@ ucc_mc_cpu_t ucc_mc_cpu = {
.super.ops.mem_alloc = ucc_mc_cpu_mem_pool_alloc_with_init,
.super.ops.mem_free = ucc_mc_cpu_mem_pool_free,
.super.ops.memcpy = ucc_mc_cpu_memcpy,
.super.ops.memset = ucc_mc_cpu_memset,
.super.ops.flush = NULL,
.super.config_table =
{
Expand Down
31 changes: 29 additions & 2 deletions src/components/mc/cuda/mc_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len,
ucc_memory_type_t dst_mem,
ucc_memory_type_t src_mem)
{
cudaError_t st;
cudaError_t st;
ucc_assert(dst_mem == UCC_MEMORY_TYPE_CUDA ||
src_mem == UCC_MEMORY_TYPE_CUDA);

Expand All @@ -286,7 +286,7 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len,
if (ucc_unlikely(st != cudaSuccess)) {
cudaGetLastError();
mc_error(&ucc_mc_cuda.super,
"failed to launch cudaMemcpyAsync, dst %p, src %p, len %zd "
"failed to launch cudaMemcpyAsync, dst %p, src %p, len %zd "
"cuda error %d(%s)",
dst, src, len, st, cudaGetErrorString(st));
return UCC_ERR_NO_MESSAGE;
Expand All @@ -303,6 +303,32 @@ static ucc_status_t ucc_mc_cuda_memcpy(void *dst, const void *src, size_t len,
return UCC_OK;
}

static ucc_status_t ucc_mc_cuda_memset(void *ptr, int val, size_t len)
{
cudaError_t st;

UCC_MC_CUDA_INIT_STREAM();
st = cudaMemsetAsync(ptr, val, len, ucc_mc_cuda.stream);
if (ucc_unlikely(st != cudaSuccess)) {
cudaGetLastError();
mc_error(&ucc_mc_cuda.super,
"failed to launch cudaMemsetAsync, dst %p, len %zd "
"cuda error %d(%s)",
ptr, len, st, cudaGetErrorString(st));
return UCC_ERR_NO_MESSAGE;
}
st = cudaStreamSynchronize(ucc_mc_cuda.stream);
if (ucc_unlikely(st != cudaSuccess)) {
cudaGetLastError();
mc_error(&ucc_mc_cuda.super,
"failed to synchronize mc_cuda.stream "
"cuda error %d(%s)",
st, cudaGetErrorString(st));
return UCC_ERR_NO_MESSAGE;
}
return UCC_OK;
}

static ucc_status_t ucc_mc_cuda_mem_query(const void *ptr,
ucc_mem_attr_t *mem_attr)
{
Expand Down Expand Up @@ -399,6 +425,7 @@ ucc_mc_cuda_t ucc_mc_cuda = {
.super.ops.mem_alloc = ucc_mc_cuda_mem_pool_alloc_with_init,
.super.ops.mem_free = ucc_mc_cuda_mem_pool_free,
.super.ops.memcpy = ucc_mc_cuda_memcpy,
.super.ops.memset = ucc_mc_cuda_memset,
.super.ops.flush = ucc_mc_cuda_flush_not_supported,
.super.config_table =
{
Expand Down
27 changes: 27 additions & 0 deletions src/components/mc/rocm/mc_rocm.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,32 @@ static ucc_status_t ucc_mc_rocm_memcpy(void *dst, const void *src, size_t len,
return UCC_OK;
}

static ucc_status_t ucc_mc_rocm_memset(void *ptr, int val, size_t len)
{
hipError_t st;

UCC_MC_ROCM_INIT_STREAM();
st = hipMemsetAsync(ptr, val, len, ucc_mc_rocm.stream);
if (ucc_unlikely(st != hipSuccess)) {
hipGetLastError();
mc_error(&ucc_mc_rocm.super,
"failed to launch hipMemsetAsync, dst %p, len %zd "
"hip error %d(%s)",
ptr, len, st, hipGetErrorString(st));
return hip_error_to_ucc_status(st);
}
st = hipStreamSynchronize(ucc_mc_rocm.stream);
if (ucc_unlikely(st != hipSuccess)) {
hipGetLastError();
mc_error(&ucc_mc_rocm.super,
"failed to synchronize mc_rocm.stream "
"hip error %d(%s)",
st, hipGetErrorString(st));
return hip_error_to_ucc_status(st);
}
return UCC_OK;
}

static ucc_status_t ucc_mc_rocm_mem_query(const void *ptr,
ucc_mem_attr_t *mem_attr)
{
Expand Down Expand Up @@ -330,6 +356,7 @@ ucc_mc_rocm_t ucc_mc_rocm = {
.super.ops.mem_alloc = ucc_mc_rocm_mem_pool_alloc_with_init,
.super.ops.mem_free = ucc_mc_rocm_mem_pool_free,
.super.ops.memcpy = ucc_mc_rocm_memcpy,
.super.ops.memset = ucc_mc_rocm_memset,
.super.config_table =
{
.name = "ROCM memory component",
Expand Down
7 changes: 7 additions & 0 deletions src/components/mc/ucc_mc.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ UCC_MC_PROFILE_FUNC(ucc_status_t, ucc_mc_memcpy,
return mc_ops[mt]->memcpy(dst, src, len, dst_mem, src_mem);
}

ucc_status_t ucc_mc_memset(void *ptr, int value, size_t size,
ucc_memory_type_t mem_type)
{
UCC_CHECK_MC_AVAILABLE(mem_type);
return mc_ops[mem_type]->memset(ptr, value, size);
}

ucc_status_t ucc_mc_flush(ucc_memory_type_t mem_type)
{
UCC_CHECK_MC_AVAILABLE(mem_type);
Expand Down
4 changes: 3 additions & 1 deletion src/components/mc/ucc_mc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* See file LICENSE for terms.
*/

Expand Down Expand Up @@ -35,5 +35,7 @@ ucc_status_t ucc_mc_memcpy(void *dst, const void *src, size_t len,
ucc_memory_type_t dst_mem,
ucc_memory_type_t src_mem);

ucc_status_t ucc_mc_memset(void *ptr, int value, size_t size,
ucc_memory_type_t mem_type);

#endif
29 changes: 29 additions & 0 deletions tools/perf/ucc_pt_coll.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "ucc_pt_coll.h"

ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len,
ucc_memory_type_t mem_type)
{
ucc_status_t status;

status = ucc_mc_alloc(h_ptr, len, mem_type);
if (status != UCC_OK) {
return status;
}

status = ucc_mc_memset((*h_ptr)->addr, 0, len, mem_type);
if (status != UCC_OK) {
ucc_mc_free(*h_ptr);
return status;
}
return UCC_OK;
}

ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr)
{
return ucc_mc_free(h_ptr);
}

bool ucc_pt_coll::has_reduction()
{
return has_reduction_;
Expand Down
7 changes: 6 additions & 1 deletion tools/perf/ucc_pt_coll.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -14,6 +14,11 @@ extern "C" {
#include <components/mc/ucc_mc.h>
}

ucc_status_t ucc_pt_alloc(ucc_mc_buffer_header_t **h_ptr, size_t len,
ucc_memory_type_t mem_type);

ucc_status_t ucc_pt_free(ucc_mc_buffer_header_t *h_ptr);

typedef union {
ucc_coll_args_t coll_args;
ucc_ee_executor_task_args_t executor_args;
Expand Down
16 changes: 11 additions & 5 deletions tools/perf/ucc_pt_coll_allgather.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "ucc_pt_coll.h"
#include "ucc_perftest.h"
#include <ucc/api/ucc.h>
Expand Down Expand Up @@ -37,19 +43,19 @@ ucc_status_t ucc_pt_coll_allgather::init_args(size_t single_rank_count,

args = coll_args;
args.dst.info.count = single_rank_count * comm->get_size();
UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info.mem_type),
UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info.mem_type),
exit, st);
args.dst.info.buffer = dst_header->addr;
if (!UCC_IS_INPLACE(args)) {
args.src.info.count = single_rank_count;
UCCCHECK_GOTO(
ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type),
ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type),
free_dst, st);
args.src.info.buffer = src_header->addr;
}
return UCC_OK;
free_dst:
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
exit:
return st;
}
Expand All @@ -70,7 +76,7 @@ void ucc_pt_coll_allgather::free_args(ucc_pt_test_args_t &test_args)
ucc_coll_args_t &args = test_args.coll_args;

if (!UCC_IS_INPLACE(args)) {
ucc_mc_free(src_header);
ucc_pt_free(src_header);
}
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
}
16 changes: 11 additions & 5 deletions tools/perf/ucc_pt_coll_allgatherv.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "ucc_pt_coll.h"
#include "ucc_perftest.h"
#include <ucc/api/ucc.h>
Expand Down Expand Up @@ -40,13 +46,13 @@ ucc_status_t ucc_pt_coll_allgatherv::init_args(size_t count,
UCC_MALLOC_CHECK_GOTO(args.dst.info_v.counts, exit, st);
args.dst.info_v.displacements = (ucc_aint_t *) ucc_malloc(comm_size * sizeof(uint32_t), "displacements buf");
UCC_MALLOC_CHECK_GOTO(args.dst.info_v.displacements, free_count, st);
UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size_dst, args.dst.info_v.mem_type),
UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size_dst, args.dst.info_v.mem_type),
free_displ, st);
args.dst.info_v.buffer = dst_header->addr;
if (!UCC_IS_INPLACE(args)) {
args.src.info.count = count;
UCCCHECK_GOTO(
ucc_mc_alloc(&src_header, size_src, args.src.info.mem_type),
ucc_pt_alloc(&src_header, size_src, args.src.info.mem_type),
free_dst, st);
args.src.info.buffer = src_header->addr;
}
Expand All @@ -56,7 +62,7 @@ ucc_status_t ucc_pt_coll_allgatherv::init_args(size_t count,
}
return UCC_OK;
free_dst:
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
free_displ:
ucc_free(args.dst.info_v.displacements);
free_count:
Expand All @@ -70,9 +76,9 @@ void ucc_pt_coll_allgatherv::free_args(ucc_pt_test_args_t &test_args)
ucc_coll_args_t &args = test_args.coll_args;

if (!UCC_IS_INPLACE(args)) {
ucc_mc_free(src_header);
ucc_pt_free(src_header);
}
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
ucc_free(args.dst.info_v.counts);
ucc_free(args.dst.info_v.displacements);
}
16 changes: 11 additions & 5 deletions tools/perf/ucc_pt_coll_allreduce.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "ucc_pt_coll.h"
#include "ucc_perftest.h"
#include <ucc/api/ucc.h>
Expand Down Expand Up @@ -38,17 +44,17 @@ ucc_status_t ucc_pt_coll_allreduce::init_args(size_t count,
args = coll_args;
args.src.info.count = count;
args.dst.info.count = count;
UCCCHECK_GOTO(ucc_mc_alloc(&dst_header, size, args.dst.info.mem_type), exit,
UCCCHECK_GOTO(ucc_pt_alloc(&dst_header, size, args.dst.info.mem_type), exit,
st);
args.dst.info.buffer = dst_header->addr;
if (!UCC_IS_INPLACE(args)) {
UCCCHECK_GOTO(ucc_mc_alloc(&src_header, size, args.src.info.mem_type),
UCCCHECK_GOTO(ucc_pt_alloc(&src_header, size, args.src.info.mem_type),
free_dst, st);
args.src.info.buffer = src_header->addr;
}
return UCC_OK;
free_dst:
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
exit:
return st;
}
Expand All @@ -58,9 +64,9 @@ void ucc_pt_coll_allreduce::free_args(ucc_pt_test_args_t &test_args)
ucc_coll_args_t &args = test_args.coll_args;

if (!UCC_IS_INPLACE(args)) {
ucc_mc_free(src_header);
ucc_pt_free(src_header);
}
ucc_mc_free(dst_header);
ucc_pt_free(dst_header);
}

float ucc_pt_coll_allreduce::get_bw(float time_ms, int grsize,
Expand Down
Loading

0 comments on commit c319e25

Please sign in to comment.