From 9ceb0577f476f0241ae2a59f23e9c36e68cace32 Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Mon, 4 Mar 2024 14:53:24 +0100 Subject: [PATCH] TEST: do local checks for gather (#901) --- src/utils/ucc_component.c | 6 ++++-- src/utils/ucc_log.h | 20 ++++++++++++++++++++ test/mpi/test_allgather.cc | 6 +----- test/mpi/test_bcast.cc | 10 +++++----- test/mpi/test_gather.cc | 35 ++++++++++++++++------------------- test/mpi/test_gatherv.cc | 30 ++++++++++++++---------------- test/mpi/test_scatter.cc | 1 - 7 files changed, 60 insertions(+), 48 deletions(-) diff --git a/src/utils/ucc_component.c b/src/utils/ucc_component.c index b19bd2e397..83d4aa8558 100644 --- a/src/utils/ucc_component.c +++ b/src/utils/ucc_component.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * See file LICENSE for terms. */ #include "config.h" @@ -52,7 +52,9 @@ static ucc_status_t ucc_component_load_one(const char *so_path, handle = dlopen(so_path, RTLD_LAZY); if (!handle) { - ucc_debug("failed to load UCC component library: %s", so_path); + error = dlerror(); + ucc_debug("failed to load UCC component library: %s (%s)", + so_path, error); goto error; } iface = (ucc_component_iface_t *)dlsym(handle, iface_struct); diff --git a/src/utils/ucc_log.h b/src/utils/ucc_log.h index b480ee55ae..d9aafab7ab 100644 --- a/src/utils/ucc_log.h +++ b/src/utils/ucc_log.h @@ -67,6 +67,26 @@ #define ucc_coll_trace_debug(_fmt, ...) \ ucc_log_component_collective_trace(UCS_LOG_LEVEL_DEBUG, _fmt, ##__VA_ARGS__) +/** + * Print a message regardless of current log level. Output can be + * enabled/disabled via environment variable/configuration settings. + * + * During debugging it can be useful to add a few prints to the code + * without changing a current log level. Also it is useful to be able + * to see messages only from specific processes. 
For example, one may + * want to see prints only from rank 0 when debugging MPI. + * + * The function is intended for debugging only. It should not be used + * in the real code. + */ + +#define ucc_print(_fmt, ...) \ + do { \ + ucs_log_dispatch(__FILE__, __LINE__, __FUNCTION__, \ + UCS_LOG_LEVEL_PRINT, \ + &ucc_global_config.log_component, \ + _fmt, ## __VA_ARGS__); \ + } while(0) static inline const char* ucc_coll_type_str(ucc_coll_type_t ct) { diff --git a/test/mpi/test_allgather.cc b/test/mpi/test_allgather.cc index ebca8c4c95..a98bbd8ee4 100644 --- a/test/mpi/test_allgather.cc +++ b/test/mpi/test_allgather.cc @@ -51,7 +51,7 @@ ucc_status_t TestAllgather::set_input(int iter_persistent) size_t single_rank_count = msgsize / dt_size; size_t single_rank_size = single_rank_count * dt_size; int rank; - void *buf, *check; + void *buf; this->iter_persistent = iter_persistent; MPI_Comm_rank(team.comm, &rank); @@ -60,12 +60,9 @@ ucc_status_t TestAllgather::set_input(int iter_persistent) } else { buf = sbuf; } - check = PTR_OFFSET(check_buf, rank * single_rank_size); init_buffer(buf, single_rank_count, dt, mem_type, rank * (iter_persistent + 1)); - UCC_CHECK(ucc_mc_memcpy(check, buf, single_rank_size, - UCC_MEMORY_TYPE_HOST, mem_type)); return UCC_OK; } @@ -83,7 +80,6 @@ ucc_status_t TestAllgather::check() i * (iter_persistent + 1)); } - return compare_buffers(rbuf, check_buf, single_rank_count * size, dt, mem_type); } diff --git a/test/mpi/test_bcast.cc b/test/mpi/test_bcast.cc index 080cbb436f..d5d1c92a7c 100644 --- a/test/mpi/test_bcast.cc +++ b/test/mpi/test_bcast.cc @@ -49,8 +49,6 @@ ucc_status_t TestBcast::set_input(int iter_persistent) MPI_Comm_rank(team.comm, &rank); if (rank == root) { init_buffer(sbuf, count, dt, mem_type, rank * (iter_persistent + 1)); - UCC_CHECK(ucc_mc_memcpy(check_buf, sbuf, count * dt_size, - UCC_MEMORY_TYPE_HOST, mem_type)); } return UCC_OK; } @@ -61,9 +59,11 @@ ucc_status_t TestBcast::check() int rank; MPI_Comm_rank(team.comm, &rank); + if 
(rank == root) { + return UCC_OK; + } + init_buffer(check_buf, count, dt, UCC_MEMORY_TYPE_HOST, root * (iter_persistent + 1)); - return (rank == root) - ? UCC_OK - : compare_buffers(sbuf, check_buf, count, dt, mem_type); + return compare_buffers(sbuf, check_buf, count, dt, mem_type); } diff --git a/test/mpi/test_gather.cc b/test/mpi/test_gather.cc index 4b87fe5397..0f21455fab 100644 --- a/test/mpi/test_gather.cc +++ b/test/mpi/test_gather.cc @@ -75,8 +75,9 @@ ucc_status_t TestGather::set_input(int iter_persistent) size_t single_rank_count = msgsize / dt_size; size_t single_rank_size = single_rank_count * dt_size; int rank; - void *buf, *check; + void *buf; + this->iter_persistent = iter_persistent; MPI_Comm_rank(team.comm, &rank); if (rank == root) { if (inplace) { @@ -87,34 +88,30 @@ ucc_status_t TestGather::set_input(int iter_persistent) } else { buf = sbuf; } - check = PTR_OFFSET(check_buf, rank * single_rank_size); - init_buffer(buf, single_rank_count, dt, mem_type, rank * (iter_persistent + 1)); - UCC_CHECK(ucc_mc_memcpy(check, buf, single_rank_size, - UCC_MEMORY_TYPE_HOST, mem_type)); return UCC_OK; } ucc_status_t TestGather::check() { - size_t single_rank_count = msgsize / ucc_dt_size(dt); - MPI_Datatype mpi_dt = ucc_dt_to_mpi(dt); - MPI_Request req; - int size, rank, completed; + int size, rank, i; + size_t dt_size, single_rank_count; MPI_Comm_size(team.comm, &size); MPI_Comm_rank(team.comm, &rank); + if (rank != root) { + return UCC_OK; + } - MPI_Iallgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, check_buf, - single_rank_count, mpi_dt, team.comm, &req); - do { - MPI_Test(&req, &completed, MPI_STATUS_IGNORE); - ucc_context_progress(team.ctx); - } while(!completed); + dt_size = ucc_dt_size(dt); + single_rank_count = msgsize / dt_size; + for (i = 0; i < size; i++) { + init_buffer(PTR_OFFSET(check_buf, i * single_rank_count * dt_size), + single_rank_count, dt, UCC_MEMORY_TYPE_HOST, + i * (iter_persistent + 1)); + } - return (rank != root) - ? 
UCC_OK - : compare_buffers(rbuf, check_buf, single_rank_count * size, dt, - mem_type); + return compare_buffers(rbuf, check_buf, single_rank_count * size, dt, + mem_type); } diff --git a/test/mpi/test_gatherv.cc b/test/mpi/test_gatherv.cc index 7468a56307..445b9faa14 100644 --- a/test/mpi/test_gatherv.cc +++ b/test/mpi/test_gatherv.cc @@ -106,8 +106,9 @@ ucc_status_t TestGatherv::set_input(int iter_persistent) { size_t dt_size = ucc_dt_size(dt); int rank; - void *buf, *check; + void *buf; + this->iter_persistent = iter_persistent; MPI_Comm_rank(team.comm, &rank); if (rank == root) { if (inplace) { @@ -118,11 +119,8 @@ ucc_status_t TestGatherv::set_input(int iter_persistent) } else { buf = sbuf; } - check = PTR_OFFSET(check_buf, displacements[rank] * dt_size); - init_buffer(buf, counts[rank], dt, mem_type, rank * (iter_persistent + 1)); - UCC_CHECK(ucc_mc_memcpy(check, buf, counts[rank] * dt_size, - UCC_MEMORY_TYPE_HOST, mem_type)); + return UCC_OK; } @@ -138,21 +136,21 @@ TestGatherv::~TestGatherv() ucc_status_t TestGatherv::check() { - size_t count = msgsize / ucc_dt_size(dt); - MPI_Datatype mpi_dt = ucc_dt_to_mpi(dt); - MPI_Request req; - int size, rank, completed; + size_t count = msgsize / ucc_dt_size(dt); + int size, rank, i; MPI_Comm_size(team.comm, &size); MPI_Comm_rank(team.comm, &rank); - MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, check_buf, - (int *)counts, (int *)displacements, mpi_dt, team.comm, - &req); - do { - MPI_Test(&req, &completed, MPI_STATUS_IGNORE); - ucc_context_progress(team.ctx); - } while(!completed); + if (rank != root) { + return UCC_OK; + } + + for (i = 0; i < size; i++) { + init_buffer(PTR_OFFSET(check_buf, displacements[i] * ucc_dt_size(dt)), + counts[i], dt, UCC_MEMORY_TYPE_HOST, + i * (iter_persistent + 1)); + } return (rank != root) ? 
UCC_OK diff --git a/test/mpi/test_scatter.cc b/test/mpi/test_scatter.cc index 016ed7465b..4d4438b635 100644 --- a/test/mpi/test_scatter.cc +++ b/test/mpi/test_scatter.cc @@ -25,7 +25,6 @@ TestScatter::TestScatter(ucc_test_team_t &_team, TestCaseParams &params) : TEST_SKIP_MEM_LIMIT, team.comm)) { return; } - if (rank == root) { UCC_CHECK(ucc_mc_alloc(&sbuf_mc_header, msgsize * size, mem_type)); sbuf = sbuf_mc_header->addr;