diff --git a/src/coll_patterns/recursive_knomial.h b/src/coll_patterns/recursive_knomial.h index 4f5d2ac672..1888169f8b 100644 --- a/src/coll_patterns/recursive_knomial.h +++ b/src/coll_patterns/recursive_knomial.h @@ -48,8 +48,10 @@ typedef struct ucc_knomial_pattern { ucc_rank_t n_extra; /* number of "extra" ranks to be served by "proxies" */ size_t block_size_counts; size_t count; /* collective buffer size */ + ucc_count_t *counts; ucc_rank_t block_size; ptrdiff_t block_offset; + int is64; } ucc_knomial_pattern_t; /** diff --git a/src/coll_patterns/sra_knomial.h b/src/coll_patterns/sra_knomial.h index 9d0906b5db..11b99dcf53 100644 --- a/src/coll_patterns/sra_knomial.h +++ b/src/coll_patterns/sra_knomial.h @@ -211,6 +211,20 @@ ucc_kn_agx_pattern_init(ucc_rank_t size, ucc_rank_t rank, ucc_kn_radix_t radix, } } +static inline void +ucc_kn_agv_pattern_init(ucc_rank_t size, ucc_rank_t rank, ucc_kn_radix_t radix, + ucc_count_t *counts, int is64, + ucc_knomial_pattern_t *p) +{ + ucc_knomial_pattern_init(size, rank, radix, p); + p->type = KN_PATTERN_ALLGATHERV; + p->counts = counts; + p->is64 = is64; + p->block_size = p->radix_pow * radix; + p->block_offset = ucc_knomial_pattern_loop_rank(p, rank) / p->block_size * + p->block_size; +} + static inline void ucc_kn_ag_pattern_peer_seg(ucc_rank_t peer, ucc_knomial_pattern_t *p, size_t *seg_count, ptrdiff_t *seg_offset) @@ -236,8 +250,12 @@ ucc_kn_ag_pattern_peer_seg(ucc_rank_t peer, ucc_knomial_pattern_t *p, *seg_offset; return; case KN_PATTERN_ALLGATHERV: - /* not implemented */ - ucc_assert(0); + ucc_kn_seg_desc_compute(p, &s, peer); + *seg_offset = ucc_buffer_vector_block_offset(p->counts, p->is64, + s.seg_start); + *seg_count = ucc_buffer_vector_block_offset(p->counts, p->is64, + s.seg_end) - *seg_offset; + return; default: ucc_assert(0); } diff --git a/src/components/tl/ucp/Makefile.am b/src/components/tl/ucp/Makefile.am index b196479893..02270feaa7 100644 --- a/src/components/tl/ucp/Makefile.am +++ 
b/src/components/tl/ucp/Makefile.am @@ -18,10 +18,11 @@ allgather = \ allgather/allgather_sparbit.c \ allgather/allgather_knomial.c -allgatherv = \ - allgatherv/allgatherv.h \ - allgatherv/allgatherv.c \ - allgatherv/allgatherv_ring.c +allgatherv = \ + allgatherv/allgatherv.h \ + allgatherv/allgatherv.c \ + allgatherv/allgatherv_ring.c \ + allgatherv/allgatherv_knomial.c alltoall = \ alltoall/alltoall.h \ diff --git a/src/components/tl/ucp/allgather/allgather_knomial.c b/src/components/tl/ucp/allgather/allgather_knomial.c index d5a760a23a..1fbcf773cc 100644 --- a/src/components/tl/ucp/allgather/allgather_knomial.c +++ b/src/components/tl/ucp/allgather/allgather_knomial.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -25,6 +25,27 @@ (_rank)) \ : (_args)->dst.info.count / (_size) +#define GET_TOTAL_COUNT(_args, _size) \ + ((_args)->coll_type == UCC_COLL_TYPE_ALLGATHERV) \ + ? ucc_coll_args_get_total_count((_args), (_args)->dst.info_v.counts, \ + (_size)) \ + : (_args)->dst.info.count + +#define GET_DT(_args) \ + ((_args)->coll_type == UCC_COLL_TYPE_ALLGATHERV) \ + ? (_args)->dst.info_v.datatype \ + : (_args)->dst.info.datatype + +#define GET_DST(_args) \ + ((_args)->coll_type == UCC_COLL_TYPE_ALLGATHERV) \ + ? (_args)->dst.info_v.buffer \ + : (_args)->dst.info.buffer + +#define GET_MT(_args) \ + ((_args)->coll_type == UCC_COLL_TYPE_ALLGATHERV) \ + ? (_args)->dst.info_v.mem_type \ + : (_args)->dst.info.mem_type + /* Bcast will first call scatter and then allgather. * In case of non-full tree with "extra" ranks, scatter will give each rank * a new virtual rank number - "vrank". 
@@ -40,12 +61,11 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) ucc_kn_radix_t radix = task->allgather_kn.p.radix; uint8_t node_type = task->allgather_kn.p.node_type; ucc_knomial_pattern_t *p = &task->allgather_kn.p; - void *rbuf = args->dst.info.buffer; - ucc_memory_type_t mem_type = args->dst.info.mem_type; - size_t count = args->dst.info.count; - size_t dt_size = ucc_dt_size(args->dst.info.datatype); - size_t data_size = count * dt_size; + void *rbuf = GET_DST(args); + ucc_memory_type_t mem_type = GET_MT(args); + size_t dt_size = ucc_dt_size(GET_DT(args)); ucc_rank_t size = task->subset.map.ep_num; + size_t data_size = GET_TOTAL_COUNT(args, size); ucc_rank_t broot = args->coll_type == UCC_COLL_TYPE_BCAST ? args->root : 0; ucc_rank_t rank = VRANK(task->subset.myrank, broot, size); @@ -72,7 +92,7 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) team, task), task, out); } - UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(rbuf, data_size, mem_type, + UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(rbuf, data_size * dt_size, mem_type, ucc_ep_map_eval(task->subset.map, INV_VRANK(peer,broot,size)), team, task), @@ -80,7 +100,10 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) } if ((p->type != KN_PATTERN_ALLGATHERX) && (node_type == KN_NODE_PROXY)) { peer = ucc_knomial_pattern_get_extra(p, rank); - extra_count = GET_LOCAL_COUNT(args, size, peer); + extra_count = + coll_task->bargs.args.coll_type == UCC_COLL_TYPE_ALLGATHER + ? 
local + : ucc_coll_args_get_count(args, args->dst.info_v.counts, peer); peer = ucc_ep_map_eval(task->subset.map, peer); UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(task->allgather_kn.sbuf, local * dt_size), extra_count * dt_size, @@ -154,8 +177,7 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task) if (KN_NODE_PROXY == node_type) { peer = ucc_knomial_pattern_get_extra(p, rank); - UCPCHECK_GOTO(ucc_tl_ucp_send_nb(args->dst.info.buffer, data_size, - mem_type, + UCPCHECK_GOTO(ucc_tl_ucp_send_nb(rbuf, data_size * dt_size, mem_type, ucc_ep_map_eval(task->subset.map, INV_VRANK(peer, broot, size)), team, task), @@ -190,6 +212,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) ucc_status_t status; ptrdiff_t offset; ucc_ee_executor_t *exec; + void *rbuf; UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_start", 0); ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); @@ -200,6 +223,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) &task->allgather_kn.p); offset = ucc_buffer_block_offset(args->dst.info.count, size, rank) * ucc_dt_size(args->dst.info.datatype); + rbuf = args->dst.info.buffer; if (!UCC_IS_INPLACE(*args)) { status = ucc_coll_task_get_executor(&task->super, &exec); if (ucc_unlikely(status != UCC_OK)) { @@ -218,18 +242,46 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task) return status; } } + } else if (ct == UCC_COLL_TYPE_ALLGATHERV) { + ucc_kn_agv_pattern_init(size, rank, radix, args->dst.info_v.counts, + UCC_COLL_ARGS_COUNT64(args), + &task->allgather_kn.p); + offset = ucc_buffer_vector_block_offset(args->dst.info_v.counts, + UCC_COLL_ARGS_COUNT64(args), + rank) * + ucc_dt_size(args->dst.info_v.datatype); + rbuf = args->dst.info_v.buffer; + if (!UCC_IS_INPLACE(*args)) { + status = ucc_coll_task_get_executor(&task->super, &exec); + if (ucc_unlikely(status != UCC_OK)) { + task->super.status = status; + return status; + } + eargs.task_type = 
UCC_EE_EXECUTOR_TASK_COPY; + eargs.copy.dst = PTR_OFFSET(args->dst.info_v.buffer, offset); + eargs.copy.src = args->src.info.buffer; + eargs.copy.len = args->src.info.count * + ucc_dt_size(args->src.info.datatype); + status = ucc_ee_executor_task_post(exec, &eargs, + &task->allgather_kn.etask); + if (ucc_unlikely(status != UCC_OK)) { + task->super.status = status; + return status; + } + } } else { ucc_kn_agx_pattern_init(size, rank, radix, args->dst.info.count, &task->allgather_kn.p); offset = ucc_sra_kn_get_offset(args->dst.info.count, ucc_dt_size(args->dst.info.datatype), rank, size, radix); + rbuf = args->dst.info.buffer; task->allgather_kn.recv_dist = ucc_knomial_calc_recv_dist( size - p->n_extra, ucc_knomial_pattern_loop_rank(p, rank), p->radix, 0); } - task->allgather_kn.sbuf = PTR_OFFSET(args->dst.info.buffer, offset); + task->allgather_kn.sbuf = PTR_OFFSET(rbuf, offset); return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); } diff --git a/src/components/tl/ucp/allgatherv/allgatherv.c b/src/components/tl/ucp/allgatherv/allgatherv.c index 39fbc5472d..ce284a87f1 100644 --- a/src/components/tl/ucp/allgatherv/allgatherv.c +++ b/src/components/tl/ucp/allgatherv/allgatherv.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. 
*/ @@ -15,6 +15,10 @@ ucc_base_coll_alg_info_t {.id = UCC_TL_UCP_ALLGATHERV_ALG_RING, .name = "ring", .desc = "O(N) Ring"}, + [UCC_TL_UCP_ALLGATHERV_ALG_KNOMIAL] = + {.id = UCC_TL_UCP_ALLGATHERV_ALG_KNOMIAL, + .name = "knomial", + .desc = "recursive k-ing with arbitrary radix"}, [UCC_TL_UCP_ALLGATHERV_ALG_LAST] = { .id = 0, .name = NULL, .desc = NULL}}; diff --git a/src/components/tl/ucp/allgatherv/allgatherv.h b/src/components/tl/ucp/allgatherv/allgatherv.h index e9faf27ed1..03177c2aa7 100644 --- a/src/components/tl/ucp/allgatherv/allgatherv.h +++ b/src/components/tl/ucp/allgatherv/allgatherv.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -12,13 +12,38 @@ enum { UCC_TL_UCP_ALLGATHERV_ALG_RING, + UCC_TL_UCP_ALLGATHERV_ALG_KNOMIAL, UCC_TL_UCP_ALLGATHERV_ALG_LAST }; extern ucc_base_coll_alg_info_t ucc_tl_ucp_allgatherv_algs[UCC_TL_UCP_ALLGATHERV_ALG_LAST + 1]; +#define UCC_TL_UCP_ALLGATHERV_DEFAULT_ALG_SELECT_STR \ + "allgatherv:@0" + +char *ucc_tl_ucp_allgatherv_score_str_get(ucc_tl_ucp_team_t *team); + +static inline int ucc_tl_ucp_allgatherv_alg_from_str(const char *str) +{ + int i; + for (i = 0; i < UCC_TL_UCP_ALLGATHERV_ALG_LAST; i++) { + if (0 == strcasecmp(str, ucc_tl_ucp_allgatherv_algs[i].name)) { + break; + } + } + return i; +} + ucc_status_t ucc_tl_ucp_allgatherv_ring_init_common(ucc_tl_ucp_task_t *task); +ucc_status_t ucc_tl_ucp_allgatherv_ring_init(ucc_base_coll_args_t *coll_args, + ucc_base_team_t *team, + ucc_coll_task_t **task_h); + +ucc_status_t ucc_tl_ucp_allgatherv_knomial_init(ucc_base_coll_args_t *coll_args, + ucc_base_team_t *team, + ucc_coll_task_t **task_h); + ucc_status_t ucc_tl_ucp_allgatherv_init(ucc_tl_ucp_task_t *task); #endif diff --git a/src/components/tl/ucp/allgatherv/allgatherv_knomial.c b/src/components/tl/ucp/allgatherv/allgatherv_knomial.c new file mode 100644 
index 0000000000..db80b82b30 --- /dev/null +++ b/src/components/tl/ucp/allgatherv/allgatherv_knomial.c @@ -0,0 +1,18 @@ +/** + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * See file LICENSE for terms. + */ + +#include "allgatherv/allgatherv.h" +#include "allgather/allgather.h" + +ucc_status_t ucc_tl_ucp_allgatherv_knomial_init(ucc_base_coll_args_t *coll_args, + ucc_base_team_t *team, + ucc_coll_task_t **task_h) +{ + if (!UCC_COLL_IS_DST_CONTIG(&coll_args->args)) { + return ucc_tl_ucp_allgatherv_ring_init(coll_args, team, task_h); + } + return ucc_tl_ucp_allgather_knomial_init(coll_args, team, task_h); +} diff --git a/src/components/tl/ucp/allgatherv/allgatherv_ring.c b/src/components/tl/ucp/allgatherv/allgatherv_ring.c index efc3a06099..77e2635496 100644 --- a/src/components/tl/ucp/allgatherv/allgatherv_ring.c +++ b/src/components/tl/ucp/allgatherv/allgatherv_ring.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. 
*/ @@ -128,3 +128,20 @@ ucc_status_t ucc_tl_ucp_allgatherv_ring_init_common(ucc_tl_ucp_task_t *task) return UCC_OK; } + +ucc_status_t ucc_tl_ucp_allgatherv_ring_init(ucc_base_coll_args_t *coll_args, + ucc_base_team_t *team, + ucc_coll_task_t **task_h) +{ + ucc_tl_ucp_task_t *task; + ucc_status_t status; + + task = ucc_tl_ucp_init_task(coll_args, team); + status = ucc_tl_ucp_allgatherv_ring_init_common(task); + if (status != UCC_OK) { + ucc_tl_ucp_put_task(task); + return status; + } + *task_h = &task->super; + return UCC_OK; +} diff --git a/src/components/tl/ucp/tl_ucp_coll.c b/src/components/tl/ucp/tl_ucp_coll.c index 88e22e6957..419b602156 100644 --- a/src/components/tl/ucp/tl_ucp_coll.c +++ b/src/components/tl/ucp/tl_ucp_coll.c @@ -30,6 +30,10 @@ const ucc_tl_ucp_default_alg_desc_t .select_str = NULL, .str_get_fn = ucc_tl_ucp_allgather_score_str_get }, + { + .select_str = UCC_TL_UCP_ALLGATHERV_DEFAULT_ALG_SELECT_STR, + .str_get_fn = NULL + }, { .select_str = NULL, .str_get_fn = ucc_tl_ucp_alltoall_score_str_get @@ -219,6 +223,8 @@ static inline int alg_id_from_str(ucc_coll_type_t coll_type, const char *str) switch (coll_type) { case UCC_COLL_TYPE_ALLGATHER: return ucc_tl_ucp_allgather_alg_from_str(str); + case UCC_COLL_TYPE_ALLGATHERV: + return ucc_tl_ucp_allgatherv_alg_from_str(str); case UCC_COLL_TYPE_ALLREDUCE: return ucc_tl_ucp_allreduce_alg_from_str(str); case UCC_COLL_TYPE_ALLTOALL: @@ -273,6 +279,19 @@ ucc_status_t ucc_tl_ucp_alg_id_to_init(int alg_id, const char *alg_id_str, break; }; break; + case UCC_COLL_TYPE_ALLGATHERV: + switch (alg_id) { + case UCC_TL_UCP_ALLGATHERV_ALG_KNOMIAL: + *init = ucc_tl_ucp_allgatherv_knomial_init; + break; + case UCC_TL_UCP_ALLGATHERV_ALG_RING: + *init = ucc_tl_ucp_allgatherv_ring_init; + break; + default: + status = UCC_ERR_INVALID_PARAM; + break; + }; + break; case UCC_COLL_TYPE_ALLREDUCE: switch (alg_id) { case UCC_TL_UCP_ALLREDUCE_ALG_KNOMIAL: diff --git a/src/components/tl/ucp/tl_ucp_coll.h 
b/src/components/tl/ucp/tl_ucp_coll.h index 848b113b13..9668e46183 100644 --- a/src/components/tl/ucp/tl_ucp_coll.h +++ b/src/components/tl/ucp/tl_ucp_coll.h @@ -17,8 +17,8 @@ #include "tl_ucp_tag.h" #define UCC_UUNITS_AUTO_RADIX 4 -#define UCC_TL_UCP_N_DEFAULT_ALG_SELECT_STR 8 #define UCC_TL_UCP_TASK_PLUGIN_MAX_DATA 128 +#define UCC_TL_UCP_N_DEFAULT_ALG_SELECT_STR 9 ucc_status_t ucc_tl_ucp_team_default_score_str_alloc(ucc_tl_ucp_team_t *team, char *default_select_str[UCC_TL_UCP_N_DEFAULT_ALG_SELECT_STR]); diff --git a/src/utils/ucc_coll_utils.h b/src/utils/ucc_coll_utils.h index 6f83b17c97..2cb563b24a 100644 --- a/src/utils/ucc_coll_utils.h +++ b/src/utils/ucc_coll_utils.h @@ -281,6 +281,25 @@ static inline size_t ucc_buffer_block_offset(size_t total_count, return (block < left) ? offset - (left - block) : offset; } +static inline size_t ucc_buffer_vector_block_offset(ucc_count_t *counts, + int is64, + ucc_rank_t rank) +{ + size_t offset = 0; + ucc_rank_t i; + + if (is64) { + for (i = 0; i < rank; i++) { + offset += ((uint64_t *)counts)[i]; + } + } else { + for (i = 0; i < rank; i++) { + offset += ((uint32_t *)counts)[i]; + } + } + return offset; +} + /* Given the rank space A (e.g. core ucc team), a subset B (e.g. active set within the core team), the ep_map that maps ranks from the subset B to A, and the rank of a process within A. The function below computes the local diff --git a/test/gtest/coll/test_allgatherv.cc b/test/gtest/coll/test_allgatherv.cc index 4bd266c16b..70bc325732 100644 --- a/test/gtest/coll/test_allgatherv.cc +++ b/test/gtest/coll/test_allgatherv.cc @@ -1,5 +1,6 @@ /** - * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * * See file LICENSE for terms. 
*/ @@ -8,6 +9,7 @@ using Param_0 = std::tuple; using Param_1 = std::tuple; +using Param_2 = std::tuple; class test_allgatherv : public UccCollArgs, public ucc::test { @@ -33,7 +35,8 @@ class test_allgatherv displs[i] = all_counts; all_counts += counts[i]; } - coll->mask = 0; + coll->mask = UCC_COLL_ARGS_FIELD_FLAGS; + coll->flags = UCC_COLL_ARGS_FLAG_CONTIG_DST_BUFFER; coll->coll_type = UCC_COLL_TYPE_ALLGATHERV; coll->src.info.mem_type = mem_type; @@ -250,7 +253,7 @@ UCC_TEST_P(test_allgatherv_1, multiple) UccReq::waitall(reqs); for (auto ctx : ctxs) { - EXPECT_EQ(true, data_validate(ctx));; + EXPECT_EQ(true, data_validate(ctx)); data_fini(ctx); } } @@ -267,3 +270,56 @@ INSTANTIATE_TEST_CASE_P( #endif ::testing::Values(1,3,8192), // count ::testing::Values(TEST_INPLACE, TEST_NO_INPLACE))); + +class test_allgatherv_alg : public test_allgatherv, + public ::testing::WithParamInterface {}; + +UCC_TEST_P(test_allgatherv_alg, alg) +{ + const ucc_datatype_t dtype = std::get<0>(GetParam()); + const ucc_memory_type_t mem_type = std::get<1>(GetParam()); + const int count = std::get<2>(GetParam()); + const gtest_ucc_inplace_t inplace = std::get<3>(GetParam()); + int n_procs = 5; + char tune[32]; + + sprintf(tune, "allgatherv:@%s:inf", std::get<4>(GetParam()).c_str()); + ucc_job_env_t env = {{"UCC_CL_BASIC_TUNE", "inf"}, + {"UCC_TL_UCP_TUNE", tune}}; + UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); + UccTeam_h team = job.create_team(n_procs); + UccCollCtxVec ctxs; + + set_inplace(inplace); + SET_MEM_TYPE(mem_type); + + data_init(n_procs, dtype, count, ctxs, false); + UccReq req(team, ctxs); + req.start(); + req.wait(); + EXPECT_EQ(true, data_validate(ctxs)); + data_fini(ctxs); +} + +INSTANTIATE_TEST_CASE_P( + , test_allgatherv_alg, + ::testing::Combine( + PREDEFINED_DTYPES, +#ifdef HAVE_CUDA + ::testing::Values(UCC_MEMORY_TYPE_HOST, UCC_MEMORY_TYPE_CUDA, + UCC_MEMORY_TYPE_CUDA_MANAGED), +#else + 
::testing::Values(UCC_MEMORY_TYPE_HOST), +#endif + ::testing::Values(1,3,8192), // count + ::testing::Values(TEST_INPLACE, TEST_NO_INPLACE), + ::testing::Values("knomial", "ring")), + [](const testing::TestParamInfo& info) { + std::string name; + name += ucc_datatype_str(std::get<0>(info.param)); + name += std::string("_") + std::string(ucc_mem_type_str(std::get<1>(info.param))); + name += std::string("_count_")+std::to_string(std::get<2>(info.param)); + name += std::string("_inplace_")+std::to_string(std::get<3>(info.param)); + name += std::string("_")+std::get<4>(info.param); + return name; + }); diff --git a/test/mpi/test_allgatherv.cc b/test/mpi/test_allgatherv.cc index a3bfa55d93..6e038488bd 100644 --- a/test/mpi/test_allgatherv.cc +++ b/test/mpi/test_allgatherv.cc @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -61,6 +61,8 @@ TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) : UCC_MALLOC_CHECK(check_buf); fill_counts_and_displacements(size, count, counts, displacements); + args.mask |= UCC_COLL_ARGS_FIELD_FLAGS; + args.flags |= UCC_COLL_ARGS_FLAG_CONTIG_DST_BUFFER; if (!inplace) { UCC_CHECK(ucc_mc_alloc(&sbuf_mc_header, counts[rank] * dt_size, mem_type)); sbuf = sbuf_mc_header->addr; @@ -74,6 +76,7 @@ TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) : args.dst.info_v.displacements = (ucc_aint_t*)displacements; args.dst.info_v.datatype = dt; args.dst.info_v.mem_type = mem_type; + UCC_CHECK(set_input()); UCC_CHECK_SKIP(ucc_collective_init(&args, &req, team.team), test_skip); } diff --git a/tools/perf/ucc_pt_coll_allgatherv.cc b/tools/perf/ucc_pt_coll_allgatherv.cc index c6c18a7c5a..dbcb8456a5 100644 --- a/tools/perf/ucc_pt_coll_allgatherv.cc +++ b/tools/perf/ucc_pt_coll_allgatherv.cc @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023, NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -21,8 +21,8 @@ ucc_pt_coll_allgatherv::ucc_pt_coll_allgatherv(ucc_datatype_t dt, has_bw_ = false; root_shift_ = 0; - coll_args.mask = 0; - coll_args.flags = 0; + coll_args.mask = UCC_COLL_ARGS_FIELD_FLAGS; + coll_args.flags = UCC_COLL_ARGS_FLAG_CONTIG_DST_BUFFER; coll_args.coll_type = UCC_COLL_TYPE_ALLGATHERV; coll_args.src.info.datatype = dt; coll_args.src.info.mem_type = mt;