diff --git a/src/Makefile.am b/src/Makefile.am index 47489c60b6..c505c31344 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -36,63 +36,63 @@ nobase_dist_libucc_la_HEADERS = \ ucc/api/ucc_version.h \ ucc/api/ucc_status.h -noinst_HEADERS = \ - core/ucc_global_opts.h \ - core/ucc_lib.h \ - core/ucc_context.h \ - core/ucc_team.h \ - core/ucc_ee.h \ - core/ucc_progress_queue.h \ - core/ucc_service_coll.h \ - core/ucc_dt.h \ - schedule/ucc_schedule.h \ - schedule/ucc_schedule_pipelined.h \ - coll_score/ucc_coll_score.h \ - utils/arch/aarch64/cpu.h \ - utils/arch/ppc64/cpu.h \ - utils/arch/riscv64/cpu.h \ - utils/arch/x86_64/cpu.h \ - utils/arch/cpu.h \ - utils/arch/cuda_def.h \ - utils/ucc_compiler_def.h \ - utils/ucc_log.h \ - utils/ucc_parser.h \ - utils/ucc_component.h \ - utils/ucc_datastruct.h \ - utils/ucc_math.h \ - utils/ucc_coll_utils.h \ - utils/ucc_list.h \ - utils/ucc_string.h \ - utils/ucc_queue.h \ - utils/ucc_proc_info.h \ - utils/khash.h \ - utils/ini.h \ - utils/ucc_spinlock.h \ - utils/ucc_mpool.h \ - utils/ucc_rcache.h \ - utils/profile/ucc_profile.h \ - utils/profile/ucc_profile_on.h \ - utils/profile/ucc_profile_off.h \ - utils/ucc_time.h \ - utils/ucc_sys.h \ - utils/ucc_assert.h \ - components/base/ucc_base_iface.h \ - components/cl/ucc_cl.h \ - components/cl/ucc_cl_log.h \ - components/cl/ucc_cl_type.h \ - components/tl/ucc_tl.h \ - components/tl/ucc_tl_log.h \ - components/mc/ucc_mc.h \ - components/mc/base/ucc_mc_base.h \ - components/mc/ucc_mc_log.h \ - components/ec/ucc_ec.h \ - components/ec/base/ucc_ec_base.h \ - components/ec/ucc_ec_log.h \ - coll_patterns/recursive_knomial.h \ - coll_patterns/sra_knomial.h \ - coll_patterns/bruck_alltoall.h \ - coll_patterns/two_tree.h \ - components/topo/ucc_topo.h \ +noinst_HEADERS = \ + core/ucc_global_opts.h \ + core/ucc_lib.h \ + core/ucc_context.h \ + core/ucc_team.h \ + core/ucc_ee.h \ + core/ucc_progress_queue.h \ + core/ucc_service_coll.h \ + core/ucc_dt.h \ + schedule/ucc_schedule.h \ + schedule/ucc_schedule_pipelined.h \ + coll_score/ucc_coll_score.h \ + utils/arch/aarch64/cpu.h \ + utils/arch/ppc64/cpu.h \ + utils/arch/riscv64/cpu.h \ + utils/arch/x86_64/cpu.h \ + utils/arch/cpu.h \ + utils/arch/cuda_def.h \ + utils/ucc_compiler_def.h \ + utils/ucc_log.h \ + utils/ucc_parser.h \ + utils/ucc_component.h \ + utils/ucc_datastruct.h \ + utils/ucc_math.h \ + utils/ucc_coll_utils.h \ + utils/ucc_list.h \ + utils/ucc_string.h \ + utils/ucc_queue.h \ + utils/ucc_proc_info.h \ + utils/khash.h \ + utils/ini.h \ + utils/ucc_spinlock.h \ + utils/ucc_mpool.h \ + utils/ucc_rcache.h \ + utils/profile/ucc_profile.h \ + utils/profile/ucc_profile_on.h \ + utils/profile/ucc_profile_off.h \ + utils/ucc_time.h \ + utils/ucc_sys.h \ + utils/ucc_assert.h \ + components/base/ucc_base_iface.h \ + components/cl/ucc_cl.h \ + components/cl/ucc_cl_log.h \ + components/cl/ucc_cl_type.h \ + components/tl/ucc_tl.h \ + components/tl/ucc_tl_log.h \ + components/mc/ucc_mc.h \ + components/mc/base/ucc_mc_base.h \ + components/mc/ucc_mc_log.h \ + components/ec/ucc_ec.h \ + components/ec/base/ucc_ec_base.h \ + components/ec/ucc_ec_log.h \ + coll_patterns/recursive_knomial.h \ + coll_patterns/sra_knomial.h \ + coll_patterns/bruck_alltoall.h \ + coll_patterns/double_binary_tree.h \ + components/topo/ucc_topo.h \ components/topo/ucc_sbgp.h libucc_la_SOURCES = \ diff --git a/src/coll_patterns/two_tree.h b/src/coll_patterns/double_binary_tree.h similarity index 72% rename from src/coll_patterns/two_tree.h rename to src/coll_patterns/double_binary_tree.h index 97ad88f1eb..47e2aed520 100644 --- a/src/coll_patterns/two_tree.h +++ b/src/coll_patterns/double_binary_tree.h @@ -4,8 +4,8 @@ * See file LICENSE for terms. */ -#ifndef TWO_TREE_H_ -#define TWO_TREE_H_ +#ifndef DOUBLE_BINARY_TREE_H_ +#define DOUBLE_BINARY_TREE_H_ enum { LEFT_CHILD, @@ -103,8 +103,8 @@ static inline int get_parent(int vsize, int vrank, int height, int troot) } } -static inline void ucc_two_tree_build_t2_mirror(ucc_dbt_single_tree_t t1, - ucc_dbt_single_tree_t *t2) +static inline void ucc_dbt_build_t2_mirror(ucc_dbt_single_tree_t t1, + ucc_dbt_single_tree_t *t2) { ucc_rank_t size = t1.size; ucc_dbt_single_tree_t t; @@ -123,8 +123,8 @@ static inline void ucc_two_tree_build_t2_mirror(ucc_dbt_single_tree_t t1, *t2 = t; } -static inline void ucc_two_tree_build_t2_shift(ucc_dbt_single_tree_t t1, - ucc_dbt_single_tree_t *t2) +static inline void ucc_dbt_build_t2_shift(ucc_dbt_single_tree_t t1, + ucc_dbt_single_tree_t *t2) { ucc_rank_t size = t1.size; ucc_dbt_single_tree_t t; @@ -143,12 +143,12 @@ static inline void ucc_two_tree_build_t2_shift(ucc_dbt_single_tree_t t1, *t2 = t; } -static inline void ucc_two_tree_build_t1(ucc_rank_t rank, ucc_rank_t size, - ucc_dbt_single_tree_t *t1) +static inline void ucc_dbt_build_t1(ucc_rank_t rank, ucc_rank_t size, + ucc_dbt_single_tree_t *t1) { - int height = get_height(rank); - ucc_rank_t root = get_root(size); - ucc_rank_t parent = get_parent(size, rank, height, root); + int height = get_height(rank); + ucc_rank_t root = get_root(size); + ucc_rank_t parent = get_parent(size, rank, height, root); get_children(size, rank, height, root, &t1->children[LEFT_CHILD], &t1->children[RIGHT_CHILD]); @@ -160,8 +160,8 @@ static inline void ucc_two_tree_build_t1(ucc_rank_t rank, ucc_rank_t size, t1->recv = 0; } -static inline ucc_rank_t ucc_two_tree_convert_rank_for_shift(ucc_rank_t rank, - ucc_rank_t size) +static inline ucc_rank_t ucc_dbt_convert_rank_for_shift(ucc_rank_t rank, + ucc_rank_t size) { ucc_rank_t i; for (i = 0; i < size; i++) { @@ -172,8 +172,8 @@ static inline ucc_rank_t ucc_two_tree_convert_rank_for_shift(ucc_rank_t rank, return i; } -static inline ucc_rank_t ucc_two_tree_convert_rank_for_mirror(ucc_rank_t rank, - ucc_rank_t size) +static inline ucc_rank_t ucc_dbt_convert_rank_for_mirror(ucc_rank_t rank, + ucc_rank_t size) { ucc_rank_t i; for (i = 0; i < size; i++) { @@ -184,27 +184,27 @@ static inline ucc_rank_t ucc_two_tree_convert_rank_for_mirror(ucc_rank_t rank, return i; } -static inline void ucc_two_tree_build_t2(ucc_rank_t rank, ucc_rank_t size, - ucc_dbt_single_tree_t *t2) { +static inline void ucc_dbt_build_t2(ucc_rank_t rank, ucc_rank_t size, + ucc_dbt_single_tree_t *t2) { ucc_rank_t temp_rank = (size % 2) ? - ucc_two_tree_convert_rank_for_shift(rank, size) : - ucc_two_tree_convert_rank_for_mirror(rank, size); + ucc_dbt_convert_rank_for_shift(rank, size) : + ucc_dbt_convert_rank_for_mirror(rank, size); ucc_dbt_single_tree_t t1_temp; - ucc_two_tree_build_t1(temp_rank, size, &t1_temp); + ucc_dbt_build_t1(temp_rank, size, &t1_temp); if (size % 2) { - ucc_two_tree_build_t2_shift(t1_temp, t2); + ucc_dbt_build_t2_shift(t1_temp, t2); } else { - ucc_two_tree_build_t2_mirror(t1_temp, t2); + ucc_dbt_build_t2_mirror(t1_temp, t2); } } -static inline void ucc_two_tree_build_trees(ucc_rank_t rank, ucc_rank_t size, - ucc_dbt_single_tree_t *t1, - ucc_dbt_single_tree_t *t2) +static inline void ucc_dbt_build_trees(ucc_rank_t rank, ucc_rank_t size, + ucc_dbt_single_tree_t *t1, + ucc_dbt_single_tree_t *t2) { - ucc_two_tree_build_t1(rank, size, t1); - ucc_two_tree_build_t2(rank, size, t2); + ucc_dbt_build_t1(rank, size, t1); + ucc_dbt_build_t2(rank, size, t2); } #endif diff --git a/src/components/tl/ucp/Makefile.am b/src/components/tl/ucp/Makefile.am index 78c2dfd157..b35578aa9f 100644 --- a/src/components/tl/ucp/Makefile.am +++ b/src/components/tl/ucp/Makefile.am @@ -50,7 +50,7 @@ bcast = \ bcast/bcast.c \ bcast/bcast_knomial.c \ bcast/bcast_sag_knomial.c \ - bcast/bcast_two_tree.c + bcast/bcast_dbt.c fanin = \ fanin/fanin.h \ diff --git a/src/components/tl/ucp/bcast/bcast.c b/src/components/tl/ucp/bcast/bcast.c index 071194ba11..8ba6698b3a 100644 --- a/src/components/tl/ucp/bcast/bcast.c +++ b/src/components/tl/ucp/bcast/bcast.c @@ -19,9 +19,9 @@ ucc_base_coll_alg_info_t .name = "sag_knomial", .desc = "recursive knomial scatter followed by knomial " "allgather (optimized for BW)"}, - [UCC_TL_UCP_BCAST_ALG_TWO_TREE] = - {.id = UCC_TL_UCP_BCAST_ALG_TWO_TREE, - .name = "two_tree", + [UCC_TL_UCP_BCAST_ALG_DBT] = + {.id = UCC_TL_UCP_BCAST_ALG_DBT, + .name = "dbt", .desc = "bcast over double binary tree where a leaf in one tree " "will be intermediate in other (optimized for latency)"}, [UCC_TL_UCP_BCAST_ALG_LAST] = { diff --git a/src/components/tl/ucp/bcast/bcast.h b/src/components/tl/ucp/bcast/bcast.h index da3f77a9cb..baaa40c313 100644 --- a/src/components/tl/ucp/bcast/bcast.h +++ b/src/components/tl/ucp/bcast/bcast.h @@ -11,7 +11,7 @@ enum { UCC_TL_UCP_BCAST_ALG_KNOMIAL, UCC_TL_UCP_BCAST_ALG_SAG_KNOMIAL, - UCC_TL_UCP_BCAST_ALG_TWO_TREE, + UCC_TL_UCP_BCAST_ALG_DBT, UCC_TL_UCP_BCAST_ALG_LAST }; @@ -48,7 +48,7 @@ ucc_status_t ucc_tl_ucp_bcast_sag_knomial_init(ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, ucc_coll_task_t **task_h); -ucc_status_t ucc_tl_ucp_bcast_two_tree_init( +ucc_status_t ucc_tl_ucp_bcast_dbt_init( ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, ucc_coll_task_t **task_h); diff --git a/src/components/tl/ucp/bcast/bcast_two_tree.c b/src/components/tl/ucp/bcast/bcast_dbt.c similarity index 75% rename from src/components/tl/ucp/bcast/bcast_two_tree.c rename to src/components/tl/ucp/bcast/bcast_dbt.c index a76ab7fdb3..7abeb8fd3d 100644 --- a/src/components/tl/ucp/bcast/bcast_two_tree.c +++ b/src/components/tl/ucp/bcast/bcast_dbt.c @@ -17,16 +17,16 @@ enum { TEST, }; -#define UCC_BCAST_TWO_TREE_CHECK_STATE(_p) \ +#define UCC_BCAST_DBT_CHECK_STATE(_p) \ case _p: \ goto _p; -#define UCC_BCAST_TWO_TREE_GOTO_STATE(_state) \ +#define UCC_BCAST_DBT_GOTO_STATE(_state) \ do { \ switch (_state) { \ - UCC_BCAST_TWO_TREE_CHECK_STATE(SEND_T1); \ - UCC_BCAST_TWO_TREE_CHECK_STATE(SEND_T2); \ - UCC_BCAST_TWO_TREE_CHECK_STATE(TEST); \ + UCC_BCAST_DBT_CHECK_STATE(SEND_T1); \ + UCC_BCAST_DBT_CHECK_STATE(SEND_T2); \ + UCC_BCAST_DBT_CHECK_STATE(TEST); \ }; \ } while (0) @@ -52,7 +52,7 @@ static void recv_completion_1(void *request, ucs_status_t status, { ucc_tl_ucp_task_t *task = (ucc_tl_ucp_task_t *)user_data; - task->bcast_two_tree.t1.recv++; + task->bcast_dbt.t1.recv++; recv_completion_common(request, status, info, user_data); } @@ -62,19 +62,19 @@ static void recv_completion_2(void *request, ucs_status_t status, { ucc_tl_ucp_task_t *task = (ucc_tl_ucp_task_t *)user_data; - task->bcast_two_tree.t2.recv++; + task->bcast_dbt.t2.recv++; recv_completion_common(request, status, info, user_data); } -void ucc_tl_ucp_bcast_two_tree_progress(ucc_coll_task_t *coll_task) +void ucc_tl_ucp_bcast_dbt_progress(ucc_coll_task_t *coll_task) { ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); ucc_tl_ucp_team_t *team = TASK_TEAM(task); ucc_rank_t rank = UCC_TL_TEAM_RANK(team); - ucc_dbt_single_tree_t t1 = task->bcast_two_tree.t1; - ucc_dbt_single_tree_t t2 = task->bcast_two_tree.t2; + ucc_dbt_single_tree_t t1 = task->bcast_dbt.t1; + ucc_dbt_single_tree_t t2 = task->bcast_dbt.t2; void *buffer = TASK_ARGS(task).src.info.buffer; ucc_memory_type_t mtype = TASK_ARGS(task).src.info.mem_type; ucc_datatype_t dt = TASK_ARGS(task).src.info.datatype; @@ -85,7 +85,7 @@ void ucc_tl_ucp_bcast_two_tree_progress(ucc_coll_task_t *coll_task) recv_completion_2}; uint32_t i; - UCC_BCAST_TWO_TREE_GOTO_STATE(task->bcast_two_tree.state); + UCC_BCAST_DBT_GOTO_STATE(task->bcast_dbt.state); if (rank != t1.root && rank != coll_root) { UCPCHECK_GOTO(ucc_tl_ucp_recv_cb(buffer, data_size, mtype, t1.parent, @@ -99,10 +99,10 @@ void ucc_tl_ucp_bcast_two_tree_progress(ucc_coll_task_t *coll_task) task, cb[1], (void *)task), task, out); } - task->bcast_two_tree.state = SEND_T1; + task->bcast_dbt.state = SEND_T1; SEND_T1: - if ((coll_root == rank) || (task->bcast_two_tree.t1.recv > 0)) { + if ((coll_root == rank) || (task->bcast_dbt.t1.recv > 0)) { for (i = 0; i < 2; i++) { if (t1.children[i] != -1 && t1.children[i] != coll_root) { UCPCHECK_GOTO(ucc_tl_ucp_send_nb(buffer, data_size, mtype, @@ -113,10 +113,10 @@ void ucc_tl_ucp_bcast_two_tree_progress(ucc_coll_task_t *coll_task) } else { goto out; } - task->bcast_two_tree.state = SEND_T2; + task->bcast_dbt.state = SEND_T2; SEND_T2: - if ((coll_root == rank) || (task->bcast_two_tree.t2.recv > 0)) { + if ((coll_root == rank) || (task->bcast_dbt.t2.recv > 0)) { for (i = 0; i < 2; i++) { if (t2.children[i] != -1 && t2.children[i] != coll_root) { UCPCHECK_GOTO(ucc_tl_ucp_send_nb(PTR_OFFSET(buffer, data_size), @@ -131,18 +131,18 @@ void ucc_tl_ucp_bcast_two_tree_progress(ucc_coll_task_t *coll_task) TEST: if (UCC_INPROGRESS == ucc_tl_ucp_test_send(task)) { - task->bcast_two_tree.state = TEST; + task->bcast_dbt.state = TEST; return; } task->super.status = UCC_OK; - UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_bcast_tow_tree_done", 0); + UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_bcast_dbt_done", 0); out: return; } -ucc_status_t ucc_tl_ucp_bcast_two_tree_start(ucc_coll_task_t *coll_task) +ucc_status_t ucc_tl_ucp_bcast_dbt_start(ucc_coll_task_t *coll_task) { ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); @@ -155,13 +155,13 @@ ucc_status_t ucc_tl_ucp_bcast_two_tree_start(ucc_coll_task_t *coll_task) size_t count = TASK_ARGS(task).src.info.count; size_t data_size = count * ucc_dt_size(dt) / 2; ucc_rank_t coll_root = (ucc_rank_t)TASK_ARGS(task).root; - ucc_rank_t t1_root = task->bcast_two_tree.t1.root; - ucc_rank_t t2_root = task->bcast_two_tree.t2.root; + ucc_rank_t t1_root = task->bcast_dbt.t1.root; + ucc_rank_t t2_root = task->bcast_dbt.t2.root; ucp_tag_recv_nbx_callback_t cb[2] = {recv_completion_1, recv_completion_2}; - task->bcast_two_tree.t1.recv = 0; - task->bcast_two_tree.t2.recv = 0; + task->bcast_dbt.t1.recv = 0; + task->bcast_dbt.t2.recv = 0; ucc_tl_ucp_task_reset(task, UCC_INPROGRESS); if (rank == coll_root && coll_root != t1_root) { @@ -197,33 +197,33 @@ ucc_status_t ucc_tl_ucp_bcast_two_tree_start(ucc_coll_task_t *coll_task) } } - task->bcast_two_tree.state = RECV; - UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_bcast_two_tree_start", 0); + task->bcast_dbt.state = RECV; + UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_bcast_dbt_start", 0); return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); } -ucc_status_t ucc_tl_ucp_bcast_two_tree_finalize(ucc_coll_task_t *coll_task) +ucc_status_t ucc_tl_ucp_bcast_dbt_finalize(ucc_coll_task_t *coll_task) { return ucc_tl_ucp_coll_finalize(coll_task); } -ucc_status_t ucc_tl_ucp_bcast_two_tree_init( +ucc_status_t ucc_tl_ucp_bcast_dbt_init( ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, ucc_coll_task_t **task_h) { ucc_tl_ucp_team_t *tl_team; ucc_tl_ucp_task_t *task; - ucc_rank_t rank, size; - - task = ucc_tl_ucp_init_task(coll_args, team); - task->super.post = ucc_tl_ucp_bcast_two_tree_start; - task->super.progress = ucc_tl_ucp_bcast_two_tree_progress; - task->super.finalize = ucc_tl_ucp_bcast_two_tree_finalize; - tl_team = TASK_TEAM(task); - rank = UCC_TL_TEAM_RANK(tl_team); - size = UCC_TL_TEAM_SIZE(tl_team); - ucc_two_tree_build_trees(rank, size, &task->bcast_two_tree.t1, - &task->bcast_two_tree.t2); + ucc_rank_t rank, size; + + task = ucc_tl_ucp_init_task(coll_args, team); + task->super.post = ucc_tl_ucp_bcast_dbt_start; + task->super.progress = ucc_tl_ucp_bcast_dbt_progress; + task->super.finalize = ucc_tl_ucp_bcast_dbt_finalize; + tl_team = TASK_TEAM(task); + rank = UCC_TL_TEAM_RANK(tl_team); + size = UCC_TL_TEAM_SIZE(tl_team); + ucc_dbt_build_trees(rank, size, &task->bcast_dbt.t1, + &task->bcast_dbt.t2); *task_h = &task->super; return UCC_OK; diff --git a/src/components/tl/ucp/tl_ucp_coll.c b/src/components/tl/ucp/tl_ucp_coll.c index d477f5ae59..872f064d16 100644 --- a/src/components/tl/ucp/tl_ucp_coll.c +++ b/src/components/tl/ucp/tl_ucp_coll.c @@ -281,8 +281,8 @@ ucc_status_t ucc_tl_ucp_alg_id_to_init(int alg_id, const char *alg_id_str, case UCC_TL_UCP_BCAST_ALG_SAG_KNOMIAL: *init = ucc_tl_ucp_bcast_sag_knomial_init; break; - case UCC_TL_UCP_BCAST_ALG_TWO_TREE: - *init = ucc_tl_ucp_bcast_two_tree_init; + case UCC_TL_UCP_BCAST_ALG_DBT: + *init = ucc_tl_ucp_bcast_dbt_init; break; default: status = UCC_ERR_INVALID_PARAM; diff --git a/src/components/tl/ucp/tl_ucp_coll.h b/src/components/tl/ucp/tl_ucp_coll.h index 05fd4ff24d..16b932e70b 100644 --- a/src/components/tl/ucp/tl_ucp_coll.h +++ b/src/components/tl/ucp/tl_ucp_coll.h @@ -11,7 +11,7 @@ #include "tl_ucp.h" #include "schedule/ucc_schedule_pipelined.h" #include "coll_patterns/recursive_knomial.h" -#include "coll_patterns/two_tree.h" +#include "coll_patterns/double_binary_tree.h" #include "components/mc/base/ucc_mc_base.h" #include "components/ec/ucc_ec.h" #include "tl_ucp_tag.h" @@ -188,7 +188,7 @@ typedef struct ucc_tl_ucp_task { ucc_dbt_single_tree_t t1; ucc_dbt_single_tree_t t2; int state; - } bcast_two_tree; + } bcast_dbt; struct { ucc_rank_t dist; ucc_rank_t max_dist; @@ -200,11 +200,6 @@ typedef struct ucc_tl_ucp_task { ucc_ee_executor_task_t *etask; ucc_ee_executor_t *executor; } reduce_kn; - struct { - ucc_dbt_single_tree_t t1; - ucc_dbt_single_tree_t t2; - int state; - } reduce_two_tree; struct { ucc_rank_t dist; ucc_rank_t max_dist; diff --git a/test/gtest/coll/test_bcast.cc b/test/gtest/coll/test_bcast.cc index b66e416517..6d80816a31 100644 --- a/test/gtest/coll/test_bcast.cc +++ b/test/gtest/coll/test_bcast.cc @@ -8,6 +8,7 @@ using Param_0 = std::tuple; using Param_1 = std::tuple; +using Param_2 = std::tuple; class test_bcast : public UccCollArgs, public ucc::test { @@ -241,116 +242,49 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(1,3,65536), // count ::testing::Values(0,1))); // root -class test_bcast_alg : public test_bcast +class test_bcast_alg : public test_bcast, + public ::testing::WithParamInterface {}; -UCC_TEST_F(test_bcast_alg, 2step) { - int n_procs = 15; - ucc_job_env_t env = {{"UCC_CL_HIER_TUNE", "bcast:@2step:0-inf:inf"}, - {"UCC_CLS", "all"}}; - UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); - UccTeam_h team = job.create_team(n_procs); - int repeat = 1; +UCC_TEST_P(test_bcast_alg,) { + const ucc_memory_type_t mt = std::get<0>(GetParam()); + const ucc_job_env_t env = std::get<1>(GetParam()); + const int count = std::get<2>(GetParam()); + const int n_procs = std::get<3>(GetParam()); + UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); + UccTeam_h team = job.create_team(n_procs); + int repeat = 1; UccCollCtxVec ctxs; - std::vector mt = {UCC_MEMORY_TYPE_HOST}; - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA); - } - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA_MANAGED)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA_MANAGED); - } - - for (auto count : {8, 65536}) { - for (int root = 0; root < n_procs; root++) { - for (auto m : mt) { - this->set_root(root); - SET_MEM_TYPE(m); - this->data_init(n_procs, UCC_DT_INT8, count, ctxs, false); - UccReq req(team, ctxs); - - for (auto i = 0; i < repeat; i++) { - req.start(); - req.wait(); - EXPECT_EQ(true, this->data_validate(ctxs)); - this->reset(ctxs); - } - this->data_fini(ctxs); - } + SET_MEM_TYPE(mt); + for (int root = 0; root < n_procs; root++) { + this->set_root(root); + this->data_init(n_procs, UCC_DT_INT8, count, ctxs, false); + UccReq req(team, ctxs); + + for (auto i = 0; i < repeat; i++) { + req.start(); + req.wait(); + EXPECT_EQ(true, this->data_validate(ctxs)); + this->reset(ctxs); } + this->data_fini(ctxs); } } -UCC_TEST_F(test_bcast_alg, two_tree_odd_shift) { - int n_procs = 15; - ucc_job_env_t env = {{"UCC_TL_UCP_TUNE", "bcast:@two_tree:0-inf:inf"}, - {"UCC_CLS", "basic"}}; - UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); - UccTeam_h team = job.create_team(n_procs); - int repeat = 1; - UccCollCtxVec ctxs; - std::vector mt = {UCC_MEMORY_TYPE_HOST}; - - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA); - } - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA_MANAGED)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA_MANAGED); - } - - for (auto count : {8, 65536}) { - for (int root = 0; root < n_procs; root++) { - for (auto m : mt) { - this->set_root(root); - SET_MEM_TYPE(m); - this->data_init(n_procs, UCC_DT_INT8, count, ctxs, false); - UccReq req(team, ctxs); - - for (auto i = 0; i < repeat; i++) { - req.start(); - req.wait(); - EXPECT_EQ(true, this->data_validate(ctxs)); - this->reset(ctxs); - } - this->data_fini(ctxs); - } - } - } -} - -UCC_TEST_F(test_bcast_alg, two_tree_even_mirror) { - int n_procs = 16; - ucc_job_env_t env = {{"UCC_TL_UCP_TUNE", "bcast:@two_tree:0-inf:inf"}, - {"UCC_CLS", "basic"}}; - UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); - UccTeam_h team = job.create_team(n_procs); - int repeat = 1; - UccCollCtxVec ctxs; - std::vector mt = {UCC_MEMORY_TYPE_HOST}; - - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA); - } - if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA_MANAGED)) { - mt.push_back(UCC_MEMORY_TYPE_CUDA_MANAGED); - } - - for (auto count : {8, 65536}) { - for (int root = 0; root < n_procs; root++) { - for (auto m : mt) { - this->set_root(root); - SET_MEM_TYPE(m); - this->data_init(n_procs, UCC_DT_INT8, count, ctxs, false); - UccReq req(team, ctxs); - - for (auto i = 0; i < repeat; i++) { - req.start(); - req.wait(); - EXPECT_EQ(true, this->data_validate(ctxs)); - this->reset(ctxs); - } - this->data_fini(ctxs); - } - } - } -} +ucc_job_env_t two_step_env = {{"UCC_CL_HIER_TUNE", "bcast:@2step:0-inf:inf"}, + {"UCC_CLS", "all"}}; +ucc_job_env_t dbt_env = {{"UCC_TL_UCP_TUNE", "bcast:@dbt:0-inf:inf"}, + {"UCC_CLS", "basic"}}; +INSTANTIATE_TEST_CASE_P( + , test_bcast_alg, + ::testing::Combine( +#ifdef HAVE_CUDA + ::testing::Values(UCC_MEMORY_TYPE_HOST, UCC_MEMORY_TYPE_CUDA, + UCC_MEMORY_TYPE_CUDA_MANAGED), +#else + ::testing::Values(UCC_MEMORY_TYPE_HOST), +#endif + ::testing::Values(two_step_env, dbt_env), //env + ::testing::Values(8, 65536), // count + ::testing::Values(15,16))); // n_procs