Skip to content

Commit

Permalink
TL/UCP: reduce dbt
Browse files Browse the repository at this point in the history
  • Loading branch information
shimmybalsam authored and Sergei-Lebedev committed Dec 8, 2023
1 parent 28455f6 commit ae8764a
Show file tree
Hide file tree
Showing 8 changed files with 562 additions and 62 deletions.
34 changes: 28 additions & 6 deletions src/coll_patterns/double_binary_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typedef struct ucc_dbt_single_tree {
ucc_rank_t root;
ucc_rank_t parent;
ucc_rank_t children[2];
int n_children;
int height;
int recv;
} ucc_dbt_single_tree_t;
Expand Down Expand Up @@ -86,6 +87,21 @@ static inline void get_children(ucc_rank_t size, ucc_rank_t rank, int height,
*r_c = get_right_child(size, rank, height, root);
}

/**
 * Count how many of the two child slots refer to an actual rank.
 *
 * @param l_c  left child rank, or UCC_RANK_INVALID when there is none
 * @param r_c  right child rank, or UCC_RANK_INVALID when there is none
 *
 * @return 0, 1 or 2 - the number of arguments that differ from
 *         UCC_RANK_INVALID
 */
static inline int get_n_children(ucc_rank_t l_c, ucc_rank_t r_c)
{
    /* Each comparison yields 0 or 1, so their sum is the child count. */
    return (l_c != UCC_RANK_INVALID) + (r_c != UCC_RANK_INVALID);
}

static inline ucc_rank_t get_parent(int vsize, int vrank, int height, int troot)
{
if (vrank == troot) {
Expand Down Expand Up @@ -121,6 +137,8 @@ static inline void ucc_dbt_build_t2_mirror(ucc_dbt_single_tree_t t1,
t.children[RIGHT_CHILD] = (t1.children[LEFT_CHILD] == UCC_RANK_INVALID) ?
UCC_RANK_INVALID :
size - 1 - t1.children[LEFT_CHILD];
t.n_children = get_n_children(t.children[LEFT_CHILD],
t.children[RIGHT_CHILD]);
t.recv = 0;

*t2 = t;
Expand All @@ -144,6 +162,8 @@ static inline void ucc_dbt_build_t2_shift(ucc_dbt_single_tree_t t1,
t.children[RIGHT_CHILD] = (t1.children[RIGHT_CHILD] == UCC_RANK_INVALID) ?
UCC_RANK_INVALID :
(t1.children[RIGHT_CHILD] + 1) % size;
t.n_children = get_n_children(t.children[LEFT_CHILD],
t.children[RIGHT_CHILD]);
t.recv = 0;

*t2 = t;
Expand All @@ -158,12 +178,14 @@ static inline void ucc_dbt_build_t1(ucc_rank_t rank, ucc_rank_t size,

get_children(size, rank, height, root, &t1->children[LEFT_CHILD],
&t1->children[RIGHT_CHILD]);
t1->height = height;
t1->parent = parent;
t1->size = size;
t1->rank = rank;
t1->root = root;
t1->recv = 0;
t1->n_children = get_n_children(t1->children[LEFT_CHILD],
t1->children[RIGHT_CHILD]);
t1->height = height;
t1->parent = parent;
t1->size = size;
t1->rank = rank;
t1->root = root;
t1->recv = 0;
}

static inline ucc_rank_t ucc_dbt_convert_rank_for_shift(ucc_rank_t rank,
Expand Down
3 changes: 2 additions & 1 deletion src/components/tl/ucp/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ gatherv = \
reduce = \
reduce/reduce.h \
reduce/reduce.c \
reduce/reduce_knomial.c
reduce/reduce_knomial.c \
reduce/reduce_dbt.c

reduce_scatter = \
reduce_scatter/reduce_scatter.h \
Expand Down
18 changes: 18 additions & 0 deletions src/components/tl/ucp/reduce/reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ ucc_base_coll_alg_info_t
.name = "knomial",
.desc = "reduce over knomial tree with arbitrary radix "
"(optimized for latency)"},
/* Double binary tree entry of the reduce algorithm table. The description
   names the collective this table belongs to ("reduce"). */
[UCC_TL_UCP_REDUCE_ALG_DBT] =
    {.id   = UCC_TL_UCP_REDUCE_ALG_DBT,
     .name = "dbt",
     .desc = "reduce over double binary tree where a leaf in one tree "
             "will be intermediate in other (optimized for BW)"},
[UCC_TL_UCP_REDUCE_ALG_LAST] = {
.id = 0, .name = NULL, .desc = NULL}};

Expand Down Expand Up @@ -66,3 +71,16 @@ ucc_status_t ucc_tl_ucp_reduce_init(ucc_tl_ucp_task_t *task)

return status;
}

/**
 * Create a TL/UCP task for a reduce collective and initialize it.
 *
 * The task is obtained from ucc_tl_ucp_init_task() and then passed to
 * ucc_tl_ucp_reduce_init().
 *
 * @param coll_args  collective arguments forwarded to ucc_tl_ucp_init_task()
 * @param team       team forwarded to ucc_tl_ucp_init_task()
 * @param task_h     out: receives the address of the new task's `super`
 *                   member; it is written regardless of the returned status
 *
 * @return the status reported by ucc_tl_ucp_reduce_init()
 *
 * NOTE(review): the task pointer is used without a NULL check and is not
 * released when initialization fails - confirm this matches the contract of
 * ucc_tl_ucp_init_task() and what callers expect.
 */
ucc_status_t ucc_tl_ucp_reduce_knomial_init(ucc_base_coll_args_t *coll_args,
                                            ucc_base_team_t *team,
                                            ucc_coll_task_t **task_h)
{
    ucc_tl_ucp_task_t *tl_task = ucc_tl_ucp_init_task(coll_args, team);
    ucc_status_t       rc      = ucc_tl_ucp_reduce_init(tl_task);

    *task_h = &tl_task->super;
    return rc;
}
24 changes: 24 additions & 0 deletions src/components/tl/ucp/reduce/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@

/* Identifiers of the reduce algorithms. The values are consecutive and
   start at zero, so they can be used directly as array indices. */
enum {
    /* reduce over a knomial tree */
    UCC_TL_UCP_REDUCE_ALG_KNOMIAL = 0,
    /* reduce over a double binary tree */
    UCC_TL_UCP_REDUCE_ALG_DBT,
    /* number of algorithms above; keep this entry last */
    UCC_TL_UCP_REDUCE_ALG_LAST
};

/* Table of reduce algorithm descriptors. It is declared with
   UCC_TL_UCP_REDUCE_ALG_LAST + 1 elements: one per algorithm id plus one
   extra slot at index UCC_TL_UCP_REDUCE_ALG_LAST. */
extern ucc_base_coll_alg_info_t
ucc_tl_ucp_reduce_algs[UCC_TL_UCP_REDUCE_ALG_LAST + 1];

/* Default algorithm selection string for the reduce collective.
   NOTE(review): presumably read as "<coll>:<msg range>:@<alg id>", i.e.
   algorithm id 0 for every message size - confirm against the code that
   parses this string. */
#define UCC_TL_UCP_REDUCE_DEFAULT_ALG_SELECT_STR \
"reduce:0-inf:@0"

/* A set of convenience macros used to implement sw based progress
of the reduce algorithm that uses kn pattern */
enum {
Expand All @@ -36,12 +40,32 @@ enum {
}; \
} while (0)


/**
 * Translate an algorithm name into its index in ucc_tl_ucp_reduce_algs.
 *
 * Entries are scanned in index order and names are compared with
 * strcasecmp(), i.e. ignoring case.
 *
 * @param str  algorithm name; passed to strcasecmp() as is, so it must be a
 *             valid NUL-terminated string
 *
 * @return index of the first entry whose name matches, or
 *         UCC_TL_UCP_REDUCE_ALG_LAST when no entry matches
 */
static inline int ucc_tl_ucp_reduce_alg_from_str(const char *str)
{
    int alg_id = 0;

    /* Advance until a name matches or every algorithm has been checked. */
    while (alg_id < UCC_TL_UCP_REDUCE_ALG_LAST &&
           strcasecmp(str, ucc_tl_ucp_reduce_algs[alg_id].name) != 0) {
        alg_id++;
    }
    return alg_id;
}

ucc_status_t ucc_tl_ucp_reduce_init(ucc_tl_ucp_task_t *task);

ucc_status_t ucc_tl_ucp_reduce_knomial_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task_h);

ucc_status_t ucc_tl_ucp_reduce_knomial_start(ucc_coll_task_t *task);

void ucc_tl_ucp_reduce_knomial_progress(ucc_coll_task_t *task);

ucc_status_t ucc_tl_ucp_reduce_knomial_finalize(ucc_coll_task_t *task);

ucc_status_t ucc_tl_ucp_reduce_dbt_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task_h);

#endif
Loading

0 comments on commit ae8764a

Please sign in to comment.