Skip to content

Commit

Permalink
Merge branch 'master' into sharp-oob
Browse files Browse the repository at this point in the history
  • Loading branch information
manjugv authored Mar 15, 2023
2 parents b9313fa + 45a0b49 commit 6a35346
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .azure/azure-pipelines-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ stages:
--prefix=$(Build.Repository.LocalPath)/install --enable-gtest
make -j install
displayName: Build ucc artifact
timeoutInMinutes: 50
timeoutInMinutes: 60
- bash: |
cd build
make gtest
Expand Down
19 changes: 19 additions & 0 deletions src/coll_patterns/recursive_knomial.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,25 @@ ucc_knomial_pattern_get_min_radix(ucc_kn_radix_t cfg_radix,
return radix;
}

/* Computes the knomial distance at which a rank receives its data.
 *
 * team_size - upper bound for the distance search
 * rank      - rank whose receive distance is queried
 * radix     - knomial radix; the distance grows by this factor each step
 * root      - rank that owns the data and therefore never receives
 *
 * Returns 0 when rank == root. Otherwise returns the first power of radix
 * `d` (starting from 1) for which rank < radix * d, or the first power of
 * radix greater than team_size when no such `d` is found within team_size.
 *
 * Note that `root` is only used for the equality check above: the distance
 * itself is always measured as if the tree were based at rank 0.
 *
 * NOTE(review): the search does not make progress for radix < 2; callers
 * presumably always pass a valid knomial radix (>= 2) - confirm. */
static inline ucc_rank_t
ucc_knomial_calc_recv_dist(ucc_rank_t team_size, ucc_rank_t rank,
                           ucc_rank_t radix, ucc_rank_t root)
{
    ucc_rank_t recv_dist;

    if (rank == root) {
        return 0;
    }

    /* Walk the powers of radix until the subtree of that size covers rank */
    for (recv_dist = 1; recv_dist <= team_size; recv_dist *= radix) {
        if (rank < radix * recv_dist) {
            return recv_dist;
        }
    }
    return recv_dist;
}

/* A set of convenience macros used to implement sw based progress
of the algorithms that use kn pattern */
enum {
Expand Down
38 changes: 10 additions & 28 deletions src/coll_patterns/sra_knomial.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,45 +90,27 @@ ucc_sra_kn_get_offset_and_seglen(size_t count, size_t dt_size, ucc_rank_t rank,
ptrdiff_t *offset, size_t *seglen)
{
ptrdiff_t _offset = 0;
size_t block_count = count;
ucc_rank_t step_radix = 0;
size_t my_seg_len = 0;
ucc_rank_t k, r, peer, my_si;
ucc_rank_t my_si, step_radix;
size_t my_seg_offset;
ucc_knomial_pattern_t p;

ucc_knomial_pattern_init(size, rank, radix, &p);

if (KN_NODE_EXTRA == p.node_type) {
if (offset)
*offset = 0;
if (seglen)
*seglen = count;
return;
goto out;
}

while (!ucc_knomial_pattern_loop_done(&p)) {
r = 0;
for (k = 1; k < p.radix; k++) {
peer = ucc_knomial_pattern_get_loop_peer(&p, rank, k);
if (peer == UCC_KN_PEER_NULL)
continue;
r++;
}
step_radix = r + 1;
my_si = ucc_kn_compute_seg_index(rank, p.radix_pow, &p);
my_seg_offset =
ucc_sra_kn_compute_seg_offset(block_count, step_radix, my_si);
step_radix = ucc_kn_compute_step_radix(&p);
my_si = ucc_kn_compute_seg_index(rank, p.radix_pow, &p);
my_seg_offset = ucc_sra_kn_compute_seg_offset(count, step_radix, my_si);
count = ucc_sra_kn_compute_seg_size(count, step_radix, my_si);
_offset += my_seg_offset * dt_size;
if (!ucc_knomial_pattern_loop_last_iteration(&p)) {
block_count =
ucc_sra_kn_compute_seg_size(block_count, step_radix, my_si);
}
ucc_knomial_pattern_next_iteration(&p);
}
if (step_radix) {
my_seg_len =
ucc_sra_kn_compute_seg_size(block_count, step_radix, my_si);
}
my_seg_len = count;

out:
if (offset)
*offset = _offset;
if (seglen)
Expand Down
41 changes: 30 additions & 11 deletions src/components/tl/ucp/allgather/allgather_knomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* a new virtual rank number - "vrank".
* As such allgather must keep to this ranking to be aligned with scatter.
*/

void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
{
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task,
Expand All @@ -51,7 +52,7 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
size_t local = GET_LOCAL_COUNT(args, size, rank);
void *sbuf;
ptrdiff_t peer_seg_offset, local_seg_offset;
ucc_rank_t peer;
ucc_rank_t peer, peer_dist;
ucc_kn_radix_t loop_step;
size_t peer_seg_count, local_seg_count;
ucc_status_t status;
Expand Down Expand Up @@ -103,6 +104,13 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
peer = ucc_knomial_pattern_get_loop_peer(p, rank, loop_step);
if (peer == UCC_KN_PEER_NULL)
continue;
if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) {
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra,
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0);
if (peer_dist < task->allgather_kn.recv_dist) {
continue;
}
}

UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, local_seg_count * dt_size,
mem_type,
Expand All @@ -118,14 +126,21 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
ucc_kn_ag_pattern_peer_seg(peer, p, &peer_seg_count,
&peer_seg_offset);

if (coll_task->bargs.args.coll_type == UCC_COLL_TYPE_BCAST) {
peer_dist = ucc_knomial_calc_recv_dist(size - p->n_extra,
ucc_knomial_pattern_loop_rank(p, peer), p->radix, 0);
if (peer_dist > task->allgather_kn.recv_dist) {
continue;
}
}
UCPCHECK_GOTO(
ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, peer_seg_offset * dt_size),
peer_seg_count * dt_size, mem_type,
INV_VRANK(peer, broot, size), team, task),
task, out);
}
UCC_KN_PHASE_LOOP:
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv(task)) {
SAVE_STATE(UCC_KN_PHASE_LOOP);
return;
}
Expand All @@ -138,8 +153,6 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
mem_type, INV_VRANK(peer, broot, size),
team, task),
task, out);
} else {
goto out;
}
UCC_KN_PHASE_PROXY:
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
Expand All @@ -148,19 +161,21 @@ void ucc_tl_ucp_allgather_knomial_progress(ucc_coll_task_t *coll_task)
}

out:
ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task));
task->super.status = UCC_OK;
UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_done", 0);
}

ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
{
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_coll_args_t *args = &TASK_ARGS(task);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
ucc_coll_type_t ct = args->coll_type;
ucc_rank_t size = UCC_TL_TEAM_SIZE(team);
ucc_kn_radix_t radix = task->allgather_kn.p.radix;
ucc_rank_t rank = VRANK(UCC_TL_TEAM_RANK(team),
ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t);
ucc_coll_args_t *args = &TASK_ARGS(task);
ucc_tl_ucp_team_t *team = TASK_TEAM(task);
ucc_coll_type_t ct = args->coll_type;
ucc_rank_t size = UCC_TL_TEAM_SIZE(team);
ucc_kn_radix_t radix = task->allgather_kn.p.radix;
ucc_knomial_pattern_t *p = &task->allgather_kn.p;
ucc_rank_t rank = VRANK(UCC_TL_TEAM_RANK(team),
ct == UCC_COLL_TYPE_BCAST ? args->root : 0,
size);
ucc_status_t status;
Expand Down Expand Up @@ -201,6 +216,10 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
offset = ucc_sra_kn_get_offset(args->dst.info.count,
ucc_dt_size(args->dst.info.datatype), rank,
size, radix);
task->allgather_kn.recv_dist = ucc_knomial_calc_recv_dist(
size - p->n_extra,
ucc_knomial_pattern_loop_rank(p, rank),
p->radix, 0);
}
task->allgather_kn.sbuf = PTR_OFFSET(args->dst.info.buffer, offset);

Expand Down
5 changes: 3 additions & 2 deletions src/components/tl/ucp/bcast/bcast.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -17,8 +17,9 @@ enum {
extern ucc_base_coll_alg_info_t
ucc_tl_ucp_bcast_algs[UCC_TL_UCP_BCAST_ALG_LAST + 1];

/* SAG bcast supports team size 2, but Knomial is always better in this case */
#define UCC_TL_UCP_BCAST_DEFAULT_ALG_SELECT_STR \
"bcast:0-32k:@0#bcast:32k-inf:@1"
"bcast:0-inf:[2-2]:@0#bcast:0-32k:[3-inf]:@0#bcast:32k-inf:[3-inf]:@1"

static inline int ucc_tl_ucp_bcast_alg_from_str(const char *str)
{
Expand Down
6 changes: 5 additions & 1 deletion src/components/tl/ucp/bcast/bcast_knomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,15 @@ void ucc_tl_ucp_bcast_knomial_progress(ucc_coll_task_t *coll_task)
}
}
dist /= radix;
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
if (UCC_INPROGRESS == ucc_tl_ucp_test_recv(task)) {
task->bcast_kn.dist = dist;
return;
}
}
if (UCC_INPROGRESS == ucc_tl_ucp_test(task)) {
task->bcast_kn.dist = dist;
return;
}

ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task));
task->super.status = UCC_OK;
Expand Down
Loading

0 comments on commit 6a35346

Please sign in to comment.