Skip to content

Commit

Permalink
separate loopbacks per algo
Browse files Browse the repository at this point in the history
  • Loading branch information
yaeliyac committed Oct 2, 2024
1 parent 5b34c1a commit 5e852a7
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 47 deletions.
20 changes: 0 additions & 20 deletions src/components/tl/ucp/allgather/allgather.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,6 @@

#define ALLGATHER_MAX_PATTERN_SIZE (sizeof(UCC_TL_UCP_ALLGATHER_DEFAULT_ALG_SELECT_STR))

/*--------------YAELIS FUNCTION---------------------*/

/**
 * Copy a buffer by sending it to a rank and receiving it back from the same
 * rank through the UCP transport ("loopback"), instead of a direct memcpy.
 *
 * A send of @p src is posted first, then a receive into @p dst; both use the
 * same @p rank, @p team and @p task.
 *
 * @param dst      Buffer handed to ucc_tl_ucp_recv_nb().
 * @param src      Buffer handed to ucc_tl_ucp_send_nb().
 * @param len      Length forwarded unchanged to both the send and the receive.
 * @param dst_mem  Memory type of @p dst.
 * @param src_mem  Memory type of @p src.
 * @param rank     Peer rank used for both the send and the receive
 *                 (presumably the caller's own rank -- confirm at call sites).
 * @param team     Team passed through to the send/recv calls.
 * @param task     Task passed through to the send/recv calls; on failure its
 *                 super.status is set to the failing status.
 *
 * @return UCC_OK when both calls return UCC_OK, otherwise the first failing
 *         status. If the send fails, the receive is not posted.
 *
 * NOTE(review): the _nb suffix suggests the operations are only posted here
 * and complete later via task progress -- confirm before reading @p dst.
 */
ucc_status_t new_ucp_tl_self_copy_nb(void *dst, void *src, size_t len,
                                     ucc_memory_type_t dst_mem,
                                     ucc_memory_type_t src_mem,
                                     ucc_rank_t rank, ucc_tl_ucp_team_t *team,
                                     ucc_tl_ucp_task_t *task)
{
    ucc_status_t status;

    status = ucc_tl_ucp_send_nb(src, len, src_mem, rank, team, task);
    if (ucc_unlikely(UCC_OK != status)) {
        goto err;
    }

    status = ucc_tl_ucp_recv_nb(dst, len, dst_mem, rank, team, task);
    if (ucc_unlikely(UCC_OK != status)) {
        goto err;
    }

    return UCC_OK;

err:
    /* The failure is reported through the task status and the return value;
     * no ad-hoc stdout printing from library code. */
    task->super.status = status;
    return status;
}

/*--------------YAELIS FUNCTION---------------------*/
ucc_base_coll_alg_info_t
ucc_tl_ucp_allgather_algs[UCC_TL_UCP_ALLGATHER_ALG_LAST + 1] = {
[UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL] =
Expand Down
7 changes: 0 additions & 7 deletions src/components/tl/ucp/allgather/allgather.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@
#include "tl_ucp_sendrecv.h"



/*
 * Pick the copy implementation at the call site:
 *   - use_cuda non-zero: ucc_mc_memcpy(dst, src, len, dst_mem_type, src_mem_type)
 *   - use_cuda zero:     new_ucp_tl_self_copy_nb(...), which also takes
 *                        rank, team and task.
 * rank, team and task are only evaluated on the second path.
 * The expression yields the status returned by the selected call.
 */
#define NEW_MEMCPY(use_cuda, dst, src, len, dst_mem_type, src_mem_type, rank, team, task) \
    (!(use_cuda)                                                                           \
         ? new_ucp_tl_self_copy_nb(dst, src, len, dst_mem_type, src_mem_type,              \
                                   rank, team, task)                                       \
         : ucc_mc_memcpy(dst, src, len, dst_mem_type, src_mem_type))



enum {
UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL,
UCC_TL_UCP_ALLGATHER_ALG_RING,
Expand Down
2 changes: 2 additions & 0 deletions src/components/tl/ucp/allgather/allgather_bruck.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ ucc_status_t ucc_tl_ucp_allgather_bruck_start(ucc_coll_task_t *coll_task)
/* initial step: copy data on non root ranks to the beginning of buffer */

uint32_t USE_CUDA = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_cuda;

if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
// not inplace: copy chunk from source buff to beginning of receive
if(USE_CUDA){
Expand All @@ -254,6 +255,7 @@ ucc_status_t ucc_tl_ucp_allgather_bruck_start(ucc_coll_task_t *coll_task)
}

} else if (trank != 0) {
printf(" inplace\n");
// inplace: copy chunk to the begin
if(USE_CUDA){
status = ucc_mc_memcpy(rbuf, PTR_OFFSET(rbuf, data_size * trank),
Expand Down
10 changes: 4 additions & 6 deletions src/components/tl/ucp/allgather/allgather_knomial.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)


uint32_t USE_CUDA = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_cuda;
if(rank==0){
printf("knomial, rank0 start\n");
}

UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_kn_start", 0);
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);
task->allgather_kn.etask = NULL;
Expand Down Expand Up @@ -239,9 +237,9 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
} else {
/* Loopback */
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(args->src.info.buffer, args->src.info.count * ucc_dt_size(args->src.info.datatype),
args->src.info.mem_type, rank, team, task),task, out2);
args->src.info.mem_type, rank, team, task),task, enqueue);
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(args->dst.info.buffer, offset), args->src.info.count * ucc_dt_size(args->src.info.datatype),
args->dst.info.mem_type, rank, team, task),task, out2);
args->dst.info.mem_type, rank, team, task),task, enqueue);
}
}

Expand All @@ -256,7 +254,7 @@ ucc_status_t ucc_tl_ucp_allgather_knomial_start(ucc_coll_task_t *coll_task)
ucc_knomial_pattern_loop_rank(p, rank),
p->radix, 0);
}
out2:
enqueue:
task->allgather_kn.sbuf = PTR_OFFSET(args->dst.info.buffer, offset);
return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super);
}
Expand Down
24 changes: 12 additions & 12 deletions src/components/tl/ucp/allgather/allgather_neighbor.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,27 +145,27 @@ ucc_status_t ucc_tl_ucp_allgather_neighbor_start(ucc_coll_task_t *coll_task)
ucc_rank_t neighbor;
void *tmprecv, *tmpsend;


UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_allgather_neighbor_start",
0);
ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);

uint32_t USE_CUDA = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_cuda;

if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
/*
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
if(!USE_CUDA){
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
data_size, rmem, smem);
if (ucc_unlikely(UCC_OK != status)) {
return status;
}
*/
status = NEW_MEMCPY(USE_CUDA, PTR_OFFSET(rbuf, data_size * trank), sbuf, data_size, rmem, smem, trank, team, task);
if (ucc_unlikely(UCC_OK != status)) {
printf("error neighbor line 162\n");
return status;
if (ucc_unlikely(UCC_OK != status)) {
return status;
}
} else {
/* Loopback */
UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, data_size, smem, trank, team, task),task, tmp);
UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, data_size * trank), data_size, rmem, trank, team, task),task, tmp);
}
}

tmp:
if (trank % 2) {
neighbor = (trank - 1 + tsize) % tsize;
} else {
Expand Down
1 change: 0 additions & 1 deletion src/components/tl/ucp/allgather/allgather_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ ucc_status_t ucc_tl_ucp_allgather_ring_start(ucc_coll_task_t *coll_task)

if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
block = task->allgather_ring.get_send_block(&task->subset, trank, tsize, 0);

if(USE_CUDA){
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * block),
sbuf, data_size, rmem, smem);
Expand Down
9 changes: 8 additions & 1 deletion src/components/tl/ucp/allgather/allgather_sparbit.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,14 @@ ucc_status_t ucc_tl_ucp_allgather_sparbit_start(ucc_coll_task_t *coll_task)
task->allgather_sparbit.data_expected = 1;

uint32_t USE_CUDA = UCC_TL_UCP_TEAM_LIB(team)->cfg.allgather_use_cuda;

if(trank == 0){
printf("\nin sparbit using: ");
if(USE_CUDA){
printf("cuda\n");
} else {
printf("loop\n");
}
}
if (!UCC_IS_INPLACE(TASK_ARGS(task))) {
if(USE_CUDA){
status = ucc_mc_memcpy(PTR_OFFSET(rbuf, data_size * trank), sbuf,
Expand Down

0 comments on commit 5e852a7

Please sign in to comment.