Skip to content

Commit

Permalink
Team create
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Sarkauskas committed Mar 25, 2024
1 parent f6baf0a commit c264bb7
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 54 deletions.
2 changes: 2 additions & 0 deletions src/components/cl/doca_urom/cl_doca_urom.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct context_create_result {
/*
 * UCC team create result
 *
 * Filled in by the team-create completion callback and read back by the
 * code that polls for team creation.
 */
struct team_create_result {
void *team; /* Pointer to UCC team, as handed to the completion callback */
int status; /* Team create progress: 0 = nothing posted, 1 = team create in progress, 2 = team create done */
};

/* UCC collective result */
Expand Down Expand Up @@ -131,6 +132,7 @@ typedef struct ucc_cl_doca_urom_team {
unsigned n_teams;
ucc_coll_score_t *score;
ucc_score_map_t *score_map;
union doca_data cookie;
} ucc_cl_doca_urom_team_t;
UCC_CLASS_DECLARE(ucc_cl_doca_urom_team_t, ucc_base_context_t *,
const ucc_base_team_params_t *);
Expand Down
127 changes: 73 additions & 54 deletions src/components/cl/doca_urom/cl_doca_urom_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,39 @@
#include "utils/ucc_malloc.h"
#include "core/ucc_team.h"

#include "contrib/worker_ucc.h"

/*
 * Completion callback for a UCC team-create task.
 *
 * Copies the task outcome into the struct ucc_result that the cookie points
 * at. A cookie whose pointer is NULL is ignored.
 *
 * @result [in]: task result
 * @cookie [in]: program cookie; cookie.ptr is read as a struct ucc_result
 * @dpu_worker_id [in]: UROM DPU worker id
 * @team [in]: pointer to UCC team
 */
static void urom_ucc_team_create_finished(doca_error_t result,
                                          union doca_data cookie,
                                          uint64_t dpu_worker_id,
                                          void *team)
{
    struct ucc_result *out = (struct ucc_result *)cookie.ptr;

    if (out != NULL) {
        out->dpu_worker_id    = dpu_worker_id;
        out->result           = result;
        out->team_create.team = team;
        /* Written last: 2 marks the team create as done. */
        out->team_create.status = 2;
    }
}

UCC_CLASS_INIT_FUNC(ucc_cl_doca_urom_team_t, ucc_base_context_t *cl_context,
const ucc_base_team_params_t *params)
{
ucc_cl_doca_urom_context_t *ctx =
ucc_derived_of(cl_context, ucc_cl_doca_urom_context_t);
//ucc_cl_doca_urom_lib_t *doca_urom_lib = ucc_derived_of(ctx->super.super.lib, ucc_cl_doca_urom_lib_t);
//doca_error_t doca_urom_status;
ucc_status_t status;/*
doca_urom_worker_cmd_t team_cmd = {
.cmd_type = UROM_WORKER_CMD_UCC,
.ucc.dpu_worker_id = ctx->ctx_rank,
.ucc.cmd_type = UROM_WORKER_CMD_UCC_TEAM_CREATE,
// FIXME: proper way: use ec map.. for now assume linear
.ucc.team_create_cmd =
{
.start = 0,
.stride = 1,
.size = params->size,
},
};*/
ucc_cl_doca_urom_lib_t *doca_urom_lib = ucc_derived_of(ctx->super.super.lib, ucc_cl_doca_urom_lib_t);
ucc_status_t status;
doca_error_t result = DOCA_SUCCESS;

UCC_CLASS_CALL_SUPER_INIT(ucc_cl_team_t, &ctx->super, params);
self->teams = (ucc_team_h **)ucc_malloc(sizeof(ucc_team_h *) * 16);
Expand All @@ -38,12 +51,22 @@ UCC_CLASS_INIT_FUNC(ucc_cl_doca_urom_team_t, ucc_base_context_t *cl_context,
}
self->n_teams = 0;
self->score_map = NULL;
/*
doca_urom_status = doca_urom_worker_push_cmdq(doca_urom_lib->doca_urom_ctx.doca_urom_worker, 0, &team_cmd);
if (UROM_OK != doca_urom_status) {
cl_error(cl_context->lib, "failed to create team");
return UCC_ERR_NO_MESSAGE;
}*/

result = doca_urom_ucc_task_team_create(doca_urom_lib->urom_ctx.urom_worker,
self->cookie,
doca_urom_lib->urom_ctx.ctx_rank,
0,
1,
params->params.oob.n_oob_eps,
doca_urom_lib->urom_ctx.urom_ucc_context,
urom_ucc_team_create_finished);
if (result != DOCA_SUCCESS) {
cl_error(cl_context->lib, "Failed to create UCC team task");
return UCC_ERR_NO_RESOURCE;
}

((struct ucc_result *)self->cookie.ptr)->team_create.status = 1; // set in progress

cl_debug(cl_context->lib, "posted cl team: %p", self);
return UCC_OK;
}
Expand All @@ -63,41 +86,37 @@ ucc_status_t ucc_cl_doca_urom_team_destroy(ucc_base_team_t *cl_team)

/*
 * Poll the team-create task whose result is tracked through team->cookie.
 *
 * The completion callback records its outcome in the struct ucc_result that
 * team->cookie.ptr points at; this function inspects that record:
 *   status 0 - nothing was posted
 *   status 1 - task still in progress
 *   status 2 - task finished; register the team and build the score map
 *
 * @cl_team [in]: CL team under construction
 *
 * @return: UCC_OK once the team is registered and its score map is built,
 *          UCC_INPROGRESS while the task is outstanding,
 *          UCC_ERR_NO_MESSAGE when no result record exists, nothing was
 *          posted, or the task itself reported a failure,
 *          otherwise the error returned by the score helpers.
 */
ucc_status_t ucc_cl_doca_urom_team_create_test(ucc_base_team_t *cl_team)
{
    ucc_cl_doca_urom_team_t    *team = ucc_derived_of(cl_team, ucc_cl_doca_urom_team_t);
    ucc_cl_doca_urom_context_t *ctx  = UCC_CL_DOCA_UROM_TEAM_CTX(team);
    ucc_memory_type_t           mem_types[2] = {UCC_MEMORY_TYPE_HOST,
                                                UCC_MEMORY_TYPE_CUDA};
    int                         mt_n  = 2;
    ucc_coll_score_t           *score = NULL;
    ucc_status_t                ucc_status;
    struct ucc_result          *res = (struct ucc_result *)team->cookie.ptr;
    struct team_create_result  *team_create;

    /* Without a result record there is nothing to poll. */
    if (res == NULL) {
        cl_error(ctx->super.super.lib, "team create result is not set");
        return UCC_ERR_NO_MESSAGE;
    }
    team_create = &res->team_create;

    if (team_create->status == 0) { /* 0 = nothing posted */
        return UCC_ERR_NO_MESSAGE;
    }

    if (team_create->status != 2) { /* 1 = in progress */
        return UCC_INPROGRESS;
    }

    /* 2 = done: the callback stored the task result before flagging done. */
    if (res->result != DOCA_SUCCESS) {
        cl_error(ctx->super.super.lib, "UCC team create task failed");
        return UCC_ERR_NO_MESSAGE;
    }

    team->teams[team->n_teams] = team_create->team;
    ++team->n_teams;

    /* Only alltoall is advertised for this CL at the moment. */
    ucc_status = ucc_coll_score_build_default(cl_team,
                                              UCC_CL_DOCA_UROM_DEFAULT_SCORE,
                                              ucc_cl_doca_urom_coll_init,
                                              UCC_COLL_TYPE_ALLTOALL,
                                              mem_types, mt_n, &score);
    if (UCC_OK != ucc_status) {
        return ucc_status;
    }

    /*
     * Hand the score to the team before building the map so it stays
     * reachable through the team even if the map build fails.
     */
    team->score = score;

    ucc_status = ucc_coll_score_build_map(score, &team->score_map);
    if (UCC_OK != ucc_status) {
        cl_error(ctx->super.super.lib, "failed to build score map");
        return ucc_status;
    }

    ucc_coll_score_set(team->score, UCC_CL_DOCA_UROM_DEFAULT_SCORE);

    return UCC_OK;
}

ucc_status_t ucc_cl_doca_urom_team_get_scores(ucc_base_team_t *cl_team,
Expand Down

0 comments on commit c264bb7

Please sign in to comment.