cl/doca_urom: create the UCC team through a DOCA UROM task and poll for it

Review notes for this revision of the patch:
  - The task cookie points at a struct ucc_result embedded in the team, so the
    completion callback and ucc_cl_doca_urom_team_create_test() share storage
    that is initialised before the task is posted.
  - The "in progress" state is written before the task is posted and reset if
    posting fails.
  - ucc_cl_doca_urom_team_create_test() checks the task result and returns the
    error when the score map cannot be built.
  - The 0/1/2 state values are named by enum team_create_status.

To confirm before applying:
  - struct ucc_result must be a complete type above ucc_cl_doca_urom_team_t in
    cl_doca_urom.h.
  - Indentation of context and removed lines was reconstructed; apply with
    whitespace tolerance (for example "git apply --ignore-whitespace").
  - "index" lines are omitted because the post-image differs. Pre-image blobs:
    cl_doca_urom.h 0d4182f0ff, cl_doca_urom_team.c 15d1a000f1.

diff --git a/src/components/cl/doca_urom/cl_doca_urom.h b/src/components/cl/doca_urom/cl_doca_urom.h
--- a/src/components/cl/doca_urom/cl_doca_urom.h
+++ b/src/components/cl/doca_urom/cl_doca_urom.h
@@ -54,6 +54,14 @@ struct context_create_result {
+/* UCC team create progress states, stored in team_create_result.status */
+enum team_create_status {
+    TEAM_CREATE_NONE        = 0, /* No team create task is posted */
+    TEAM_CREATE_IN_PROGRESS = 1, /* Task posted, completion not reported yet */
+    TEAM_CREATE_DONE        = 2, /* Completion callback has run */
+};
+
 /* UCC team create result */
 struct team_create_result {
     void *team; /* Pointer to UCC team */
+    int status; /* One of enum team_create_status */
 };
 
 /* UCC collective result */
@@ -131,6 +139,8 @@ typedef struct ucc_cl_doca_urom_team {
     unsigned n_teams;
     ucc_coll_score_t *score;
     ucc_score_map_t *score_map;
+    struct ucc_result res; /* Team create result, target of the task cookie */
+    union doca_data cookie; /* Cookie handed to the team create task */
 } ucc_cl_doca_urom_team_t;
 UCC_CLASS_DECLARE(ucc_cl_doca_urom_team_t, ucc_base_context_t *,
                   const ucc_base_team_params_t *);
diff --git a/src/components/cl/doca_urom/cl_doca_urom_team.c b/src/components/cl/doca_urom/cl_doca_urom_team.c
--- a/src/components/cl/doca_urom/cl_doca_urom_team.c
+++ b/src/components/cl/doca_urom/cl_doca_urom_team.c
@@ -8,26 +8,46 @@
 #include "utils/ucc_malloc.h"
 #include "core/ucc_team.h"
 
+#include "contrib/worker_ucc.h"
+
+/*
+ * UCC team create callback
+ *
+ * Copies the task outcome into the struct ucc_result that the cookie points
+ * to. A cookie without a pointer is ignored.
+ *
+ * @result [in]: task result
+ * @cookie [in]: program cookie, ptr refers to a struct ucc_result
+ * @dpu_worker_id [in]: UROM DPU worker id
+ * @team [in]: pointer to UCC team
+ */
+static void urom_ucc_team_create_finished(doca_error_t result,
+                                          union doca_data cookie,
+                                          uint64_t dpu_worker_id,
+                                          void *team)
+{
+    struct ucc_result *res = (struct ucc_result *)cookie.ptr;
+
+    if (res == NULL) {
+        return;
+    }
+
+    res->dpu_worker_id = dpu_worker_id;
+    res->result = result;
+    res->team_create.team = team;
+    /* Written last: ucc_cl_doca_urom_team_create_test() polls this field */
+    res->team_create.status = TEAM_CREATE_DONE;
+}
+
 UCC_CLASS_INIT_FUNC(ucc_cl_doca_urom_team_t, ucc_base_context_t *cl_context,
                     const ucc_base_team_params_t *params)
 {
     ucc_cl_doca_urom_context_t *ctx =
         ucc_derived_of(cl_context, ucc_cl_doca_urom_context_t);
-    //ucc_cl_doca_urom_lib_t *doca_urom_lib = ucc_derived_of(ctx->super.super.lib, ucc_cl_doca_urom_lib_t);
-    //doca_error_t doca_urom_status;
-    ucc_status_t status;/*
-    doca_urom_worker_cmd_t team_cmd = {
-        .cmd_type = UROM_WORKER_CMD_UCC,
-        .ucc.dpu_worker_id = ctx->ctx_rank,
-        .ucc.cmd_type = UROM_WORKER_CMD_UCC_TEAM_CREATE,
-        // FIXME: proper way: use ec map.. for now assume linear
-        .ucc.team_create_cmd =
-        {
-            .start = 0,
-            .stride = 1,
-            .size = params->size,
-        },
-    };*/
+    ucc_cl_doca_urom_lib_t *doca_urom_lib =
+        ucc_derived_of(ctx->super.super.lib, ucc_cl_doca_urom_lib_t);
+    ucc_status_t status;
+    doca_error_t result = DOCA_SUCCESS;
 
     UCC_CLASS_CALL_SUPER_INIT(ucc_cl_team_t, &ctx->super, params);
     self->teams = (ucc_team_h **)ucc_malloc(sizeof(ucc_team_h *) * 16);
@@ -38,12 +58,36 @@ UCC_CLASS_INIT_FUNC(ucc_cl_doca_urom_team_t, ucc_base_context_t *cl_context,
     }
     self->n_teams = 0;
     self->score_map = NULL;
-/*
-    doca_urom_status = doca_urom_worker_push_cmdq(doca_urom_lib->doca_urom_ctx.doca_urom_worker, 0, &team_cmd);
-    if (UROM_OK != doca_urom_status) {
-        cl_error(cl_context->lib, "failed to create team");
-        return UCC_ERR_NO_MESSAGE;
-    }*/
+
+    /*
+     * The cookie is read again after this function returns, so it points at
+     * the result embedded in the team. The state becomes "in progress" before
+     * the task is posted; written afterwards it could overwrite a "done"
+     * already stored by the completion callback.
+     */
+    self->res.team_create.team   = NULL;
+    self->res.team_create.status = TEAM_CREATE_IN_PROGRESS;
+    self->cookie.ptr             = &self->res;
+
+    /*
+     * Linear rank mapping for now: the 0 and 1 below stand in for the start
+     * and stride of the removed team_create_cmd.
+     * NOTE(review): confirm the argument order against contrib/worker_ucc.h.
+     */
+    result = doca_urom_ucc_task_team_create(doca_urom_lib->urom_ctx.urom_worker,
+                                            self->cookie,
+                                            doca_urom_lib->urom_ctx.ctx_rank,
+                                            0,
+                                            1,
+                                            params->params.oob.n_oob_eps,
+                                            doca_urom_lib->urom_ctx.urom_ucc_context,
+                                            urom_ucc_team_create_finished);
+    if (result != DOCA_SUCCESS) {
+        self->res.team_create.status = TEAM_CREATE_NONE;
+        cl_error(cl_context->lib, "Failed to create UCC team task");
+        return UCC_ERR_NO_RESOURCE;
+    }
+
     cl_debug(cl_context->lib, "posted cl team: %p", self);
     return UCC_OK;
 }
@@ -63,41 +107,61 @@ ucc_status_t ucc_cl_doca_urom_team_destroy(ucc_base_team_t *cl_team)
 
 ucc_status_t ucc_cl_doca_urom_team_create_test(ucc_base_team_t *cl_team)
 {
-    //ucc_cl_doca_urom_team_t *team = ucc_derived_of(cl_team, ucc_cl_doca_urom_team_t);
-    //ucc_cl_doca_urom_context_t *ctx = UCC_CL_DOCA_UROM_TEAM_CTX(team);
-    //ucc_cl_doca_urom_lib_t *doca_urom_lib = ucc_derived_of(ctx->super.super.lib, ucc_cl_doca_urom_lib_t);
-    //ucc_memory_type_t mem_types[2] = {UCC_MEMORY_TYPE_HOST,UCC_MEMORY_TYPE_CUDA};
-    //int mt_n = 2;
-    //ucc_coll_score_t *score = NULL;
-    //doca_error_t doca_urom_status;
-    //ucc_status_t ucc_status;
-/*
-    doca_urom_status = doca_urom_worker_pop_notifyq(doca_urom_lib->doca_urom_ctx.doca_urom_worker, 0, &notif);
-    if (DOCA_ERROR_EMPTY != doca_urom_status) {
-        if (doca_urom_status == UROM_OK) {
-            if (notif->ucc.status == (doca_urom_status_t)UCC_OK) {
-                team->teams[team->n_teams] = notif->ucc.team_create_nqe.team;
-                ++team->n_teams;
-                ucc_status = ucc_coll_score_build_default(cl_team, UCC_CL_DOCA_UROM_DEFAULT_SCORE,
-                    ucc_cl_doca_urom_coll_init, UCC_COLL_TYPE_ALLTOALL,// | UCC_COLL_TYPE_ALLREDUCE | UCC_COLL_TYPE_ALLTOALLV | UCC_COLL_TYPE_REDUCE_SCATTER | UCC_COLL_TYPE_REDUCE_SCATTERV | UCC_COLL_TYPE_ALLGATHER,
-                    mem_types, mt_n, &score);
-                if (UCC_OK != ucc_status) {
-                    return ucc_status;
-                }
-
-                ucc_status = ucc_coll_score_build_map(score, &team->score_map);
-                if (UCC_OK != ucc_status) {
-                    cl_error(ctx->super.super.lib, "failed to build score map");
-                }
-                team->score = score;
-                ucc_coll_score_set(team->score, UCC_CL_DOCA_UROM_DEFAULT_SCORE);
-*/
-                return UCC_OK; /*
-            }
-        }
-        return UCC_ERR_NO_MESSAGE;
-    }
-    return UCC_INPROGRESS;*/
+    ucc_cl_doca_urom_team_t    *team =
+        ucc_derived_of(cl_team, ucc_cl_doca_urom_team_t);
+    ucc_cl_doca_urom_context_t *ctx  = UCC_CL_DOCA_UROM_TEAM_CTX(team);
+    ucc_memory_type_t           mem_types[2] = {UCC_MEMORY_TYPE_HOST,
+                                                UCC_MEMORY_TYPE_CUDA};
+    int                         mt_n  = 2;
+    ucc_coll_score_t           *score = NULL;
+    struct ucc_result          *res   = (struct ucc_result *)team->cookie.ptr;
+    ucc_status_t                ucc_status;
+
+    /* No result storage or no task posted: nothing to wait for */
+    if (res == NULL || res->team_create.status == TEAM_CREATE_NONE) {
+        return UCC_ERR_NO_MESSAGE;
+    }
+
+    /* Task posted, completion callback has not run yet */
+    if (res->team_create.status != TEAM_CREATE_DONE) {
+        return UCC_INPROGRESS;
+    }
+
+    /* The task finished, but not necessarily successfully */
+    if (res->result != DOCA_SUCCESS || res->team_create.team == NULL) {
+        cl_error(ctx->super.super.lib, "UCC team create task failed");
+        return UCC_ERR_NO_MESSAGE;
+    }
+
+    /*
+     * Only alltoall is enabled. The previous revision of this call carried
+     * allreduce, alltoallv, reduce_scatter, reduce_scatterv and allgather
+     * as a commented-out extension of the collective mask.
+     */
+    ucc_status = ucc_coll_score_build_default(cl_team,
+                                              UCC_CL_DOCA_UROM_DEFAULT_SCORE,
+                                              ucc_cl_doca_urom_coll_init,
+                                              UCC_COLL_TYPE_ALLTOALL,
+                                              mem_types, mt_n, &score);
+    if (UCC_OK != ucc_status) {
+        return ucc_status;
+    }
+
+    ucc_status = ucc_coll_score_build_map(score, &team->score_map);
+    if (UCC_OK != ucc_status) {
+        cl_error(ctx->super.super.lib, "failed to build score map");
+        /* Not stored in the team yet, so it has to be released here */
+        ucc_coll_score_free(score);
+        return ucc_status;
+    }
+    team->score = score;
+    ucc_coll_score_set(team->score, UCC_CL_DOCA_UROM_DEFAULT_SCORE);
+
+    /* Publish the team only after its score map exists */
+    team->teams[team->n_teams] = res->team_create.team;
+    ++team->n_teams;
+
+    return UCC_OK;
 }
 
 ucc_status_t ucc_cl_doca_urom_team_get_scores(ucc_base_team_t *cl_team,