Skip to content

Commit

Permalink
CL/HIER: 2step reduce algorithm (openucx#854)
Browse files Browse the repository at this point in the history
* CL/HIER: 2step reduce algorithm

originally implemented by V.Petrov

* TEST: add 2step reduce to gtest
  • Loading branch information
Sergei-Lebedev authored Mar 4, 2024
1 parent ce9821c commit e353542
Show file tree
Hide file tree
Showing 10 changed files with 472 additions and 60 deletions.
8 changes: 7 additions & 1 deletion src/components/cl/hier/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ bcast = \
bcast/bcast.c \
bcast/bcast_2step.c

reduce = \
reduce/reduce.h \
reduce/reduce.c \
reduce/reduce_2step.c

sources = \
cl_hier.h \
cl_hier.c \
Expand All @@ -37,7 +42,8 @@ sources = \
$(alltoallv) \
$(alltoall) \
$(barrier) \
$(bcast)
$(bcast) \
$(reduce)

module_LTLIBRARIES = libucc_cl_hier.la
libucc_cl_hier_la_SOURCES = $(sources)
Expand Down
5 changes: 5 additions & 0 deletions src/components/cl/hier/cl_hier.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ static ucc_config_field_t ucc_cl_hier_lib_config_table[] = {
ucc_offsetof(ucc_cl_hier_lib_config_t, bcast_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{"REDUCE_2STEP_PIPELINE", "n",
"Pipelining settings for RAB reduce algorithm",
ucc_offsetof(ucc_cl_hier_lib_config_t, reduce_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{NULL}};

static ucs_config_field_t ucc_cl_hier_context_config_table[] = {
Expand Down
1 change: 1 addition & 0 deletions src/components/cl/hier/cl_hier.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef struct ucc_cl_hier_lib_config {
ucc_pipeline_params_t allreduce_split_rail_pipeline;
ucc_pipeline_params_t allreduce_rab_pipeline;
ucc_pipeline_params_t bcast_2step_pipeline;
ucc_pipeline_params_t reduce_2step_pipeline;
} ucc_cl_hier_lib_config_t;

typedef struct ucc_cl_hier_context_config {
Expand Down
49 changes: 32 additions & 17 deletions src/components/cl/hier/cl_hier_coll.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -13,7 +13,8 @@
const char *
ucc_cl_hier_default_alg_select_str[UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR] = {
UCC_CL_HIER_ALLREDUCE_DEFAULT_ALG_SELECT_STR,
UCC_CL_HIER_BCAST_DEFAULT_ALG_SELECT_STR};
UCC_CL_HIER_BCAST_DEFAULT_ALG_SELECT_STR,
UCC_CL_HIER_REDUCE_DEFAULT_ALG_SELECT_STR};

ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand All @@ -22,14 +23,16 @@ ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
switch (coll_args->args.coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_rab_init(coll_args, team, task);
case UCC_COLL_TYPE_BARRIER:
return ucc_cl_hier_barrier_init(coll_args, team, task);
case UCC_COLL_TYPE_ALLTOALL:
return ucc_cl_hier_alltoall_init(coll_args, team, task);
case UCC_COLL_TYPE_ALLTOALLV:
return ucc_cl_hier_alltoallv_init(coll_args, team, task);
case UCC_COLL_TYPE_BARRIER:
return ucc_cl_hier_barrier_init(coll_args, team, task);
case UCC_COLL_TYPE_BCAST:
return ucc_cl_hier_bcast_2step_init(coll_args, team, task);
case UCC_COLL_TYPE_REDUCE:
return ucc_cl_hier_reduce_2step_init(coll_args, team, task);
default:
cl_error(team->context->lib, "coll_type %s is not supported",
ucc_coll_type_str(coll_args->args.coll_type));
Expand All @@ -41,14 +44,16 @@ ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
static inline int alg_id_from_str(ucc_coll_type_t coll_type, const char *str)
{
switch (coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_alg_from_str(str);
case UCC_COLL_TYPE_ALLTOALLV:
return ucc_cl_hier_alltoallv_alg_from_str(str);
case UCC_COLL_TYPE_ALLTOALL:
return ucc_cl_hier_alltoall_alg_from_str(str);
case UCC_COLL_TYPE_ALLREDUCE:
return ucc_cl_hier_allreduce_alg_from_str(str);
case UCC_COLL_TYPE_BCAST:
return ucc_cl_hier_bcast_alg_from_str(str);
case UCC_COLL_TYPE_REDUCE:
return ucc_cl_hier_reduce_alg_from_str(str);
default:
break;
}
Expand All @@ -66,6 +71,19 @@ ucc_status_t ucc_cl_hier_alg_id_to_init(int alg_id, const char *alg_id_str,
}

switch (coll_type) {
case UCC_COLL_TYPE_ALLREDUCE:
switch (alg_id) {
case UCC_CL_HIER_ALLREDUCE_ALG_RAB:
*init = ucc_cl_hier_allreduce_rab_init;
break;
case UCC_CL_HIER_ALLREDUCE_ALG_SPLIT_RAIL:
*init = ucc_cl_hier_allreduce_split_rail_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
break;
case UCC_COLL_TYPE_ALLTOALLV:
switch (alg_id) {
case UCC_CL_HIER_ALLTOALLV_ALG_NODE_SPLIT:
Expand All @@ -86,28 +104,25 @@ ucc_status_t ucc_cl_hier_alg_id_to_init(int alg_id, const char *alg_id_str,
break;
};
break;
case UCC_COLL_TYPE_ALLREDUCE:
case UCC_COLL_TYPE_BCAST:
switch (alg_id) {
case UCC_CL_HIER_ALLREDUCE_ALG_RAB:
*init = ucc_cl_hier_allreduce_rab_init;
break;
case UCC_CL_HIER_ALLREDUCE_ALG_SPLIT_RAIL:
*init = ucc_cl_hier_allreduce_split_rail_init;
case UCC_CL_HIER_BCAST_ALG_2STEP:
*init = ucc_cl_hier_bcast_2step_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
break;
case UCC_COLL_TYPE_BCAST:
switch (alg_id) {
case UCC_CL_HIER_BCAST_ALG_2STEP:
*init = ucc_cl_hier_bcast_2step_init;
case UCC_COLL_TYPE_REDUCE:
switch(alg_id) {
case UCC_CL_HIER_REDUCE_ALG_2STEP:
*init = ucc_cl_hier_reduce_2step_init;
break;
default:
status = UCC_ERR_INVALID_PARAM;
break;
};
}
break;
default:
status = UCC_ERR_NOT_SUPPORTED;
Expand Down
5 changes: 3 additions & 2 deletions src/components/cl/hier/cl_hier_coll.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -14,8 +14,9 @@
#include "alltoall/alltoall.h"
#include "barrier/barrier.h"
#include "bcast/bcast.h"
#include "reduce/reduce.h"

#define UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR 2
#define UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR 3

extern const char
*ucc_cl_hier_default_alg_select_str[UCC_CL_HIER_N_DEFAULT_ALG_SELECT_STR];
Expand Down
17 changes: 17 additions & 0 deletions src/components/cl/hier/reduce/reduce.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "reduce.h"
#include "../reduce/reduce.h"

/* Descriptor table for the CL/HIER reduce algorithms, indexed by algorithm id.
 * The extra slot at UCC_CL_HIER_REDUCE_ALG_LAST holds a zero id and NULL
 * name/desc and acts as the end-of-table entry. */
ucc_base_coll_alg_info_t
    ucc_cl_hier_reduce_algs[UCC_CL_HIER_REDUCE_ALG_LAST + 1] = {
        [UCC_CL_HIER_REDUCE_ALG_2STEP] = {
            .id   = UCC_CL_HIER_REDUCE_ALG_2STEP,
            .name = "2step",
            .desc = "intra-node and inter-node reduces executed in parallel",
        },
        [UCC_CL_HIER_REDUCE_ALG_LAST] = {
            .id   = 0,
            .name = NULL,
            .desc = NULL,
        },
};
38 changes: 38 additions & 0 deletions src/components/cl/hier/reduce/reduce.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#ifndef REDUCE_H_
#define REDUCE_H_
#include "../cl_hier.h"

enum
{
UCC_CL_HIER_REDUCE_ALG_2STEP,
UCC_CL_HIER_REDUCE_ALG_LAST,
};

/* Table of reduce algorithm descriptors (id, name, desc), one entry per
 * algorithm id plus one extra entry at index UCC_CL_HIER_REDUCE_ALG_LAST. */
extern ucc_base_coll_alg_info_t
ucc_cl_hier_reduce_algs[UCC_CL_HIER_REDUCE_ALG_LAST + 1];

/* Default algorithm selection string for reduce.
 * NOTE(review): presumably "<coll>:<msg size range>:@<alg name>", i.e. use the
 * "2step" algorithm for messages of 0 to 4k bytes - confirm against the
 * selection-string parser, which is not part of this header. */
#define UCC_CL_HIER_REDUCE_DEFAULT_ALG_SELECT_STR "reduce:0-4k:@2step"

/**
 * Init function of the "2step" reduce algorithm
 * (UCC_CL_HIER_REDUCE_ALG_2STEP).
 *
 * @param coll_args  collective arguments of the reduce operation
 * @param team       team the collective is initialized on
 * @param task       presumably receives the created task on success - TODO
 *                   confirm against the definition, which is not in this
 *                   header
 *
 * @return a ucc_status_t value; the specific codes are determined by the
 *         definition.
 */
ucc_status_t ucc_cl_hier_reduce_2step_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task);

/**
 * Look up a reduce algorithm id by its name.
 *
 * The name is compared case-insensitively against the name of every entry of
 * ucc_cl_hier_reduce_algs below UCC_CL_HIER_REDUCE_ALG_LAST.
 *
 * @param str  algorithm name to look up; passed directly to strcasecmp, so it
 *             must not be NULL
 *
 * @return the id of the first entry whose name matches, or
 *         UCC_CL_HIER_REDUCE_ALG_LAST when no entry matches.
 */
static inline int ucc_cl_hier_reduce_alg_from_str(const char *str)
{
    int alg_id = 0;

    while (alg_id < UCC_CL_HIER_REDUCE_ALG_LAST) {
        if (strcasecmp(str, ucc_cl_hier_reduce_algs[alg_id].name) == 0) {
            return alg_id;
        }
        alg_id++;
    }
    /* No name matched: report the one-past-the-end id. */
    return UCC_CL_HIER_REDUCE_ALG_LAST;
}

#endif
Loading

0 comments on commit e353542

Please sign in to comment.