Skip to content

Commit

Permalink
Merge branch 'master' into tl_mlx5_ci
Browse files Browse the repository at this point in the history
  • Loading branch information
samnordmann authored Mar 8, 2024
2 parents 3594368 + 9e0d759 commit 4fc4acc
Show file tree
Hide file tree
Showing 70 changed files with 1,946 additions and 384 deletions.
69 changes: 45 additions & 24 deletions config/m4/rocm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,28 @@ AC_DEFUN([ROCM_BUILD_FLAGS],
# Parse value of ARG into appropriate LIBS, LDFLAGS, and
# CPPFLAGS variables.
AC_DEFUN([HIP_BUILD_FLAGS],
$4="-D__HIP_PLATFORM_AMD__ -I$1/include/hip -I$1/include"
$3="-L$1/lib"
$4="-D__HIP_PLATFORM_AMD__ -I$1/include/hip -I$1/include -I$1/llvm/include"
$3="-L$1/lib -L$1/llvm/lib"
$2="-lamdhip64"
)

# CHECK_ROCM_VERSION(HIP_VERSION_MAJOR, ROCM_VERSION_CONDITION)
# ----------------------------------------------------------
# Compiles a small test program that includes hip/hip_version.h from the
# ${with_rocm} install tree and compares the HIP_VERSION_MAJOR it defines
# against the first argument (the minimum required major version).
# The shell variable named by the second argument is set to 1 (TRUE) when the
# program compiles, i.e. HIP_VERSION_MAJOR is at least the requested value,
# and to 0 (FALSE) when compilation fails for any reason.
# The "else" branch of the test program is intentionally not valid C, which
# is what makes the compile check fail for older versions.
AC_DEFUN([CHECK_ROCM_VERSION], [
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <${with_rocm}/include/hip/hip_version.h>
]], [[
#if HIP_VERSION_MAJOR >= $1
return 0;
#else
intr make+compilation_fail()
#endif
]])],
[$2=1],
[$2=0])
])

#
# Check for ROCm support
#
Expand Down Expand Up @@ -102,28 +119,25 @@ AS_IF([test "x$with_rocm" != "xno"],
LDFLAGS="$SAVE_LDFLAGS"
LIBS="$SAVE_LIBS"
#Check whether we run on ROCm 5.0 or higher
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <${with_rocm}/include/rocm_version.h>
]], [[
#if ROCM_VERSION_MAJOR >= 5
return 0;
#else
intr make+compilation_fail()
#endif
]])],
[ROCM_VERSION_50_OR_GREATER=1],
[ROCM_VERSION_50_OR_GREATER=0])
HIP_BUILD_FLAGS([$with_rocm], [HIP_LIBS], [HIP_LDFLAGS], [HIP_CPPFLAGS])
AC_MSG_CHECKING([if ROCm version is 5.0 or above])
if test "$ROCM_VERSION_50_OR_GREATER" = "1" ; then
# Check whether we run on ROCm 6.0 or higher
CHECK_ROCM_VERSION(6, ROCM_VERSION_60_OR_GREATER)
AC_MSG_CHECKING([if ROCm version is 6.0 or above])
if test "$ROCM_VERSION_60_OR_GREATER" = "1" ; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
HIP_CPPFLAGS="${HIP_CPPFLAGS} -I${with_rocm}/hip/include"
HIP_LDFLAGS="${HIP_LDFLAGS} -L${with_rocm}/hip/lib"
# Check whether we run on ROCm 5.0-5.7
CHECK_ROCM_VERSION(5, ROCM_VERSION_50_OR_GREATER)
AC_MSG_CHECKING([if ROCm version is 5.0 - 5.7])
if test "$ROCM_VERSION_50_OR_GREATER" = "1" ; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
HIP_CPPFLAGS="${HIP_CPPFLAGS} -I${with_rocm}/hip/include"
HIP_LDFLAGS="${HIP_LDFLAGS} -L${with_rocm}/hip/lib"
fi
fi
CPPFLAGS="$HIP_CPPFLAGS $CPPFLAGS"
Expand All @@ -142,10 +156,17 @@ intr make+compilation_fail()
LDFLAGS="$SAVE_LDFLAGS"
LIBS="$SAVE_LIBS"
AS_IF([test "x$hip_happy" = "xyes"],
[AC_PATH_PROG([HIPCC], [hipcc], [notfound], [$PATH:$with_rocm/bin])])
AS_IF([test "$HIPCC" = "notfound"], [hip_happy="no"])
if test "$ROCM_VERSION_60_OR_GREATER" = "1" ; then
AC_MSG_NOTICE([using amdclang as ROCm version is 6.0 or above])
AS_IF([test "x$hip_happy" = "xyes"],
[AC_PATH_PROG([HIPCC], [amdclang], [notfound], [$PATH:$with_rocm/bin])])
AS_IF([test "$HIPCC" = "notfound"], [hip_happy="no"])
else
AC_MSG_NOTICE([using hipcc as ROCm version is 3.7.0 to ROCm 5.7.1])
AS_IF([test "x$hip_happy" = "xyes"],
[AC_PATH_PROG([HIPCC], [hipcc], [notfound], [$PATH:$with_rocm/bin])])
AS_IF([test "$HIPCC" = "notfound"], [hip_happy="no"])
fi
AS_IF([test "x$hip_happy" = "xyes"],
[AC_DEFINE([HAVE_HIP], 1, [Enable HIP support])
Expand Down
10 changes: 8 additions & 2 deletions cuda_lt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,21 @@ local_npic_filepath="${local_npic_dir}${o_filename}"
mkdir -p $pic_dir

tmpcmd="${@:3}"
if [[ "$tmpcmd" == *"hipcc"* ]]; then
if [[ "$tmpcmd" == *"amdclang"* ]]; then
cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx940 --offload-arch=gfx941 --offload-arch=gfx942 --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --offload-arch=gfx1102 --offload-arch=native ${@:5} -fPIC -O3 -o ${pic_filepath}"
elif [[ "$tmpcmd" == *"hipcc"* ]]; then
cmd="${@:3} -fPIC -o ${pic_filepath}"
else
cmd="${@:3} -Xcompiler -fPIC -o ${pic_filepath}"
fi
echo $cmd
$cmd

cmd="${@:3} -o ${npic_filepath}"
if [[ "$tmpcmd" == *"amdclang"* ]]; then
cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx940 --offload-arch=gfx941 --offload-arch=gfx942 --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --offload-arch=gfx1102 --offload-arch=native ${@:5} -O3 -o ${npic_filepath}"
else
cmd="${@:3} -o ${npic_filepath}"
fi
echo $cmd
$cmd

Expand Down
12 changes: 11 additions & 1 deletion src/coll_score/ucc_coll_score.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -9,6 +9,16 @@
#include "utils/ucc_log.h"
#include "utils/ucc_coll_utils.h"

/* Writes a textual form of score into buf, passing max through to the
   string helpers as the size bound: the literal "inf" when score equals
   UCC_SCORE_MAX, otherwise the score formatted with "%d".
   Returns buf so the call can be used directly as a format argument. */
char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max)
{
    if (score != UCC_SCORE_MAX) {
        ucc_snprintf_safe(buf, max, "%d", score);
        return buf;
    }

    /* The maximum score is shown symbolically rather than as a number */
    ucc_strncpy_safe(buf, "inf", max);
    return buf;
}

ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score)
{
ucc_coll_score_t *s = ucc_malloc(sizeof(*s), "ucc_coll_score");
Expand Down
14 changes: 8 additions & 6 deletions src/coll_score/ucc_coll_score.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -63,6 +63,8 @@ typedef struct ucc_coll_score {

typedef struct ucc_score_map ucc_score_map_t;

char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max);

/* Allocates empty score data structure */
ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score);

Expand All @@ -77,7 +79,7 @@ ucc_status_t ucc_coll_score_add_range(ucc_coll_score_t *score,

/* Releases the score data structure and all the score ranges stored
there */
void ucc_coll_score_free(ucc_coll_score_t *score);
void ucc_coll_score_free(ucc_coll_score_t *score);

/* Merges 2 scores score1 and score2 into the new score "rst" selecting
larger score. Ie.: rst will contain a range from score1 if either
Expand All @@ -87,9 +89,9 @@ void ucc_coll_score_free(ucc_coll_score_t *score);
This fn is used by CL to merge scores from multiple TLs and produce
a score map. As a result the produced score map will select TL with
higher score.*/
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);


/* Parses SCORE string (see ucc_base_iface.c for pattern description)
Expand Down Expand Up @@ -147,7 +149,7 @@ ucc_status_t ucc_coll_score_build_default(ucc_base_team_t *team,
ucc_status_t ucc_coll_score_build_map(ucc_coll_score_t *score,
ucc_score_map_t **map);

void ucc_coll_score_free_map(ucc_score_map_t *map);
void ucc_coll_score_free_map(ucc_score_map_t *map);

/* Initializes task based on args selection and score map.
Checks fallbacks if necessary. */
Expand Down
35 changes: 20 additions & 15 deletions src/coll_score/ucc_coll_score_map.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -74,22 +74,22 @@ void ucc_coll_score_free_map(ucc_score_map_t *map)
ucc_free(map);
}

static
ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map,
ucc_base_coll_args_t *bargs,
ucc_msg_range_t **range)
static ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map,
ucc_base_coll_args_t *bargs,
ucc_msg_range_t **range)
{
ucc_memory_type_t mt = ucc_coll_args_mem_type(&bargs->args,
map->team_rank);
unsigned ct = ucc_ilog2(bargs->args.coll_type);
size_t msgsize = ucc_coll_args_msgsize(&bargs->args,
map->team_rank,
map->team_size);
ucc_list_link_t *list;
ucc_msg_range_t *r;
ucc_list_link_t *list;
ucc_msg_range_t *r;

if (mt == UCC_MEMORY_TYPE_ASYMMETRIC) {
/* TODO */
ucc_debug("asymmetric memory type is not supported");
return UCC_ERR_NOT_SUPPORTED;
} else if (mt == UCC_MEMORY_TYPE_NOT_APPLY) {
/* Temporary solution: for Barrier, Fanin, Fanout - use
Expand Down Expand Up @@ -122,7 +122,9 @@ ucc_status_t ucc_coll_init(ucc_score_map_t *map,
ucc_status_t status;

status = ucc_coll_score_map_lookup(map, bargs, &r);
if (UCC_OK != status) {
if (ucc_unlikely(UCC_OK != status)) {
ucc_debug("coll_score_map lookup failed %d (%s)",
status, ucc_status_string(status));
return status;
}

Expand Down Expand Up @@ -160,11 +162,12 @@ ucc_status_t ucc_coll_init(ucc_score_map_t *map,

void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
{
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char range_str[128];
char coll_str[1024];
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char score_str[32];
char range_str[128];
char coll_str[1024];

for (i = 0; i < UCC_COLL_TYPE_NUM; i++) {
all_empty = 1;
Expand All @@ -191,10 +194,12 @@ void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
super.list_elem) {
ucc_memunits_range_str(range->start, range->end, range_str,
sizeof(range_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%u ",
ucc_score_to_str(range->super.score, score_str,
sizeof(score_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%s ",
range_str,
range->super.team->context->lib->log_component.name,
range->super.score);
score_str);
}
STR_APPEND(coll_str, left, 4, "\n");
}
Expand Down
8 changes: 7 additions & 1 deletion src/components/cl/hier/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ bcast = \
bcast/bcast.c \
bcast/bcast_2step.c

# Files under reduce/; pulled into the library through $(reduce) in "sources"
reduce = \
reduce/reduce.h \
reduce/reduce.c \
reduce/reduce_2step.c

sources = \
cl_hier.h \
cl_hier.c \
Expand All @@ -37,7 +42,8 @@ sources = \
$(alltoallv) \
$(alltoall) \
$(barrier) \
$(bcast)
$(bcast) \
$(reduce)

module_LTLIBRARIES = libucc_cl_hier.la
libucc_cl_hier_la_SOURCES = $(sources)
Expand Down
5 changes: 5 additions & 0 deletions src/components/cl/hier/cl_hier.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ static ucc_config_field_t ucc_cl_hier_lib_config_table[] = {
ucc_offsetof(ucc_cl_hier_lib_config_t, bcast_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{"REDUCE_2STEP_PIPELINE", "n",
"Pipelining settings for RAB reduce algorithm",
ucc_offsetof(ucc_cl_hier_lib_config_t, reduce_2step_pipeline),
UCC_CONFIG_TYPE_PIPELINE_PARAMS},

{NULL}};

static ucs_config_field_t ucc_cl_hier_context_config_table[] = {
Expand Down
11 changes: 8 additions & 3 deletions src/components/cl/hier/cl_hier.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -53,6 +53,7 @@ typedef struct ucc_cl_hier_lib_config {
ucc_pipeline_params_t allreduce_split_rail_pipeline;
ucc_pipeline_params_t allreduce_rab_pipeline;
ucc_pipeline_params_t bcast_2step_pipeline;
ucc_pipeline_params_t reduce_2step_pipeline;
} ucc_cl_hier_lib_config_t;

typedef struct ucc_cl_hier_context_config {
Expand Down Expand Up @@ -109,8 +110,12 @@ typedef struct ucc_cl_hier_team {
UCC_CLASS_DECLARE(ucc_cl_hier_team_t, ucc_base_context_t *,
const ucc_base_team_params_t *);

#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | UCC_COLL_TYPE_ALLTOALLV)
#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | \
UCC_COLL_TYPE_ALLTOALLV | \
UCC_COLL_TYPE_ALLREDUCE | \
UCC_COLL_TYPE_BARRIER | \
UCC_COLL_TYPE_BCAST)

ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand Down
Loading

0 comments on commit 4fc4acc

Please sign in to comment.