Skip to content

Commit

Permalink
TL/SELF: support team size 1 (#511)
Browse files Browse the repository at this point in the history
* TL/SELF: support team size 1

* TL/SELF: cleanup

* TL/SELF: fix lib name and formatting

* CODESTYLE: add TL/SELF

* TL/SELF: fix configure file and TL/CUDA creation

* TL/SELF: fix compilation errors

* CORE: skip service team for size 1

* TL/UCP: only skip team size 1

* REVIEW: add Meta copyright

* TL/SELF: fix team creation

* TEST: enable gtest for team size 1

* TL/CUDA: skip team size 1

* TL/SELF: address review comments

* CODESTYLE: code clean-up and formatting

* TL/SELF: use executor

* CODESTYLE: code formatting

* DOCS: update author email

* TL/SELF: address comments

* TL/SELF: fix compilation error

* TL/SELF: address comments

* CODESTYLE: address comments

* TEST: add more team size 1 tests

* CODESTYLE: address comments

* TEST: fix team size 1 gather/bcast gtest
  • Loading branch information
kingchc authored Jun 14, 2022
1 parent 4cd8811 commit d891b01
Show file tree
Hide file tree
Showing 19 changed files with 680 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codestyle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
fi
fi
H1="CODESTYLE|REVIEW|CORE|UTIL|TEST|API|DOCS|TOOLS|BUILD|MC|EC|SCHEDULE|TOPO"
H2="CL/|TL/|MC/|EC/|UCP|NCCL|SHARP|BASIC|HIER|CUDA|CPU|EE|RCCL|ROCM"
H2="CL/|TL/|MC/|EC/|UCP|NCCL|SHARP|BASIC|HIER|CUDA|CPU|EE|RCCL|ROCM|SELF"
if ! echo $msg | grep -qP '^Merge |^'"(($H1)|($H2))"'+: \w'
then
echo "Wrong header"
Expand Down
2 changes: 1 addition & 1 deletion AUTHORS
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Alex Margolin [email protected]
Anatoly Vildemanov [email protected]
Boris Karasev [email protected]
Ching-Hsiang Chu king770120@gmail.com
Ching-Hsiang Chu chchu@fb.com
Devendar Bureddy [email protected]
Ferrol Aderholdt [email protected]
Geoffroy Vallee [email protected]
Expand Down
4 changes: 4 additions & 0 deletions src/components/tl/cuda/tl_cuda_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ UCC_CLASS_INIT_FUNC(ucc_tl_cuda_team_t, ucc_base_context_t *tl_context,
self->stream = NULL;
self->topo = NULL;
self->scratch.loc = NULL;
if (UCC_TL_TEAM_SIZE(self) < 2) {
tl_trace(tl_context->lib, "team size is too small, min supported 2");
return UCC_ERR_NOT_SUPPORTED;
}
if (UCC_TL_TEAM_SIZE(self) > UCC_TL_CUDA_MAX_PEERS) {
tl_info(tl_context->lib, "team size is too large, max supported %d",
UCC_TL_CUDA_MAX_PEERS);
Expand Down
25 changes: 25 additions & 0 deletions src/components/tl/self/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2021-2022. ALL RIGHTS RESERVED.
# Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
#

# Everything below is built only when the TL_SELF_ENABLED automake conditional
# is true; the conditional is set by this component's configure.m4.
if TL_SELF_ENABLED
# Files compiled into the TL/SELF module.
sources = \
tl_self.h \
tl_self.c \
tl_self_coll.c \
tl_self_context.c \
tl_self_lib.c \
tl_self_team.c


# Libtool module definition. The BASE_* flags, SOVERSION and UCC_TOP_BUILDDIR
# are defined outside this file. The module links against the core libucc.la.
module_LTLIBRARIES = libucc_tl_self.la
libucc_tl_self_la_SOURCES = $(sources)
libucc_tl_self_la_CPPFLAGS = $(AM_CPPFLAGS) $(BASE_CPPFLAGS)
libucc_tl_self_la_CFLAGS = $(BASE_CFLAGS)
libucc_tl_self_la_LDFLAGS = -version-info $(SOVERSION) --as-needed
libucc_tl_self_la_LIBADD = $(UCC_TOP_BUILDDIR)/src/libucc.la

# Rules shared with other modules (contents not visible from this file).
include $(top_srcdir)/config/module.am

endif
21 changes: 21 additions & 0 deletions src/components/tl/self/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
# Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
#

# TL/SELF starts disabled. It is switched on only when the required-TL check
# for "self" leaves CHECKED_TL_REQUIRED set to "y". In that case "self" is
# appended to tl_modules, and the profiling check for "tl_self" may further
# define HAVE_PROFILING_TL_SELF and append to prof_modules.
tl_self_enabled=n
CHECK_TLS_REQUIRED(["self"])
AS_IF([test "$CHECKED_TL_REQUIRED" = "y"],
[
tl_modules="${tl_modules}:self"
tl_self_enabled=y
CHECK_NEED_TL_PROFILING(["tl_self"])
AS_IF([test "$TL_PROFILING_REQUIRED" = "y"],
[
AC_DEFINE([HAVE_PROFILING_TL_SELF], [1], [Enable profiling for TL SELF])
prof_modules="${prof_modules}:tl_self"
], [])
], [])

# Publish the decision as the TL_SELF_ENABLED conditional used by Makefile.am.
# The component Makefile is generated regardless of the decision.
AM_CONDITIONAL([TL_SELF_ENABLED], [test "$tl_self_enabled" = "y"])
AC_CONFIG_FILES([src/components/tl/self/Makefile])
56 changes: 56 additions & 0 deletions src/components/tl/self/tl_self.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2021-2022. ALL RIGHTS RESERVED.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
*/

#include "tl_self.h"
#include "utils/ucc_malloc.h"
#include "components/mc/ucc_mc.h"
#include "components/mc/base/ucc_mc_base.h"

/* Attribute query entry points for the TL/SELF lib and context. They are only
 * declared here; this translation unit does not define them.
 * NOTE(review): presumably defined in tl_self_lib.c / tl_self_context.c and
 * picked up by UCC_TL_IFACE_DECLARE at the end of this file - confirm. */
ucc_status_t ucc_tl_self_get_lib_attr(const ucc_base_lib_t *lib,
ucc_base_lib_attr_t *base_attr);
ucc_status_t ucc_tl_self_get_context_attr(const ucc_base_context_t *context,
ucc_base_ctx_attr_t *base_attr);

/* Config table for ucc_tl_self_lib_config_t. It holds a single entry that
 * nests the generic ucc_tl_lib_config_table at the offset of the `super`
 * member, followed by the {NULL} terminator. No TL/SELF-specific options are
 * declared here. */
static ucc_config_field_t ucc_tl_self_lib_config_table[] = {
{"", "", NULL, ucc_offsetof(ucc_tl_self_lib_config_t, super),
UCC_CONFIG_TYPE_TABLE(ucc_tl_lib_config_table)},

{NULL}};

static ucs_config_field_t ucc_tl_self_context_config_table[] = {
{"", "", NULL, ucc_offsetof(ucc_tl_self_context_config_t, super),
UCC_CONFIG_TYPE_TABLE(ucc_tl_context_config_table)},

{NULL}};

/* Instantiate the class "new"/"delete" helpers for the TL/SELF lib and context
 * types, and the "new" helper for the team type. The argument lists mirror the
 * UCC_CLASS_DECLARE lines in tl_self.h.
 * NOTE(review): the macro bodies are not visible from this file; the exact
 * names of the generated functions should be confirmed against the class
 * macro definitions. */
UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_self_lib_t, ucc_base_lib_t,
const ucc_base_lib_params_t *,
const ucc_base_config_t *);

UCC_CLASS_DEFINE_DELETE_FUNC(ucc_tl_self_lib_t, ucc_base_lib_t);

UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_self_context_t, ucc_base_context_t,
const ucc_base_context_params_t *,
const ucc_base_config_t *);

UCC_CLASS_DEFINE_DELETE_FUNC(ucc_tl_self_context_t, ucc_base_context_t);

/* No DELETE_FUNC is instantiated for the team type in this file. */
UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_self_team_t, ucc_base_team_t,
ucc_base_context_t *, const ucc_base_team_params_t *);

/* Team and collective entry points. They are only declared here; this
 * translation unit does not define them. */
ucc_status_t ucc_tl_self_team_create_test(ucc_base_team_t *tl_team);

ucc_status_t ucc_tl_self_team_destroy(ucc_base_team_t *tl_team);

/* Also declared in tl_self.h (there the last parameter is named task_h). */
ucc_status_t ucc_tl_self_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task);

ucc_status_t ucc_tl_self_team_get_scores(ucc_base_team_t *tl_team,
ucc_coll_score_t **score);

/* NOTE(review): presumably defines the `ucc_tl_self` interface object that
 * tl_self.h declares extern, wiring in the tables and functions named above;
 * the macro body is not visible here - confirm. */
UCC_TL_IFACE_DECLARE(self, SELF);
98 changes: 98 additions & 0 deletions src/components/tl/self/tl_self.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2021-2022. ALL RIGHTS RESERVED.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
*/

#ifndef UCC_TL_SELF_H_
#define UCC_TL_SELF_H_
#include <ucs/memory/memory_type.h>
#include "components/tl/ucc_tl.h"
#include "components/tl/ucc_tl_log.h"
#include "core/ucc_ee.h"
#include "utils/ucc_mpool.h"

/* Default score for TL/SELF. The #ifndef guard lets a build override the value
 * by defining UCC_TL_SELF_DEFAULT_SCORE before this header is included.
 * NOTE(review): the consumer of this value is not in this header - presumably
 * the team score setup; confirm. */
#ifndef UCC_TL_SELF_DEFAULT_SCORE
#define UCC_TL_SELF_DEFAULT_SCORE 50
#endif

/* Select the profiling header: the real one when HAVE_PROFILING_TL_SELF is
 * defined (configure.m4 defines it when profiling for tl_self is required),
 * otherwise the "off" variant. */
#ifdef HAVE_PROFILING_TL_SELF
#include "utils/profile/ucc_profile.h"
#else
#include "utils/profile/ucc_profile_off.h"
#endif

/* TL/SELF-prefixed aliases for the generic UCC_PROFILE_* macros supplied by
 * whichever header was included above. */
#define UCC_TL_SELF_PROFILE_FUNC UCC_PROFILE_FUNC
#define UCC_TL_SELF_PROFILE_FUNC_VOID UCC_PROFILE_FUNC_VOID
#define UCC_TL_SELF_PROFILE_REQUEST_NEW UCC_PROFILE_REQUEST_NEW
#define UCC_TL_SELF_PROFILE_REQUEST_EVENT UCC_PROFILE_REQUEST_EVENT
#define UCC_TL_SELF_PROFILE_REQUEST_FREE UCC_PROFILE_REQUEST_FREE

/* TL/SELF interface type: wraps the generic ucc_tl_iface_t as its first and
 * only member. */
typedef struct ucc_tl_self_iface {
ucc_tl_iface_t super;
} ucc_tl_self_iface_t;
/* Extern iface should follow the pattern: ucc_tl_<tl_name> */
extern ucc_tl_self_iface_t ucc_tl_self;

/* Lib-level configuration for TL/SELF. It carries only the generic TL lib
 * config; no TL/SELF-specific fields are declared. */
typedef struct ucc_tl_self_lib_config {
ucc_tl_lib_config_t super;
} ucc_tl_self_lib_config_t;

/* Context-level configuration for TL/SELF. It carries only the generic TL
 * context config; no TL/SELF-specific fields are declared. */
typedef struct ucc_tl_self_context_config {
ucc_tl_context_config_t super;
} ucc_tl_self_context_config_t;

/* TL/SELF lib object: the generic TL lib base followed by a TL/SELF lib
 * config stored by value. */
typedef struct ucc_tl_self_lib {
ucc_tl_lib_t super;
ucc_tl_self_lib_config_t cfg;
} ucc_tl_self_lib_t;
/* Class declaration; the two types listed are the constructor argument types
 * (they match the UCC_CLASS_DEFINE_NEW_FUNC instantiation in tl_self.c). */
UCC_CLASS_DECLARE(ucc_tl_self_lib_t, const ucc_base_lib_params_t *,
const ucc_base_config_t *);

/* TL/SELF context object: the generic TL context base, a TL/SELF context
 * config stored by value, and a memory pool.
 * NOTE(review): req_mp presumably pools ucc_tl_self_task_t objects - confirm
 * against the context init code. */
typedef struct ucc_tl_self_context {
ucc_tl_context_t super;
ucc_tl_self_context_config_t cfg;
ucc_mpool_t req_mp;
} ucc_tl_self_context_t;
/* Class declaration; the two types listed are the constructor argument types
 * (they match the UCC_CLASS_DEFINE_NEW_FUNC instantiation in tl_self.c). */
UCC_CLASS_DECLARE(ucc_tl_self_context_t, const ucc_base_context_params_t *,
const ucc_base_config_t *);

/* Per-collective task for TL/SELF. It extends the generic ucc_coll_task_t with
 * a source/destination pointer pair, a size, the memory type of each side and
 * a handle to an executor task.
 * NOTE(review): the units of `size` (presumably bytes) and who fills in and
 * owns `etask` are not visible in this header - confirm in tl_self_coll.c. */
typedef struct ucc_tl_self_task {
ucc_coll_task_t super;
void *src;
void *dst;
size_t size;
ucc_memory_type_t src_memtype;
ucc_memory_type_t dst_memtype;
ucc_ee_executor_task_t *etask;
} ucc_tl_self_task_t;

/* TL/SELF team object: the generic TL team base plus a status field.
 * NOTE(review): `status` presumably records the team creation state reported
 * by ucc_tl_self_team_create_test - confirm in tl_self_team.c. */
typedef struct ucc_tl_self_team {
ucc_tl_team_t super;
ucc_status_t status;
} ucc_tl_self_team_t;
/* Class declaration; the two types listed are the constructor argument types
 * (they match the UCC_CLASS_DEFINE_NEW_FUNC instantiation in tl_self.c). */
UCC_CLASS_DECLARE(ucc_tl_self_team_t, ucc_base_context_t *,
const ucc_base_team_params_t *);

/* Bitmask of the collective types TL/SELF advertises as supported: the
 * bitwise OR of the sixteen UCC_COLL_TYPE_* flags listed below, one per line
 * and grouped by family. */
#define UCC_TL_SELF_SUPPORTED_COLLS                                            \
    (UCC_COLL_TYPE_BARRIER         |                                           \
     UCC_COLL_TYPE_BCAST           |                                           \
     UCC_COLL_TYPE_FANIN           |                                           \
     UCC_COLL_TYPE_FANOUT          |                                           \
     UCC_COLL_TYPE_ALLGATHER       |                                           \
     UCC_COLL_TYPE_ALLGATHERV      |                                           \
     UCC_COLL_TYPE_ALLTOALL        |                                           \
     UCC_COLL_TYPE_ALLTOALLV       |                                           \
     UCC_COLL_TYPE_GATHER          |                                           \
     UCC_COLL_TYPE_GATHERV         |                                           \
     UCC_COLL_TYPE_SCATTER         |                                           \
     UCC_COLL_TYPE_SCATTERV        |                                           \
     UCC_COLL_TYPE_REDUCE          |                                           \
     UCC_COLL_TYPE_ALLREDUCE       |                                           \
     UCC_COLL_TYPE_REDUCE_SCATTER  |                                           \
     UCC_COLL_TYPE_REDUCE_SCATTERV)

/* Given a TL/SELF team pointer, follow super.super.context->lib and downcast
 * the result to ucc_tl_self_lib_t via ucc_derived_of. */
#define UCC_TL_SELF_TEAM_LIB(_team) \
(ucc_derived_of((_team)->super.super.context->lib, ucc_tl_self_lib_t))

/* Given a TL/SELF team pointer, take super.super.context and downcast it to
 * ucc_tl_self_context_t via ucc_derived_of. */
#define UCC_TL_SELF_TEAM_CTX(_team) \
(ucc_derived_of((_team)->super.super.context, ucc_tl_self_context_t))

/* Collective task entry points, declared only (no definition in this header).
 * ucc_tl_self_coll_init receives the collective arguments and the team, and
 * has an output parameter task_h for a task pointer.
 * ucc_tl_self_coll_finalize receives a task pointer.
 * NOTE(review): presumably finalize releases a task produced by init -
 * confirm in tl_self_coll.c. */
ucc_status_t ucc_tl_self_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
ucc_coll_task_t **task_h);
ucc_status_t ucc_tl_self_coll_finalize(ucc_coll_task_t *coll_task);

#endif
Loading

0 comments on commit d891b01

Please sign in to comment.