Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TL/UCP: Check for ucp_memh_pack; if it is not present, disable sliding window and the doca plugin #1032

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions config/m4/ucx.m4
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ UCX_MIN_REQUIRED_MAJOR=1
UCX_MIN_REQUIRED_MINOR=11
AS_IF([test "x$ucx_checked" != "xyes"],[
ucx_happy="no"
ucp_memh_happy="no"

AC_ARG_WITH([ucx],
[AS_HELP_STRING([--with-ucx=(DIR)], [Enable the use of UCX (default is guess).])],
Expand Down Expand Up @@ -67,6 +68,14 @@ AS_IF([test "x$ucx_checked" != "xyes"],[
],
[])

dnl Probe libucp for the ucp_memh_pack symbol. The sliding-window allreduce
dnl sources and the DOCA UROM components are gated on HAVE_UCP_MEMH_PACK.
dnl
dnl The shell variable ucp_memh_happy feeds AM_CONDITIONAL, which is visible
dnl to Makefiles only. AC_DEFINE additionally exports HAVE_UCP_MEMH_PACK to
dnl the C preprocessor, so that ifdef guards on that name in C sources
dnl actually observe the result of this check.
dnl
dnl NOTE(review): the extra libraries are passed through unchanged; confirm
dnl that the UCX library search path is still in LDFLAGS at this point.
AC_CHECK_LIB([ucp], [ucp_memh_pack],
[
ucp_memh_happy="yes"
AC_DEFINE([HAVE_UCP_MEMH_PACK], [1], [Define to 1 if UCP provides ucp_memh_pack])
],
[
ucp_memh_happy="no"
],[-luct -lucm -lucp])

AS_IF([test "x$ucx_happy" = "xyes"],
[
AC_COMPUTE_INT(ucx_major, [UCP_API_MAJOR], [#include <ucp/api/ucp_version.h>],
Expand Down Expand Up @@ -160,5 +169,6 @@ AS_IF([test "x$ucx_checked" != "xyes"],[

ucx_checked=yes
AM_CONDITIONAL([HAVE_UCX], [test "x$ucx_happy" != xno])
AM_CONDITIONAL([HAVE_UCP_MEMH_PACK], [test "x$ucp_memh_happy" != xno])
])
])
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_AARCH64_THUNDERX1], [false])
AM_CONDITIONAL([HAVE_AARCH64_HI1620], [false])
AM_CONDITIONAL([HAVE_UCX], [false])
AM_CONDITIONAL([HAVE_UCP_MEMH_PACK], [false])
AM_CONDITIONAL([HAVE_CUDA], [false])
AM_CONDITIONAL([HAVE_ROCM], [false])
AM_CONDITIONAL([HAVE_HIP], [false])
Expand Down
2 changes: 2 additions & 0 deletions contrib/doca_urom_ucc_plugin/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#

if HAVE_UCP_MEMH_PACK
if HAVE_DOCA_UROM

sources = \
Expand All @@ -20,3 +21,4 @@ libucc_doca_urom_plugin_la_LDFLAGS = -version-info $(SOVERSION) --as-needed $(U
libucc_doca_urom_plugin_la_LIBADD = $(UCX_LIBADD) $(DOCA_UROM_LIBADD) $(UCC_TOP_BUILDDIR)/src/libucc.la

endif
endif
4 changes: 4 additions & 0 deletions src/components/cl/doca_urom/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#

if HAVE_UCP_MEMH_PACK

sources = \
cl_doca_urom.h \
cl_doca_urom.c \
Expand All @@ -22,3 +24,5 @@ libucc_cl_doca_urom_la_LDFLAGS = -version-info $(SOVERSION) --as-needed $(DOCA_
libucc_cl_doca_urom_la_LIBADD = $(DOCA_UROM_LIBADD) $(UCC_TOP_BUILDDIR)/src/libucc.la

include $(top_srcdir)/config/module.am

endif
4 changes: 4 additions & 0 deletions src/components/tl/ucp/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,11 @@ allreduce = \
allreduce/allreduce.c \
allreduce/allreduce_knomial.c \
allreduce/allreduce_sra_knomial.c \
if HAVE_UCP_MEMH_PACK
allreduce/allreduce_sliding_window.h \
allreduce/allreduce_sliding_window.c \
allreduce/allreduce_sliding_window_setup.c \
endif
allreduce/allreduce_dbt.c

barrier = \
Expand Down Expand Up @@ -114,8 +116,10 @@ sources = \
tl_ucp_ep.c \
tl_ucp_coll.c \
tl_ucp_service_coll.c \
if HAVE_UCP_MEMH_PACK
tl_ucp_dpu_offload.h \
tl_ucp_dpu_offload.c \
endif
$(allgather) \
$(allgatherv) \
$(alltoall) \
Expand Down
4 changes: 4 additions & 0 deletions src/components/tl/ucp/allreduce/allreduce_sliding_window.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* See file LICENSE for terms.
*/

#ifdef HAVE_UCP_MEMH_PACK

#include "allreduce.h"
#include "allreduce_sliding_window.h"
#include "../allgather/allgather.h"
Expand Down Expand Up @@ -655,3 +657,5 @@ ucc_tl_ucp_allreduce_sliding_window_init(ucc_base_coll_args_t *coll_args,
ucc_tl_ucp_put_schedule(schedule);
return status;
}

#endif
4 changes: 4 additions & 0 deletions src/components/tl/ucp/tl_ucp_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,11 @@ ucc_status_t ucc_tl_ucp_alg_id_to_init(int alg_id, const char *alg_id_str,
*init = ucc_tl_ucp_allreduce_dbt_init;
break;
case UCC_TL_UCP_ALLREDUCE_ALG_SLIDING_WINDOW:
#ifdef HAVE_UCP_MEMH_PACK
*init = ucc_tl_ucp_allreduce_sliding_window_init;
#else
status = UCC_ERR_NOT_SUPPORTED;
#endif
break;
default:
status = UCC_ERR_INVALID_PARAM;
Expand Down
2 changes: 2 additions & 0 deletions src/components/tl/ucp/tl_ucp_coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ typedef struct ucc_tl_ucp_task {
ucc_ee_executor_task_t *etask;
ucc_ee_executor_t *executor;
} allreduce_kn;
#ifdef HAVE_UCP_MEMH_PACK
struct {
ucc_tl_ucp_allreduce_sw_pipeline *pipe;
ucs_status_ptr_t *put_requests;
Expand All @@ -137,6 +138,7 @@ typedef struct ucc_tl_ucp_task {
ucc_ee_executor_task_t *reduce_task;
ucc_tl_ucp_dpu_offload_buf_info_t *bufs;
} allreduce_sliding_window;
#endif
struct {
int phase;
ucc_knomial_pattern_t p;
Expand Down
Loading