Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TL/MLX5/MCAST: adding ip over ib mcast helper functions as well as some of the environment variables #861

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/components/tl/mlx5/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ mcast = \
mcast/p2p/ucc_tl_mlx5_mcast_p2p.c \
mcast/tl_mlx5_mcast_progress.h \
mcast/tl_mlx5_mcast_helper.h \
mcast/tl_mlx5_mcast_helper.c \
mcast/tl_mlx5_mcast_team.c

sources = \
Expand Down
191 changes: 191 additions & 0 deletions src/components/tl/mlx5/mcast/tl_mlx5_mcast_helper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/**
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#include "tl_mlx5_mcast_helper.h"
#include <glob.h>
#include <net/if.h>
#include <ifaddrs.h>

#define PREF "/sys/class/net/"
#define SUFF "/device/resource"
#define MAX_STR_LEN 128

static ucc_status_t ucc_tl_mlx5_get_ipoib_ip(char *ifname, struct sockaddr_storage *addr)
{
ucc_status_t status = UCC_ERR_NO_RESOURCE;
struct ifaddrs *ifaddr = NULL;
struct ifaddrs *ifa = NULL;
int is_ipv4 = 0;
int family;
int n;
int is_up;

if (getifaddrs(&ifaddr) == -1) {
return UCC_ERR_NO_RESOURCE;
}

for (ifa = ifaddr, n = 0; ifa != NULL; ifa=ifa->ifa_next, n++) {
if (ifa->ifa_addr == NULL) {
continue;
}

family = ifa->ifa_addr->sa_family;
if (family != AF_INET && family != AF_INET6) {
continue;
}

is_up = (ifa->ifa_flags & IFF_UP) == IFF_UP;
is_ipv4 = (family == AF_INET) ? 1 : 0;

if (is_up && !strncmp(ifa->ifa_name, ifname, strlen(ifname)) ) {
if (is_ipv4) {
memcpy((struct sockaddr_in *) addr,
MamziB marked this conversation as resolved.
Show resolved Hide resolved
(struct sockaddr_in *) ifa->ifa_addr,
sizeof(struct sockaddr_in));
} else {
memcpy((struct sockaddr_in6 *) addr,
(struct sockaddr_in6 *) ifa->ifa_addr,
sizeof(struct sockaddr_in6));
}

status = UCC_OK;
break;
}
}

freeifaddrs(ifaddr);
return status;
MamziB marked this conversation as resolved.
Show resolved Hide resolved
}

static int cmp_files(char *f1, char *f2)
{
int answer = 0;
FILE *fp1;
FILE *fp2;
int ch1;
int ch2;

if ((fp1 = fopen(f1, "r")) == NULL) {
goto out;
} else if ((fp2 = fopen(f2, "r")) == NULL) {
goto close;
}

do {
ch1 = getc(fp1);
ch2 = getc(fp2);
} while((ch1 != EOF) && (ch2 != EOF) && (ch1 == ch2));


if (ch1 == ch2) {
answer = 1;
}

if (fclose(fp2) != 0) {
return 0;
}
close:
if (fclose(fp1) != 0) {
return 0;
}
out:
return answer;
}

static int port_from_file(char *port_file)
{
int res = -1;
char buf1[MAX_STR_LEN];
char buf2[MAX_STR_LEN];
FILE *fp;
int len;

if ((fp = fopen(port_file, "r")) == NULL) {
return -1;
}

if (fgets(buf1, MAX_STR_LEN - 1, fp) == NULL) {
goto out;
}

len = strlen(buf1) - 2;
strncpy(buf2, buf1 + 2, len);
MamziB marked this conversation as resolved.
Show resolved Hide resolved
buf2[len] = 0;
res = atoi(buf2);

out:
if (fclose(fp) != 0) {
return -1;
}
return res;
}

static ucc_status_t dev2if(char *dev_name, char *port, struct sockaddr_storage
*rdma_src_addr)
{
ucc_status_t status = UCC_OK;
glob_t glob_el = {0,};
char dev_file [MAX_STR_LEN];
char port_file[MAX_STR_LEN];
char net_file [MAX_STR_LEN];
char if_name [MAX_STR_LEN];
char glob_path[MAX_STR_LEN];
int i;
char **p;
int len;

sprintf(glob_path, PREF"*");
MamziB marked this conversation as resolved.
Show resolved Hide resolved

sprintf(dev_file, "/sys/class/infiniband/%s"SUFF, dev_name);
if (glob(glob_path, 0, 0, &glob_el)) {
return UCC_ERR_NO_RESOURCE;
}
p = glob_el.gl_pathv;

if (glob_el.gl_pathc >= 1) {
for (i = 0; i < glob_el.gl_pathc; i++, p++) {
sprintf(port_file, "%s/dev_id", *p);
sprintf(net_file, "%s"SUFF, *p);
if(cmp_files(net_file, dev_file) && port != NULL &&
port_from_file(port_file) == atoi(port) - 1) {
MamziB marked this conversation as resolved.
Show resolved Hide resolved
len = strlen(net_file) - strlen(PREF) - strlen(SUFF);
strncpy(if_name, net_file + strlen(PREF), len);
if_name[len] = 0;

status = ucc_tl_mlx5_get_ipoib_ip(if_name, rdma_src_addr);
if (UCC_OK == status) {
break;
}
}
}
}

globfree(&glob_el);
return status;
}

ucc_status_t ucc_tl_mlx5_probe_ip_over_ib(char* ib_dev, struct
sockaddr_storage *addr)
{
char *ib_name = NULL;
char *port = NULL;
ucc_status_t status;
struct sockaddr_storage rdma_src_addr;

if (NULL == ib_dev) {
status = UCC_ERR_NO_RESOURCE;
} else {
ucc_string_split(ib_dev, ":", 2, &ib_name, &port);
status = dev2if(ib_name, port, &rdma_src_addr);
}

if (UCC_OK == status) {
*addr = rdma_src_addr;
}

return status;
}

7 changes: 5 additions & 2 deletions src/components/tl/mlx5/mcast/tl_mlx5_mcast_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,14 +352,17 @@ static inline ucc_status_t ucc_tl_mlx5_mcast_reliable(ucc_tl_mlx5_mcast_coll_com
return UCC_INPROGRESS;
}

ucc_status_t ucc_tl_setup_mcast(ucc_tl_mlx5_mcast_coll_comm_t *comm);
ucc_status_t ucc_tl_mlx5_probe_ip_over_ib(char* ib_dev_list,
struct sockaddr_storage *addr);

ucc_status_t ucc_tl_mlx5_setup_mcast(ucc_tl_mlx5_mcast_coll_comm_t *comm);

ucc_status_t ucc_tl_mlx5_mcast_init_qps(ucc_tl_mlx5_mcast_coll_context_t *ctx,
ucc_tl_mlx5_mcast_coll_comm_t *comm);

ucc_status_t ucc_tl_mlx5_mcast_setup_qps(ucc_tl_mlx5_mcast_coll_context_t *ctx,
ucc_tl_mlx5_mcast_coll_comm_t *comm);

ucc_status_t ucc_tl_clean_mcast_comm(ucc_tl_mlx5_mcast_coll_comm_t *comm);
ucc_status_t ucc_tl_mlx5_clean_mcast_comm(ucc_tl_mlx5_mcast_coll_comm_t *comm);

#endif /* TL_MLX5_MCAST_HELPER_H_ */
29 changes: 29 additions & 0 deletions src/components/tl/mlx5/tl_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,27 @@ static ucc_config_field_t ucc_tl_mlx5_lib_config_table[] = {
ucc_offsetof(ucc_tl_mlx5_lib_config_t, qp_conf.qp_max_atomic),
UCC_CONFIG_TYPE_UINT},

{"MCAST_SX_DEPTH", "512", "Send context depth of the Mcast comm",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, mcast_conf.sx_depth),
UCC_CONFIG_TYPE_INT},

{"MCAST_SX_INLINE", "128", "Minimal size for inline data send in Mcast",
MamziB marked this conversation as resolved.
Show resolved Hide resolved
ucc_offsetof(ucc_tl_mlx5_lib_config_t, mcast_conf.sx_inline),
UCC_CONFIG_TYPE_MEMUNITS},

{"MCAST_RX_DEPTH", "4096", "Recv context depth of the Mcast comm",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, mcast_conf.rx_depth),
UCC_CONFIG_TYPE_INT},

{"MCAST_POST_RECV_THRESH", "64",
"Threshold for posting recv into rx ctx of the Mcast comm",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, mcast_conf.post_recv_thresh),
UCC_CONFIG_TYPE_INT},

{"MCAST_WINDOW_SIZE", "64", "Reliability Mcast window size",
ucc_offsetof(ucc_tl_mlx5_lib_config_t, mcast_conf.wsize),
UCC_CONFIG_TYPE_INT},

{NULL}};

static ucc_config_field_t ucc_tl_mlx5_context_config_table[] = {
Expand All @@ -77,6 +98,14 @@ static ucc_config_field_t ucc_tl_mlx5_context_config_table[] = {
ucc_offsetof(ucc_tl_mlx5_context_config_t, devices),
UCC_CONFIG_TYPE_STRING_ARRAY},

{"MCAST_TIMEOUT", "10000", "Timeout [usec] for the reliability NACK in Mcast",
ucc_offsetof(ucc_tl_mlx5_context_config_t, mcast_ctx_conf.timeout),
UCC_CONFIG_TYPE_INT},

{"MCAST_NET_DEVICE", "", "Specifies which network device to use for Mcast",
ucc_offsetof(ucc_tl_mlx5_context_config_t, mcast_ctx_conf.ib_dev_name),
UCC_CONFIG_TYPE_STRING},

{NULL}};

UCC_CLASS_DEFINE_NEW_FUNC(ucc_tl_mlx5_lib_t, ucc_base_lib_t,
Expand Down
1 change: 1 addition & 0 deletions src/utils/ucc_compiler_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define ucc_snprintf_safe snprintf
#define ucc_likely ucs_likely
#define ucc_unlikely ucs_unlikely
#define ucc_string_split ucs_string_split

/**
* Prevent compiler from reordering instructions
Expand Down
Loading