Skip to content

Commit

Permalink
TL/UCP: Grace tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
nsarkauskas authored and nsarkauskas committed Oct 4, 2024
1 parent 16586e1 commit 5c9e288
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 3 deletions.
52 changes: 52 additions & 0 deletions contrib/ucc.conf
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,55 @@ UCC_TL_UCP_TUNE=allreduce:0-16k:@0#allreduce:16k-inf:@1
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-8k:host:8,8k-inf:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8
UCC_TL_UCP_TUNE=allreduce:0-8k:@0#allreduce:8k-inf:@1

#NVIDIA Grace, 2 socket (C2):
# Single-node (nnodes=1) allreduce tuning for TL/UCP. In every section of
# this group team_size is exactly twice the sock value.
# Each section sets three keys:
#   UCC_TL_UCP_ALLREDUCE_KN_RADIX     - radix list for the 0-4k range
#   UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX - radix list for 4096..1048576
#   UCC_TL_UCP_TUNE                   - selects @0 for 0-4k and @1 for 4k-inf
# NOTE(review): entries look like <size range>:<memory type>:<radix>, with the
# ranges presumably message sizes in bytes; @0/@1 presumably map to the KN and
# SRA_KN allreduce algorithms, since their radix ranges line up with the TUNE
# split - confirm against the TL/UCP documentation.
# NOTE(review): no SRA_KN radix is given above 1048576 although @1 is selected
# up to inf; presumably a default radix applies there - confirm.
[vendor=nvidia model=grace team_size=144 sock=72 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:96,8192-16384:host:4,16384-32768:host:6,32768-65536:host:18,65536-131072:host:32,131072-262144:host:72,262144-524288:host:3,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=128 sock=64 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:72,8192-16384:host:4,16384-32768:host:8,32768-65536:host:16,65536-131072:host:32,131072-262144:host:64,262144-524288:host:3,524288-1048576:host:3
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=64 sock=32 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:4,16384-32768:host:8,32768-65536:host:16,65536-131072:host:32,131072-262144:host:3,262144-524288:host:3,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=32 sock=16 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:4,16384-32768:host:8,32768-65536:host:16,65536-131072:host:3,131072-262144:host:2,262144-524288:host:2,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=16 sock=8 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:8,16384-32768:host:8,32768-65536:host:2,65536-131072:host:2,131072-262144:host:2,262144-524288:host:2,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

#NVIDIA Grace, 1 socket (CG):
# Single-node (nnodes=1) allreduce tuning for TL/UCP. In every section of
# this group team_size equals the sock value.
# Each section sets a KN radix list for the low range, an SRA_KN radix list
# for the range above it (up to 1048576), and a UCC_TL_UCP_TUNE line that
# selects @0 below the switch point and @1 above it.
# NOTE(review): entries look like <size range>:<memory type>:<radix>, with the
# ranges presumably message sizes in bytes; @0/@1 presumably map to the KN and
# SRA_KN allreduce algorithms - confirm against the TL/UCP documentation.

# Only section in this file's Grace entries that splits the KN range
# (radix 3 for 0-64, radix 2 for 64-4k).
[vendor=nvidia model=grace team_size=72 sock=72 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-64:host:3,64-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:6,8192-16384:host:6,16384-32768:host:6,32768-65536:host:16,65536-131072:host:32,131072-262144:host:48,262144-524288:host:2,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=64 sock=64 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:4,16384-32768:host:8,32768-65536:host:18,65536-131072:host:32,131072-262144:host:48,262144-524288:host:2,524288-1048576:host:2
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

# NOTE(review): the 65536-131072 radix (144) is larger than team_size=32;
# presumably the radix is clamped to the team size at runtime - confirm this
# value is intended.
[vendor=nvidia model=grace team_size=32 sock=32 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:4,16384-32768:host:8,32768-65536:host:16,65536-131072:host:144,131072-262144:host:2,262144-524288:host:2,524288-1048576:host:4
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

[vendor=nvidia model=grace team_size=16 sock=16 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-4k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4096-8192:host:2,8192-16384:host:4,16384-32768:host:8,32768-65536:host:16,65536-131072:host:3,131072-262144:host:2,262144-524288:host:4,524288-1048576:host:4
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1

# This section switches from @0 to @1 at 8k instead of 4k, so its SRA_KN radix
# list starts at 8192.
# NOTE(review): radix values 16, 18 and 96 are larger than team_size=8;
# presumably clamped to the team size at runtime - confirm these are intended.
[vendor=nvidia model=grace team_size=8 sock=8 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-8k:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8192-16384:host:2,16384-32768:host:4,32768-65536:host:16,65536-131072:host:18,131072-262144:host:6,262144-524288:host:96,524288-1048576:host:4
UCC_TL_UCP_TUNE=allreduce:0-8k:@0#allreduce:8k-inf:@1
2 changes: 1 addition & 1 deletion src/utils/ini.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ int ucc_ini_parse_string(const char* string, ini_handler handler, void* user);
/* Maximum line length for any line in INI file (stack or heap). Note that
this must be 3 more than the longest line (due to '\r', '\n', and '\0'). */
#ifndef UCC_INI_MAX_LINE
#define UCC_INI_MAX_LINE 200
#define UCC_INI_MAX_LINE 500
#endif

/* Nonzero to allow heap line buffer to grow via realloc(), zero for a
Expand Down
18 changes: 17 additions & 1 deletion src/utils/ucc_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ static int ucc_check_section(ucc_section_desc_t sec_desc,
ucc_rank_t team_size,
ucc_rank_t ppn_min,
ucc_rank_t ppn_max,
ucc_rank_t sock_min,
ucc_rank_t sock_max,
ucc_rank_t nnodes)
{
if (sec_desc.mask & UCC_TUNING_DESC_FIELD_VENDOR) {
Expand All @@ -72,6 +74,11 @@ static int ucc_check_section(ucc_section_desc_t sec_desc,
return 0;
}
}
if (sec_desc.mask & UCC_TUNING_DESC_FIELD_SOCK) {
if (sock_min < sec_desc.min_sock || sock_max > sec_desc.max_sock) {
return 0;
}
}
if (sec_desc.mask & UCC_TUNING_DESC_FIELD_NNODES) {
if (nnodes < sec_desc.min_nnodes || nnodes > sec_desc.max_nnodes) {
return 0;
Expand Down Expand Up @@ -160,6 +167,13 @@ ucc_parse_section_name_to_desc(const char *sec_name, ucc_section_desc_t *desc)
}
desc->mask |= UCC_TUNING_DESC_FIELD_PPN;
}
else if (strcmp(cur_str[0], "sock") == 0) {
if (!ucc_check_range(cur_str[1], &desc->min_sock,
&desc->max_sock)) {
goto err_key;
}
desc->mask |= UCC_TUNING_DESC_FIELD_SOCK;
}
else if (strcmp(cur_str[0], "nnodes") == 0) {
if (!ucc_check_range(cur_str[1], &desc->min_nnodes,
&desc->max_nnodes)) {
Expand Down Expand Up @@ -576,6 +590,8 @@ ucc_status_t ucc_add_team_sections(void *team_cfg,
ucc_cpu_model_t model = ucc_arch_get_cpu_model();
ucc_rank_t ppn_min = ucc_topo_min_ppn(team_topo);
ucc_rank_t ppn_max = ucc_topo_max_ppn(team_topo);
ucc_rank_t sock_min = ucc_topo_min_socket_size(team_topo);
ucc_rank_t sock_max = ucc_topo_max_socket_size(team_topo);
ucc_rank_t nnodes = ucc_topo_nnodes(team_topo);
ucc_rank_t team_size = team_topo->set.map.ep_num;
khash_t(ucc_sec) *sec_h;
Expand All @@ -589,7 +605,7 @@ ucc_status_t ucc_add_team_sections(void *team_cfg,
sec_name = kh_key(sections, i);
sec = kh_val(sections, i);
if (ucc_check_section(sec->desc, vendor, model, team_size,
ppn_min, ppn_max, nnodes)) {
ppn_min, ppn_max, sock_min, sock_max, nnodes)) {
sec_h = &sec->vals_h;
j = kh_get(ucc_sec, sec_h, tune_key);
if (j != kh_end(sec_h)) {
Expand Down
5 changes: 4 additions & 1 deletion src/utils/ucc_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ enum tuning_mask {
UCC_TUNING_DESC_FIELD_MODEL = UCC_BIT(1),
UCC_TUNING_DESC_FIELD_TEAM_SIZE = UCC_BIT(2),
UCC_TUNING_DESC_FIELD_PPN = UCC_BIT(3),
UCC_TUNING_DESC_FIELD_NNODES = UCC_BIT(4)
UCC_TUNING_DESC_FIELD_NNODES = UCC_BIT(4),
UCC_TUNING_DESC_FIELD_SOCK = UCC_BIT(5)
};

typedef struct ucc_section_desc {
Expand All @@ -108,6 +109,8 @@ typedef struct ucc_section_desc {
ucc_rank_t max_team_size;
ucc_rank_t min_ppn;
ucc_rank_t max_ppn;
ucc_rank_t min_sock;
ucc_rank_t max_sock;
ucc_rank_t min_nnodes;
ucc_rank_t max_nnodes;
} ucc_section_desc_t;
Expand Down

0 comments on commit 5c9e288

Please sign in to comment.