diff --git a/src/coll_score/ucc_coll_score_map.c b/src/coll_score/ucc_coll_score_map.c index 9267a77478..037476efb2 100644 --- a/src/coll_score/ucc_coll_score_map.c +++ b/src/coll_score/ucc_coll_score_map.c @@ -74,10 +74,9 @@ void ucc_coll_score_free_map(ucc_score_map_t *map) ucc_free(map); } -static -ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map, - ucc_base_coll_args_t *bargs, - ucc_msg_range_t **range) +static ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map, + ucc_base_coll_args_t *bargs, + ucc_msg_range_t **range) { ucc_memory_type_t mt = ucc_coll_args_mem_type(&bargs->args, map->team_rank); @@ -85,11 +84,12 @@ ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map, size_t msgsize = ucc_coll_args_msgsize(&bargs->args, map->team_rank, map->team_size); - ucc_list_link_t *list; - ucc_msg_range_t *r; + ucc_list_link_t *list; + ucc_msg_range_t *r; if (mt == UCC_MEMORY_TYPE_ASYMMETRIC) { /* TODO */ + ucc_debug("asymmetric memory type is not supported"); return UCC_ERR_NOT_SUPPORTED; } else if (mt == UCC_MEMORY_TYPE_NOT_APPLY) { /* Temporary solution: for Barrier, Fanin, Fanout - use @@ -122,7 +122,9 @@ ucc_status_t ucc_coll_init(ucc_score_map_t *map, ucc_status_t status; status = ucc_coll_score_map_lookup(map, bargs, &r); - if (UCC_OK != status) { + if (ucc_unlikely(UCC_OK != status)) { + ucc_debug("coll_score_map lookup failed %d (%s)", + status, ucc_status_string(status)); return status; } diff --git a/src/core/ucc_coll.c b/src/core/ucc_coll.c index 8cf3658570..6cb0426389 100644 --- a/src/core/ucc_coll.c +++ b/src/core/ucc_coll.c @@ -280,20 +280,19 @@ UCC_CORE_PROFILE_FUNC(ucc_status_t, ucc_collective_init, print_trace: *request = &task->super; - if (ucc_global_config.coll_trace.log_level >= UCC_LOG_LEVEL_DIAG) { + if (ucc_unlikely(ucc_global_config.coll_trace.log_level >= + UCC_LOG_LEVEL_DIAG)) { char coll_str[256]; ucc_coll_str(task, coll_str, sizeof(coll_str), ucc_global_config.coll_trace.log_level); - if (ucc_global_config.coll_trace.log_level <= UCC_LOG_LEVEL_DEBUG) { + if (ucc_global_config.coll_trace.log_level <= UCC_LOG_LEVEL_INFO) { if (team->rank == 0) { ucc_log_component_collective_trace( ucc_global_config.coll_trace.log_level, "coll_init: %s", coll_str); } } else { - ucc_log_component_collective_trace( - ucc_global_config.coll_trace.log_level, "coll_init: %s", - coll_str); + ucc_coll_trace_debug("coll_init: %s", coll_str); } } diff --git a/src/utils/ucc_coll_utils.c b/src/utils/ucc_coll_utils.c index 75a49400e2..533a9e4fb3 100644 --- a/src/utils/ucc_coll_utils.c +++ b/src/utils/ucc_coll_utils.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -56,7 +56,8 @@ static inline int ucc_coll_args_is_mem_symmetric(const ucc_coll_args_t *args, ucc_rank_t rank) { - ucc_rank_t root = args->root; + ucc_rank_t root = args->root; + if (UCC_IS_INPLACE(*args)) { return 1; } @@ -93,7 +94,7 @@ ucc_coll_args_is_mem_symmetric(const ucc_coll_args_t *args, return 0; } -int ucc_coll_args_is_predefined_dt(ucc_coll_args_t *args, ucc_rank_t rank) +int ucc_coll_args_is_predefined_dt(const ucc_coll_args_t *args, ucc_rank_t rank) { switch (args->coll_type) { case UCC_COLL_TYPE_BARRIER: @@ -160,7 +161,7 @@ int ucc_coll_args_is_predefined_dt(ucc_coll_args_t *args, ucc_rank_t rank) ucc_memory_type_t ucc_coll_args_mem_type(const ucc_coll_args_t *args, ucc_rank_t rank) { - ucc_rank_t root = args->root; + ucc_rank_t root = args->root; if (!ucc_coll_args_is_mem_symmetric(args, rank)) { return UCC_MEMORY_TYPE_ASYMMETRIC; diff --git a/src/utils/ucc_coll_utils.h b/src/utils/ucc_coll_utils.h index c5cb2ef392..5b24f19cf2 100644 --- a/src/utils/ucc_coll_utils.h +++ b/src/utils/ucc_coll_utils.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See file LICENSE for terms. */ @@ -332,6 +332,6 @@ static inline size_t ucc_buffer_block_offset_aligned(size_t total_count, @param [in] args pointer to the collective args. @param [in] rank rank to check, used only for rooted collective operations. */ -int ucc_coll_args_is_predefined_dt(ucc_coll_args_t *args, ucc_rank_t rank); +int ucc_coll_args_is_predefined_dt(const ucc_coll_args_t *args, ucc_rank_t rank); #endif