From dc6dffecafc25df4b63314d214c1d1ce36cd6554 Mon Sep 17 00:00:00 2001 From: Nicholas Sarkauskas Date: Fri, 28 Jun 2024 00:22:46 +0300 Subject: [PATCH] Add reduce gtests for asymmetric memory types --- src/coll_score/ucc_coll_score_map.c | 6 +- src/utils/ucc_coll_utils.c | 1 - test/gtest/coll/test_reduce.cc | 151 +++++++++++++++++++++++----- test/gtest/common/test_ucc.cc | 6 ++ test/gtest/common/test_ucc.h | 19 ++++ 5 files changed, 153 insertions(+), 30 deletions(-) diff --git a/src/coll_score/ucc_coll_score_map.c b/src/coll_score/ucc_coll_score_map.c index 037476efb2..89dfb51f3d 100644 --- a/src/coll_score/ucc_coll_score_map.c +++ b/src/coll_score/ucc_coll_score_map.c @@ -87,11 +87,7 @@ static ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map, ucc_list_link_t *list; ucc_msg_range_t *r; - if (mt == UCC_MEMORY_TYPE_ASYMMETRIC) { - /* TODO */ - ucc_debug("asymmetric memory type is not supported"); - return UCC_ERR_NOT_SUPPORTED; - } else if (mt == UCC_MEMORY_TYPE_NOT_APPLY) { + if (mt == UCC_MEMORY_TYPE_NOT_APPLY) { /* Temporary solution: for Barrier, Fanin, Fanout - use "host" range list */ mt = UCC_MEMORY_TYPE_HOST; diff --git a/src/utils/ucc_coll_utils.c b/src/utils/ucc_coll_utils.c index 533a9e4fb3..9da8cd5da5 100644 --- a/src/utils/ucc_coll_utils.c +++ b/src/utils/ucc_coll_utils.c @@ -180,7 +180,6 @@ ucc_memory_type_t ucc_coll_args_mem_type(const ucc_coll_args_t *args, return args->dst.info.mem_type; case UCC_COLL_TYPE_ALLGATHERV: case UCC_COLL_TYPE_REDUCE_SCATTERV: - return args->dst.info_v.mem_type; case UCC_COLL_TYPE_ALLTOALLV: return args->dst.info_v.mem_type; case UCC_COLL_TYPE_REDUCE: diff --git a/test/gtest/coll/test_reduce.cc b/test/gtest/coll/test_reduce.cc index 2fb1cbc963..51fefc474e 100644 --- a/test/gtest/coll/test_reduce.cc +++ b/test/gtest/coll/test_reduce.cc @@ -43,29 +43,46 @@ class test_reduce : public UccCollArgs, public testing::Test { coll->coll_type = UCC_COLL_TYPE_REDUCE; coll->op = 
T::redop; coll->root = root; + if (r != root || !inplace) { - coll->src.info.mem_type = mem_type; + ucc_memory_type_t src_mem_type = mem_type; + +#ifdef HAVE_CUDA + if (mem_symmetry == TEST_MEM_ASYMMETRIC_SRC_MISMATCH) { + src_mem_type = ((mem_type == UCC_MEMORY_TYPE_CUDA) ? + UCC_MEMORY_TYPE_HOST : UCC_MEMORY_TYPE_CUDA); + } +#endif + coll->src.info.mem_type = src_mem_type; coll->src.info.count = (ucc_count_t)count; coll->src.info.datatype = dt; UCC_CHECK(ucc_mc_alloc(&ctxs[r]->src_mc_header, - ucc_dt_size(dt) * count, mem_type)); + ucc_dt_size(dt) * count, src_mem_type)); coll->src.info.buffer = ctxs[r]->src_mc_header->addr; UCC_CHECK(ucc_mc_memcpy(coll->src.info.buffer, ctxs[r]->init_buf, - ucc_dt_size(dt) * count, mem_type, + ucc_dt_size(dt) * count, src_mem_type, UCC_MEMORY_TYPE_HOST)); } if (r == root) { - coll->dst.info.mem_type = mem_type; + ucc_memory_type_t dst_mem_type = mem_type; + +#ifdef HAVE_CUDA + if (mem_symmetry == TEST_MEM_ASYMMETRIC_DST_MISMATCH) { + dst_mem_type = ((mem_type == UCC_MEMORY_TYPE_CUDA) ? 
+ UCC_MEMORY_TYPE_HOST : UCC_MEMORY_TYPE_CUDA); + } +#endif + coll->dst.info.mem_type = dst_mem_type; coll->dst.info.count = (ucc_count_t)count; coll->dst.info.datatype = dt; UCC_CHECK(ucc_mc_alloc(&ctxs[r]->dst_mc_header, - ucc_dt_size(dt) * count, mem_type)); + ucc_dt_size(dt) * count, dst_mem_type)); coll->dst.info.buffer = ctxs[r]->dst_mc_header->addr; if (inplace) { UCC_CHECK(ucc_mc_memcpy(coll->dst.info.buffer, ctxs[r]->init_buf, ucc_dt_size(dt) * count, - mem_type, UCC_MEMORY_TYPE_HOST)); + dst_mem_type, UCC_MEMORY_TYPE_HOST)); } } if (inplace) { @@ -154,7 +171,7 @@ class test_reduce_cuda : public test_reduce {}; TYPED_TEST_CASE(test_reduce_host, CollReduceTypeOpsHost); TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda); -#define TEST_DECLARE(_mem_type, _inplace, _repeat, _persistent) \ +#define TEST_DECLARE(_mem_type, _inplace, _repeat, _persistent, _mem_sym) \ { \ std::array counts{4, 256, 65536}; \ CHECK_TYPE_OP_SKIP(TypeParam::dt, TypeParam::redop, _mem_type); \ @@ -164,6 +181,7 @@ TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda); int size = team->procs.size(); \ UccCollCtxVec ctxs; \ SET_MEM_TYPE(_mem_type); \ + SET_MEM_SYMMETRY(_mem_sym); \ this->set_inplace(_inplace); \ this->data_init(size, TypeParam::dt, count, ctxs, _persistent);\ UccReq req(team, ctxs); \ @@ -180,50 +198,117 @@ TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda); } TYPED_TEST(test_reduce_host, single) { - TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_host, single_persistent) { - TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_host, single_inplace) { - TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_host, 
single_persistent_inplace) { - TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); } #ifdef HAVE_CUDA + +// Symmetric TYPED_TEST(test_reduce_cuda, single) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_persistent) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_inplace) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_persistent_inplace) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_managed) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_persistent_managed) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_inplace_managed) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 1, 0); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_cuda, single_persistent_inplace_managed) { - TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 3, 1); + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC); +} + +// Asymmetric src mismatch CUDA +TYPED_TEST(test_reduce_cuda, single_asymmetric_src_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0, 
TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_src_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} +TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_src_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_src_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +// Asymmetric dst mismatch CUDA +TYPED_TEST(test_reduce_cuda, single_asymmetric_dst_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_dst_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} +TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_dst_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_dst_mismatch_cuda) { + TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_DST_MISMATCH); } + +// Asymmetric src mismatch HOST +TYPED_TEST(test_reduce_cuda, single_asymmetric_src_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_src_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} +TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_src_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_src_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, 
TEST_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +// Asymmetric dst mismatch HOST +TYPED_TEST(test_reduce_cuda, single_asymmetric_dst_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_dst_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} +TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_dst_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + +TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_dst_mismatch_host) { + TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + #endif #define TEST_DECLARE_MULTIPLE(_mem_type, _inplace) \ @@ -286,7 +371,7 @@ template class test_reduce_dbt : public test_reduce { template class test_reduce_2step : public test_reduce { }; -#define TEST_DECLARE_WITH_ENV(_env, _n_procs, _persistent) \ +#define TEST_DECLARE_WITH_ENV(_env, _n_procs, _persistent, _mem_sym) \ { \ UccJob job(_n_procs, UccJob::UCC_JOB_CTX_GLOBAL, _env); \ UccTeam_h team = job.create_team(_n_procs); \ @@ -304,6 +389,7 @@ template class test_reduce_2step : public test_reduce { for (auto m : mt) { \ CHECK_TYPE_OP_SKIP(TypeParam::dt, TypeParam::redop, m); \ SET_MEM_TYPE(m); \ + SET_MEM_SYMMETRY(_mem_sym); \ this->set_inplace(inplace); \ this->data_init(_n_procs, TypeParam::dt, count, ctxs, \ _persistent); \ @@ -332,17 +418,34 @@ ucc_job_env_t reduce_2step_env = {{"UCC_CL_HIER_TUNE", "reduce:@2step:0-inf:inf" {"UCC_CLS", "all"}}; TYPED_TEST(test_reduce_avg_order, avg_post_op) { - TEST_DECLARE_WITH_ENV(post_op_env, 15, true); + TEST_DECLARE_WITH_ENV(post_op_env, 15, true, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_dbt, reduce_dbt_shift) { - TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true); + TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true, 
TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_dbt, reduce_dbt_mirror) { - TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true); + TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true, TEST_MEM_SYMMETRIC); } TYPED_TEST(test_reduce_2step, 2step) { - TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false); + TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false, TEST_MEM_SYMMETRIC); +} + +// Asymmetric memory +TYPED_TEST(test_reduce_avg_order, avg_post_op_asymmetric) { + TEST_DECLARE_WITH_ENV(post_op_env, 15, true, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_dbt, reduce_dbt_shift_asymmetric) { + TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true, TEST_MEM_ASYMMETRIC_SRC_MISMATCH); +} + +TYPED_TEST(test_reduce_dbt, reduce_dbt_mirror_asymmetric) { + TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true, TEST_MEM_ASYMMETRIC_DST_MISMATCH); +} + +TYPED_TEST(test_reduce_2step, 2step_asymmetric) { + TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false, TEST_MEM_ASYMMETRIC_DST_MISMATCH); } diff --git a/test/gtest/common/test_ucc.cc b/test/gtest/common/test_ucc.cc index 40b51c1f56..bf5232bff1 100644 --- a/test/gtest/common/test_ucc.cc +++ b/test/gtest/common/test_ucc.cc @@ -707,6 +707,12 @@ void UccCollArgs::set_inplace(gtest_ucc_inplace_t _inplace) inplace = _inplace; } +void UccCollArgs::set_mem_symmetry(gtest_ucc_mem_symmetry_t _mem_symmetry) +{ + ucc_assert(_mem_symmetry == TEST_MEM_SYMMETRIC || !inplace); + mem_symmetry = _mem_symmetry; +} + void clear_buffer(void *_buf, size_t size, ucc_memory_type_t mt, uint8_t value) { void *buf = _buf; diff --git a/test/gtest/common/test_ucc.h b/test/gtest/common/test_ucc.h index f16e014b54..bc760dad5e 100644 --- a/test/gtest/common/test_ucc.h +++ b/test/gtest/common/test_ucc.h @@ -36,10 +36,17 @@ typedef enum { TEST_INPLACE } gtest_ucc_inplace_t; +typedef enum { + TEST_MEM_SYMMETRIC, /* src/dst mem types match */ + TEST_MEM_ASYMMETRIC_SRC_MISMATCH, /* src != mem_type */ + TEST_MEM_ASYMMETRIC_DST_MISMATCH, /* dst != mem_type */ +} 
gtest_ucc_mem_symmetry_t; + class UccCollArgs { 
protected: ucc_memory_type_t mem_type; gtest_ucc_inplace_t inplace; + gtest_ucc_mem_symmetry_t mem_symmetry; void alltoallx_init_buf(int src_rank, int dst_rank, uint8_t *buf, size_t len) { for (int i = 0; i < len; i++) { @@ -65,6 +72,7 @@ class UccCollArgs { // defaults mem_type = UCC_MEMORY_TYPE_HOST; inplace = TEST_NO_INPLACE; + mem_symmetry = TEST_MEM_SYMMETRIC; } virtual ~UccCollArgs() {} virtual void data_init(int nprocs, ucc_datatype_t dtype, @@ -74,6 +82,7 @@ class UccCollArgs { virtual bool data_validate(UccCollCtxVec args) = 0; void set_mem_type(ucc_memory_type_t _mt); void set_inplace(gtest_ucc_inplace_t _inplace); + void set_mem_symmetry(gtest_ucc_mem_symmetry_t _mem_symmetry); }; #define SET_MEM_TYPE(_mt) do { \ @@ -83,6 +92,16 @@ class UccCollArgs { this->mem_type = _mt; \ } while (0) +#define SET_MEM_SYMMETRY(_sym) do { \ + if (_sym != TEST_MEM_SYMMETRIC && \ + (UCC_OK != ucc_mc_available(UCC_MEMORY_TYPE_CUDA) || \ + UCC_OK != ucc_mc_available(UCC_MEMORY_TYPE_HOST) || \ + this->inplace)) { \ + GTEST_SKIP(); \ + } \ + this->mem_symmetry = _sym; \ + } while (0) + class ThreadAllgather; class ThreadAllgatherReq { public: