Skip to content

Commit

Permalink
Add gtest changes to reduce for asymmetric mem
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Sarkauskas committed Jun 27, 2024
1 parent 36389f4 commit dc6dffe
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 30 deletions.
6 changes: 1 addition & 5 deletions src/coll_score/ucc_coll_score_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,7 @@ static ucc_status_t ucc_coll_score_map_lookup(ucc_score_map_t *map,
ucc_list_link_t *list;
ucc_msg_range_t *r;

if (mt == UCC_MEMORY_TYPE_ASYMMETRIC) {
/* TODO */
ucc_debug("asymmetric memory type is not supported");
return UCC_ERR_NOT_SUPPORTED;
} else if (mt == UCC_MEMORY_TYPE_NOT_APPLY) {
if (mt == UCC_MEMORY_TYPE_NOT_APPLY) {
/* Temporary solution: for Barrier, Fanin, Fanout - use
"host" range list */
mt = UCC_MEMORY_TYPE_HOST;
Expand Down
1 change: 0 additions & 1 deletion src/utils/ucc_coll_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ ucc_memory_type_t ucc_coll_args_mem_type(const ucc_coll_args_t *args,
return args->dst.info.mem_type;
case UCC_COLL_TYPE_ALLGATHERV:
case UCC_COLL_TYPE_REDUCE_SCATTERV:
return args->dst.info_v.mem_type;
case UCC_COLL_TYPE_ALLTOALLV:
return args->dst.info_v.mem_type;
case UCC_COLL_TYPE_REDUCE:
Expand Down
151 changes: 127 additions & 24 deletions test/gtest/coll/test_reduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,46 @@ class test_reduce : public UccCollArgs, public testing::Test {
coll->coll_type = UCC_COLL_TYPE_REDUCE;
coll->op = T::redop;
coll->root = root;

if (r != root || !inplace) {
coll->src.info.mem_type = mem_type;
ucc_memory_type_t src_mem_type = mem_type;

#ifdef HAVE_CUDA
if (mem_symmetry == TEST_MEM_ASYMMETRIC_SRC_MISMATCH) {
src_mem_type = ((mem_type == UCC_MEMORY_TYPE_CUDA) ?
UCC_MEMORY_TYPE_HOST : UCC_MEMORY_TYPE_CUDA);
}
#endif
coll->src.info.mem_type = src_mem_type;
coll->src.info.count = (ucc_count_t)count;
coll->src.info.datatype = dt;
UCC_CHECK(ucc_mc_alloc(&ctxs[r]->src_mc_header,
ucc_dt_size(dt) * count, mem_type));
ucc_dt_size(dt) * count, src_mem_type));
coll->src.info.buffer = ctxs[r]->src_mc_header->addr;
UCC_CHECK(ucc_mc_memcpy(coll->src.info.buffer,
ctxs[r]->init_buf,
ucc_dt_size(dt) * count, mem_type,
ucc_dt_size(dt) * count, src_mem_type,
UCC_MEMORY_TYPE_HOST));
}
if (r == root) {
coll->dst.info.mem_type = mem_type;
ucc_memory_type_t dst_mem_type = mem_type;

#ifdef HAVE_CUDA
if (mem_symmetry == TEST_MEM_ASYMMETRIC_DST_MISMATCH) {
dst_mem_type = ((mem_type == UCC_MEMORY_TYPE_CUDA) ?
UCC_MEMORY_TYPE_HOST : UCC_MEMORY_TYPE_CUDA);
}
#endif
coll->dst.info.mem_type = dst_mem_type;
coll->dst.info.count = (ucc_count_t)count;
coll->dst.info.datatype = dt;
UCC_CHECK(ucc_mc_alloc(&ctxs[r]->dst_mc_header,
ucc_dt_size(dt) * count, mem_type));
ucc_dt_size(dt) * count, dst_mem_type));
coll->dst.info.buffer = ctxs[r]->dst_mc_header->addr;
if (inplace) {
UCC_CHECK(ucc_mc_memcpy(coll->dst.info.buffer,
ctxs[r]->init_buf, ucc_dt_size(dt) * count,
mem_type, UCC_MEMORY_TYPE_HOST));
dst_mem_type, UCC_MEMORY_TYPE_HOST));
}
}
if (inplace) {
Expand Down Expand Up @@ -154,7 +171,7 @@ class test_reduce_cuda : public test_reduce<T> {};
TYPED_TEST_CASE(test_reduce_host, CollReduceTypeOpsHost);
TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda);

#define TEST_DECLARE(_mem_type, _inplace, _repeat, _persistent) \
#define TEST_DECLARE(_mem_type, _inplace, _repeat, _persistent, _mem_sym) \
{ \
std::array<int, 3> counts{4, 256, 65536}; \
CHECK_TYPE_OP_SKIP(TypeParam::dt, TypeParam::redop, _mem_type); \
Expand All @@ -164,6 +181,7 @@ TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda);
int size = team->procs.size(); \
UccCollCtxVec ctxs; \
SET_MEM_TYPE(_mem_type); \
SET_MEM_SYMMETRY(_mem_sym); \
this->set_inplace(_inplace); \
this->data_init(size, TypeParam::dt, count, ctxs, _persistent);\
UccReq req(team, ctxs); \
Expand All @@ -180,50 +198,117 @@ TYPED_TEST_CASE(test_reduce_cuda, CollReduceTypeOpsCuda);
}

TYPED_TEST(test_reduce_host, single) {
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_host, single_persistent) {
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_host, single_inplace) {
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_host, single_persistent_inplace) {
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}

#ifdef HAVE_CUDA

// Symmetric
TYPED_TEST(test_reduce_cuda, single) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_cuda, single_persistent) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}
TYPED_TEST(test_reduce_cuda, single_inplace) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}
TYPED_TEST(test_reduce_cuda, single_managed) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_cuda, single_persistent_managed) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_NO_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}
TYPED_TEST(test_reduce_cuda, single_inplace_managed) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 1, 0);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 1, 0, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace_managed) {
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 3, 1);
TEST_DECLARE(UCC_MEMORY_TYPE_CUDA_MANAGED, TEST_INPLACE, 3, 1, TEST_MEM_SYMMETRIC);
}

// Asymmetric memory tests. With HAVE_CUDA, a *_SRC_MISMATCH mode allocates the
// source buffers in the opposite memory type (HOST <-> CUDA) from the one
// passed to TEST_DECLARE, and a *_DST_MISMATCH mode does the same for the
// root's destination buffer.
// NOTE(review): the *_inplace_* variants pair TEST_INPLACE with an asymmetric
// mode, a combination SET_MEM_SYMMETRY is written to skip - confirm intended.

// Base memory type CUDA, mismatching source buffers
TYPED_TEST(test_reduce_cuda, single_asymmetric_src_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_src_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_src_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_src_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

// Base memory type CUDA, mismatching destination buffer
TYPED_TEST(test_reduce_cuda, single_asymmetric_dst_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_dst_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_NO_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_dst_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_dst_mismatch_cuda) {
    TEST_DECLARE(UCC_MEMORY_TYPE_CUDA, TEST_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

// Base memory type HOST, mismatching source buffers
TYPED_TEST(test_reduce_cuda, single_asymmetric_src_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_src_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_src_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_src_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

// Base memory type HOST, mismatching destination buffer
TYPED_TEST(test_reduce_cuda, single_asymmetric_dst_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_asymmetric_dst_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_NO_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_inplace_asymmetric_dst_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 1, 0,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_cuda, single_persistent_inplace_asymmetric_dst_mismatch_host) {
    TEST_DECLARE(UCC_MEMORY_TYPE_HOST, TEST_INPLACE, 3, 1,
                 TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

#endif

#define TEST_DECLARE_MULTIPLE(_mem_type, _inplace) \
Expand Down Expand Up @@ -286,7 +371,7 @@ template <typename T> class test_reduce_dbt : public test_reduce<T> {
template <typename T> class test_reduce_2step : public test_reduce<T> {
};

#define TEST_DECLARE_WITH_ENV(_env, _n_procs, _persistent) \
#define TEST_DECLARE_WITH_ENV(_env, _n_procs, _persistent, _mem_sym) \
{ \
UccJob job(_n_procs, UccJob::UCC_JOB_CTX_GLOBAL, _env); \
UccTeam_h team = job.create_team(_n_procs); \
Expand All @@ -304,6 +389,7 @@ template <typename T> class test_reduce_2step : public test_reduce<T> {
for (auto m : mt) { \
CHECK_TYPE_OP_SKIP(TypeParam::dt, TypeParam::redop, m); \
SET_MEM_TYPE(m); \
SET_MEM_SYMMETRY(_mem_sym); \
this->set_inplace(inplace); \
this->data_init(_n_procs, TypeParam::dt, count, ctxs, \
_persistent); \
Expand Down Expand Up @@ -332,17 +418,34 @@ ucc_job_env_t reduce_2step_env = {{"UCC_CL_HIER_TUNE", "reduce:@2step:0-inf:inf"
{"UCC_CLS", "all"}};

TYPED_TEST(test_reduce_avg_order, avg_post_op) {
TEST_DECLARE_WITH_ENV(post_op_env, 15, true);
TEST_DECLARE_WITH_ENV(post_op_env, 15, true, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_dbt, reduce_dbt_shift) {
TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true);
TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_dbt, reduce_dbt_mirror) {
TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true);
TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true, TEST_MEM_SYMMETRIC);
}

TYPED_TEST(test_reduce_2step, 2step) {
TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false);
TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false, TEST_MEM_SYMMETRIC);
}

// Asymmetric-memory variants of the environment-driven reduce tests:
// avg_post_op and dbt_shift mismatch the source buffers, while dbt_mirror
// and 2step mismatch the destination buffer.
TYPED_TEST(test_reduce_avg_order, avg_post_op_asymmetric) {
    TEST_DECLARE_WITH_ENV(post_op_env, 15, true,
                          TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_dbt, reduce_dbt_shift_asymmetric) {
    TEST_DECLARE_WITH_ENV(reduce_dbt_env, 15, true,
                          TEST_MEM_ASYMMETRIC_SRC_MISMATCH);
}

TYPED_TEST(test_reduce_dbt, reduce_dbt_mirror_asymmetric) {
    TEST_DECLARE_WITH_ENV(reduce_dbt_env, 16, true,
                          TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}

TYPED_TEST(test_reduce_2step, 2step_asymmetric) {
    TEST_DECLARE_WITH_ENV(reduce_2step_env, 16, false,
                          TEST_MEM_ASYMMETRIC_DST_MISMATCH);
}
6 changes: 6 additions & 0 deletions test/gtest/common/test_ucc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,12 @@ void UccCollArgs::set_inplace(gtest_ucc_inplace_t _inplace)
inplace = _inplace;
}

/**
 * Select how the test's source/destination memory types relate to mem_type.
 *
 * @param _mem_symmetry  symmetry mode to store in this->mem_symmetry.
 *
 * Only the asymmetric modes are incompatible with in-place operation, so the
 * assertion fires only when an asymmetric mode is requested while in-place is
 * already set. This matches the condition used by SET_MEM_SYMMETRY; choosing
 * TEST_MEM_SYMMETRIC is always allowed.
 */
void UccCollArgs::set_mem_symmetry(gtest_ucc_mem_symmetry_t _mem_symmetry)
{
    ucc_assert(_mem_symmetry == TEST_MEM_SYMMETRIC || !inplace);
    mem_symmetry = _mem_symmetry;
}

void clear_buffer(void *_buf, size_t size, ucc_memory_type_t mt, uint8_t value)
{
void *buf = _buf;
Expand Down
19 changes: 19 additions & 0 deletions test/gtest/common/test_ucc.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,17 @@ typedef enum {
TEST_INPLACE
} gtest_ucc_inplace_t;

/* Relationship between a test's src/dst buffer memory types and the
 * configured mem_type. In the reduce test a "mismatch" buffer is allocated
 * in HOST memory when mem_type is CUDA, and in CUDA memory otherwise. */
typedef enum {
TEST_MEM_SYMMETRIC, /* src and dst buffers both use mem_type */
TEST_MEM_ASYMMETRIC_SRC_MISMATCH, /* src buffers use the other memory type; dst uses mem_type */
TEST_MEM_ASYMMETRIC_DST_MISMATCH, /* dst buffer uses the other memory type; src uses mem_type */
} gtest_ucc_mem_symmetry_t;

class UccCollArgs {
protected:
ucc_memory_type_t mem_type;
gtest_ucc_inplace_t inplace;
gtest_ucc_mem_symmetry_t mem_symmetry;
void alltoallx_init_buf(int src_rank, int dst_rank, uint8_t *buf, size_t len)
{
for (int i = 0; i < len; i++) {
Expand All @@ -65,6 +72,7 @@ class UccCollArgs {
// defaults
mem_type = UCC_MEMORY_TYPE_HOST;
inplace = TEST_NO_INPLACE;
mem_symmetry = TEST_MEM_SYMMETRIC;
}
virtual ~UccCollArgs() {}
virtual void data_init(int nprocs, ucc_datatype_t dtype,
Expand All @@ -74,6 +82,7 @@ class UccCollArgs {
virtual bool data_validate(UccCollCtxVec args) = 0;
void set_mem_type(ucc_memory_type_t _mt);
void set_inplace(gtest_ucc_inplace_t _inplace);
void set_mem_symmetry(gtest_ucc_mem_symmetry_t mem_symmetry);
};

#define SET_MEM_TYPE(_mt) do { \
Expand All @@ -83,6 +92,16 @@ class UccCollArgs {
this->mem_type = _mt; \
} while (0)

/* Select the src/dst memory symmetry mode for the current test.
 * A non-symmetric mode is skipped (GTEST_SKIP) unless both the CUDA and the
 * HOST memory components are available and the fixture is not in in-place
 * mode; otherwise the mode is stored in this->mem_symmetry.
 * NOTE(review): this->inplace is read when the macro executes, so a
 * set_inplace() call made after this macro is not seen by the check -
 * confirm the ordering at call sites. */
#define SET_MEM_SYMMETRY(_sym) do {                                  \
        if ((_sym) != TEST_MEM_SYMMETRIC &&                          \
            (UCC_OK != ucc_mc_available(UCC_MEMORY_TYPE_CUDA) ||     \
             UCC_OK != ucc_mc_available(UCC_MEMORY_TYPE_HOST) ||     \
             this->inplace)) {                                       \
            GTEST_SKIP();                                            \
        }                                                            \
        this->mem_symmetry = (_sym);                                 \
    } while (0)

class ThreadAllgather;
class ThreadAllgatherReq {
public:
Expand Down
1 change: 1 addition & 0 deletions test/gtest/core/test_mc_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class test_mc_cuda : public ucc::test {
};

if (UCC_OK != ucc_mc_available(UCC_MEMORY_TYPE_CUDA)) {
    /* Report the skip reason through gtest rather than a stray debug
       printf, so it appears in the test report. */
    GTEST_SKIP() << "CUDA memory component is not available";
}

Expand Down

0 comments on commit dc6dffe

Please sign in to comment.