Skip to content

Commit

Permalink
TEST: improve test output
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed Dec 26, 2023
1 parent c57422d commit 6e34b20
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 57 deletions.
25 changes: 1 addition & 24 deletions src/utils/ucc_coll_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,29 +123,6 @@ ucc_coll_args_get_displacement(const ucc_coll_args_t *args,
return ((uint32_t *)displacements)[idx];
}

/*
 * Return a printable name for a UCC memory type.
 *
 * ct - memory type to describe.
 *
 * Returns a pointer to a string literal: the name matching ct, or "invalid"
 * when ct matches none of the cases handled below.
 */
static inline const char* ucc_mem_type_str(ucc_memory_type_t ct)
{
    /* Fallback used when no case below matches. */
    const char *name = "invalid";

    switch ((int)ct) {
    case UCC_MEMORY_TYPE_HOST:
        name = "Host";
        break;
    case UCC_MEMORY_TYPE_CUDA:
        name = "Cuda";
        break;
    case UCC_MEMORY_TYPE_CUDA_MANAGED:
        name = "CudaManaged";
        break;
    case UCC_MEMORY_TYPE_ROCM:
        name = "Rocm";
        break;
    case UCC_MEMORY_TYPE_ROCM_MANAGED:
        name = "RocmManaged";
        break;
    case UCC_MEMORY_TYPE_ASYMMETRIC:
        name = "asymmetric";
        break;
    case UCC_MEMORY_TYPE_NOT_APPLY:
        name = "n/a";
        break;
    default:
        break;
    }
    return name;
}

static inline size_t
ucc_coll_args_get_total_count(const ucc_coll_args_t *args,
const ucc_count_t *counts, ucc_rank_t size)
Expand Down Expand Up @@ -248,7 +225,7 @@ ucc_status_t ucc_ep_map_create_nested(ucc_ep_map_t *base_map,
ucc_ep_map_t *sub_map,
ucc_ep_map_t *out);

ucc_status_t ucc_ep_map_is_identity(const ucc_ep_map_t *map);
int ucc_ep_map_is_identity(const ucc_ep_map_t *map);

void ucc_ep_map_destroy_nested(ucc_ep_map_t *out);

Expand Down
23 changes: 23 additions & 0 deletions src/utils/ucc_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,27 @@ static inline const char* ucc_reduction_op_str(ucc_reduction_op_t op)
}
}

/*
 * Return a printable name for a UCC memory type.
 *
 * ct - memory type to describe.
 *
 * Returns a pointer to a string literal: the name matching ct, or "invalid"
 * when ct matches none of the cases handled below.
 */
static inline const char* ucc_mem_type_str(ucc_memory_type_t ct)
{
    /* Fallback used when no case below matches. */
    const char *name = "invalid";

    switch ((int)ct) {
    case UCC_MEMORY_TYPE_HOST:
        name = "Host";
        break;
    case UCC_MEMORY_TYPE_CUDA:
        name = "Cuda";
        break;
    case UCC_MEMORY_TYPE_CUDA_MANAGED:
        name = "CudaManaged";
        break;
    case UCC_MEMORY_TYPE_ROCM:
        name = "Rocm";
        break;
    case UCC_MEMORY_TYPE_ROCM_MANAGED:
        name = "RocmManaged";
        break;
    case UCC_MEMORY_TYPE_ASYMMETRIC:
        name = "asymmetric";
        break;
    case UCC_MEMORY_TYPE_NOT_APPLY:
        name = "n/a";
        break;
    default:
        break;
    }
    return name;
}

#endif
129 changes: 110 additions & 19 deletions test/mpi/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <sstream>
#include <algorithm>
#include <chrono>
#include <iomanip>
#include "test_mpi.h"

int test_rand_seed = -1;
Expand Down Expand Up @@ -135,6 +136,23 @@ static ucc_test_mpi_team_t team_str_to_type(std::string team)
throw std::string("incorrect team type: ") + team;
}

/*
 * Return the printable name of a test team type.
 *
 * team - team type to describe.
 *
 * Returns "world", "half", "odd_even" or "reverse" for the four handled
 * team types.
 *
 * Throws std::string for any other value. The message carries the numeric
 * value of the unrecognized team type so the failure can be diagnosed.
 */
static std::string team_type_to_str(ucc_test_mpi_team_t team)
{
    switch (team) {
    case TEAM_WORLD:
        return "world";
    case TEAM_SPLIT_HALF:
        return "half";
    case TEAM_SPLIT_ODD_EVEN:
        return "odd_even";
    case TEAM_REVERSE:
        return "reverse";
    default:
        break;
    }
    /* Append the offending value; the message previously ended at the colon. */
    throw std::string("incorrect team type: ") +
        std::to_string(static_cast<int>(team));
}

static ucc_coll_type_t coll_str_to_type(std::string coll)
{
if (coll == "barrier") {
Expand Down Expand Up @@ -395,15 +413,52 @@ int init_rand_seed(int user_seed)
/*
 * Print the test configuration to std::cout: the random seed followed by the
 * collectives, data types, memory types and teams lists (colls, dtypes,
 * mtypes and teams are defined outside this function).
 *
 * Only the process whose MPI_COMM_WORLD rank is 0 prints; every other rank
 * returns right after the rank query.
 */
void print_info()
{
int world_rank;
/* NOTE(review): the rank query and the banner/seed output each appear twice
 * in this view; this looks like the pre- and post-change lines of a diff shown
 * together -- confirm which set is current before relying on the output. */
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
/* Non-zero ranks print nothing. */
if (world_rank) {
return;
}
std::cout << "\n===== UCC MPI TEST INFO =======\n"
<< " seed : " << std::to_string(test_rand_seed) << "\n"
<< "===============================\n"
<< std::endl;

std::cout << "===== UCC MPI TEST INFO =======" << std::endl;
std::cout <<"seed: " << std::to_string(test_rand_seed) << std::endl;
/* Each list below is printed comma-separated on one line: an element that
 * differs from back() is followed by ", ", an element equal to back() ends
 * the line.
 * NOTE(review): the comparison is by value, so an earlier element equal to
 * the last one also ends the line early, and an empty list prints no newline
 * at all. */
std::cout <<"collectives: ";
for (const auto &c : colls) {
std::cout << ucc_coll_type_str(c);
if (c != colls.back()) {
std::cout << ", ";
} else {
std::cout << std::endl;
}
}
/* Data types, same separator logic as above. */
std::cout <<"data types: ";
for (const auto &d : dtypes) {
std::cout << ucc_datatype_str(d);
if (d != dtypes.back()) {
std::cout << ", ";
} else {
std::cout << std::endl;
}
}

/* Memory types, same separator logic as above. */
std::cout <<"memory types: ";
for (const auto &m : mtypes) {
std::cout << ucc_mem_type_str(m);
if (m != mtypes.back()) {
std::cout << ", ";
} else {
std::cout << std::endl;
}
}

/* Teams, same separator logic as above; team_type_to_str throws a
 * std::string for a team type it does not recognize. */
std::cout <<"teams: ";
for (const auto &t : teams) {
std::cout << team_type_to_str(t);
if (t != teams.back()) {
std::cout << ", ";
} else {
std::cout << std::endl;
}
}
}

void ProcessArgs(int argc, char** argv)
Expand Down Expand Up @@ -519,8 +574,8 @@ void ProcessArgs(int argc, char** argv)

int main(int argc, char *argv[])
{
int failed = 0;
int total_done_skipped_failed[4] = {0};
int failed = 0;
int total_done_skipped_failed[ucc_ilog2(UCC_COLL_TYPE_LAST) + 1][4] = {0};
std::chrono::steady_clock::time_point begin;
int size, required, provided, completed, rank;
UccTestMpi *test;
Expand Down Expand Up @@ -623,19 +678,20 @@ int main(int argc, char *argv[])
}
std::cout << std::flush;

total_done_skipped_failed[0] = test->results.size();
for (auto s : test->results) {
switch(s) {
int coll_num = ucc_ilog2(std::get<0>(s));
switch(std::get<1>(s)) {
case UCC_OK:
total_done_skipped_failed[1]++;
total_done_skipped_failed[coll_num][1]++;
break;
case UCC_ERR_NOT_IMPLEMENTED:
case UCC_ERR_LAST:
total_done_skipped_failed[2]++;
total_done_skipped_failed[coll_num][2]++;
break;
default:
total_done_skipped_failed[3]++;
total_done_skipped_failed[coll_num][3]++;
}
total_done_skipped_failed[coll_num][0]++;
}
MPI_Iallreduce(MPI_IN_PLACE, total_done_skipped_failed,
sizeof(total_done_skipped_failed)/sizeof(int),
Expand All @@ -648,23 +704,58 @@ int main(int argc, char *argv[])
if (0 == rank) {
std::chrono::steady_clock::time_point end =
std::chrono::steady_clock::now();
ucc_coll_type_t coll_type;
int num_all = 0, num_skipped = 0, num_done =0, num_failed = 0;
std::ios iostate(nullptr);

iostate.copyfmt(std::cout);
std::cout << "\n===== UCC MPI TEST REPORT =====\n" <<
" total tests : " << total_done_skipped_failed[0] << "\n" <<
" passed : " << total_done_skipped_failed[1] << "\n" <<
" skipped : " << total_done_skipped_failed[2] << "\n" <<
" failed : " << total_done_skipped_failed[3] << "\n" <<
" elapsed : " <<
std::setw(22) << std::left << "collective" <<
std::setw(10) << std::right << "tests" <<
std::setw(10) << std::right << "passed" <<
std::setw(10) << std::right << "failed" <<
std::setw(10) << std::right << "skipped" << std::endl;

for (coll_type = (ucc_coll_type_t)1;
coll_type < UCC_COLL_TYPE_LAST;
coll_type = (ucc_coll_type_t)(coll_type << 1))
{
int coll_num = ucc_ilog2(coll_type);
if (total_done_skipped_failed[coll_num][0] == 0) {
continue;
}
num_all += total_done_skipped_failed[coll_num][0];
num_done += total_done_skipped_failed[coll_num][1];
num_skipped += total_done_skipped_failed[coll_num][2];
num_failed += total_done_skipped_failed[coll_num][3];
std::cout <<
std::setw(22) << std::left << ucc_coll_type_str(coll_type) <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][0] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][1] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][3] <<
std::setw(10) << std::right << total_done_skipped_failed[coll_num][2] <<
std::endl;

}
std::cout <<
" \n===== UCC MPI TEST SUMMARY =====\n" <<
"total tests: " << num_all << "\n" <<
"passed: " << num_done << "\n" <<
"skipped: " << num_skipped << "\n" <<
"failed: " << num_failed << "\n" <<
"elapsed: " <<
std::chrono::duration_cast<std::chrono::seconds>(end - begin).count()
<< "s" << std::endl;
std::cout.copyfmt(iostate);

/* check if all tests have been skipped */
if (total_done_skipped_failed[0] == total_done_skipped_failed[2]) {
if (num_all == num_skipped) {
std::cout << "\n All tests have been skipped, indicating most likely "
"a problem\n";
failed = 1;
}

if (total_done_skipped_failed[3] != 0) {
if (num_failed != 0) {
failed = 1;
}
}
Expand Down
20 changes: 10 additions & 10 deletions test/mpi/test_mpi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ void set_gpu_device(test_set_gpu_device_t set_device)

#endif

std::vector<ucc_status_t> UccTestMpi::exec_tests(
std::vector<ucc_test_mpi_result_t> UccTestMpi::exec_tests(
std::vector<std::shared_ptr<TestCase>> tcs, bool triggered,
bool persistent)
{
Expand All @@ -483,7 +483,7 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
ucc_status_t status;

MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
std::vector<ucc_status_t> rst;
std::vector<ucc_test_mpi_result_t> rst;

for (i = 0; i < n_persistent; i++) {
for (auto tc: tcs) {
Expand All @@ -501,7 +501,7 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
std::cout << "SKIPPED: " << skip_str(tc->test_skip) << ": "
<< tc->str() << " " << std::endl;
}
rst.push_back(UCC_ERR_LAST);
rst.push_back(std::make_tuple(tc->args.coll_type, UCC_ERR_LAST));
return rst;
}
}
Expand All @@ -528,14 +528,14 @@ std::vector<ucc_status_t> UccTestMpi::exec_tests(
if (UCC_OK != status) {
std::cerr << "FAILURE in: " << tc->str() << std::endl;
}
rst.push_back(status);
rst.push_back(std::make_tuple(tc->args.coll_type, status));
}
}
return rst;
}

void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
std::vector<ucc_status_t> &rst)
std::vector<ucc_test_mpi_result_t> &rst)
{
TestCaseParams params;

Expand Down Expand Up @@ -586,7 +586,7 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
for (auto r : roots) {
for (auto mt: test_memtypes) {
if (triggered && !ucc_coll_triggered_supported(mt)) {
rst.push_back(UCC_ERR_NOT_IMPLEMENTED);
rst.push_back(std::make_tuple(c, UCC_ERR_NOT_IMPLEMENTED));
continue;
}

Expand Down Expand Up @@ -642,10 +642,10 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team,
}

typedef struct ucc_test_thread {
pthread_t thread;
int id;
UccTestMpi * test;
std::vector<ucc_status_t> rst;
pthread_t thread;
int id;
UccTestMpi * test;
std::vector<ucc_test_mpi_result_t> rst;
} ucc_test_thread_t;

static void *thread_start(void *arg)
Expand Down
10 changes: 6 additions & 4 deletions test/mpi/test_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ class TestCase {
size_t msgsize;
bool inplace;
bool persistent;
ucc_coll_args_t args;
ucc_coll_req_h req;
ucc_mc_buffer_header_t *sbuf_mc_header, *rbuf_mc_header;
void *sbuf;
Expand All @@ -279,6 +278,7 @@ class TestCase {
ucc_datatype_t dt;
int iter_persistent;
public:
ucc_coll_args_t args;
void mpi_progress(void);
test_skip_cause_t test_skip;
static std::shared_ptr<TestCase> init_single(
Expand All @@ -304,6 +304,7 @@ class TestCase {
MPI_Comm comm);
};

/* Result of one executed or skipped test: the collective type paired with the status recorded for it. */
using ucc_test_mpi_result_t = std::tuple<ucc_coll_type_t, ucc_status_t>;
class UccTestMpi {
ucc_thread_mode_t tm;
ucc_context_h ctx;
Expand Down Expand Up @@ -331,14 +332,15 @@ class UccTestMpi {
std::vector<int> gen_roots(ucc_test_team_t &team);
std::vector<ucc_test_vsize_flag_t> counts_vsize;
std::vector<ucc_test_vsize_flag_t> displs_vsize;
std::vector<ucc_status_t> exec_tests(
std::vector<ucc_test_mpi_result_t> exec_tests(
std::vector<std::shared_ptr<TestCase>> tcs,
bool triggered, bool persistent);
public:
std::vector<ucc_test_team_t> teams;
std::vector<ucc_test_team_t> onesided_teams;
void run_all_at_team(ucc_test_team_t &team, std::vector<ucc_status_t> &rst);
std::vector<ucc_status_t> results;
void run_all_at_team(ucc_test_team_t &team,
std::vector<ucc_test_mpi_result_t> &rst);
std::vector<ucc_test_mpi_result_t> results;
UccTestMpi(int argc, char *argv[], ucc_thread_mode_t tm, int is_local,
bool with_onesided);
~UccTestMpi();
Expand Down

0 comments on commit 6e34b20

Please sign in to comment.