diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c92b46f..78ae9c5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,12 +7,6 @@ NLCGLIB_SETUP_TARGET(test3) add_executable(test_traits test_traits.cpp) NLCGLIB_SETUP_TARGET(test_traits) -add_executable(test_kokkos test_kokkos.cpp) -NLCGLIB_SETUP_TARGET(test_kokkos) - -add_executable(test_view test_view.cpp) -NLCGLIB_SETUP_TARGET(test_view) - add_executable(test_gpu test_gpu.cpp) NLCGLIB_SETUP_TARGET(test_gpu) @@ -28,14 +22,14 @@ NLCGLIB_SETUP_TARGET(test_solver) add_executable(test_mvector_cuda test_mvector_cuda.cpp) NLCGLIB_SETUP_TARGET(test_mvector_cuda) -add_executable(test_deep_copy test_deep_copy.cpp) -NLCGLIB_SETUP_TARGET(test_deep_copy) - add_executable(test_smearing test_smearing.cpp) NLCGLIB_SETUP_TARGET(test_smearing) -add_executable(test_nvcc test_nvcc.cpp) -NLCGLIB_SETUP_TARGET(test_nvcc) - -add_executable(test_check_alignment test_check_alignment.cpp) -NLCGLIB_SETUP_TARGET(test_check_alignment) +if(USE_CUDA) + add_executable(test_deep_copy test_deep_copy.cpp) + NLCGLIB_SETUP_TARGET(test_deep_copy) + add_executable(test_view test_view.cpp) + NLCGLIB_SETUP_TARGET(test_view) + add_executable(test_nvcc test_nvcc.cpp) + NLCGLIB_SETUP_TARGET(test_nvcc) +endif() diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index 0851f93..aa92e9f 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -6,6 +6,7 @@ #include #include +#ifdef __NLCGLIB__CUDA #include "cudaProfiler.h" using namespace nlcglib; @@ -71,3 +72,11 @@ int main(int argc, char *argv[]) Communicator::finalize(); return 0; } +#else +int main(int argc, char *argv[]) +{ + + return 0; +} + +#endif diff --git a/test/test_kokkos.cpp b/test/test_kokkos.cpp deleted file mode 100644 index f579547..0000000 --- a/test/test_kokkos.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include -#include -#include -/// to have exp available on device -#include - - -auto unmanaged() -{ - std::cout << "\nunmanaged\n"; - int n = 10; - double* A = new double[n*n]; - - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - A[n*i + j] = n * i + j; - } - } - - Kokkos::View > a_view( - A, n, n); - // how does it work with strides? - - for (int i = 0; i < n; ++i) { - for (int j = 0; j < n; ++j) { - std::cout << a_view(i, j) << " "; - } - std::cout << "\n"; - } - - return a_view; -} - - -auto unmanaged_strided() -{ - std::cout << "\nunmanaged_strided\n"; - int n = 10; // rows - int m = 10; // cols - int lda = 12; - - double* A; - posix_memalign(reinterpret_cast(&A), 256, lda*m*sizeof(double)); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < m; ++j) { - A[j*lda + i] = i*n + j; - } - } - typedef Kokkos::View> - vector_t; - vector_t a_view(A, Kokkos::LayoutStride(n, 1, m, lda)); - - // int nelems = a_view.size(); - - for (int i = 0; i < n; ++i) { - for (int j = 0; j < m; ++j) { - std::cout << a_view(i, j) << " "; - } - std::cout << "\n"; - } - return a_view; -} - - -void kokkos_view_stuff() -{ - Kokkos::View view; - - view = Kokkos::View("nope", 100); - Kokkos::View view2("nope", 100); - - // accessing host memory space from Device will crash - // at runtime (not caught at compile time) - Kokkos::parallel_for("foo", Kokkos::RangePolicy(0, 100), - KOKKOS_LAMBDA (int i){ - double f = view2(i) * view(i); - view2(i) = f; - }); -} - - -template -void kokkos_reduction() -{ - using space = Kokkos::HostSpace; - using exec_space = Kokkos::Serial; - // using space = Kokkos::CudaSpace; - // using exec_space = Kokkos::Cuda; - - int n = 100; - Kokkos::View view("", n); - Kokkos::parallel_for("foo", Kokkos::RangePolicy(0, n), - KOKKOS_LAMBDA (int i) - { - view(i) = i; - } - ); - - numeric_t sum = 0; - Kokkos::parallel_reduce("summation", Kokkos::RangePolicy(0, view.size()), - KOKKOS_LAMBDA (int i, numeric_t& loc_sum) { loc_sum += view(i); }, sum); - std::cout << "Sum is: " << sum << "\n"; -} - -template -void -kokkos_reduction_cuda() -{ - // using space = Kokkos::HostSpace; - // using exec_space = Kokkos::Serial; - using space = Kokkos::CudaSpace; - using exec_space = Kokkos::Cuda; - - int n = 100; - Kokkos::View view("", n); - Kokkos::parallel_for( - "foo", Kokkos::RangePolicy(0, n), KOKKOS_LAMBDA(int i) { view(i) = i; }); - - numeric_t sum = 0; - Kokkos::parallel_reduce( - "summation", - Kokkos::RangePolicy(0, view.size()), - KOKKOS_LAMBDA(int i, numeric_t& loc_sum) { loc_sum += view(i); }, - sum); - std::cout << "Sum is: " << sum << "\n"; -} - - -struct fun -{ - __device__ __host__ double operator()(double x) const { return 1 / (1 + exp(x)); } -}; - -void test_template_lambda() -{ - // using space = Kokkos::HostSpace; - // using exec_space = Kokkos::Serial; - using space = Kokkos::CudaSpace; - using exec_space = Kokkos::Cuda; - using numeric_t = double; - - // int n = 100; - // Kokkos::View view("", n); - // Kokkos::parallel_for( - // "foo", Kokkos::RangePolicy(0, n), [view, f=fun()] __device__ __host__ (int i) { - // view(i) = f(i); - // }); -} - -int main(int argc, char *argv[]) -{ - Kokkos::initialize(); - auto x = unmanaged(); - auto x2 = unmanaged(); - unmanaged_strided(); - - std::cout << "trying reduction on cpu: " << "\n"; - kokkos_reduction(); - std::cout << "trying reduction on gpu: " << "\n"; - kokkos_reduction_cuda(); - // // test - // kokkos_view_stuff(); - - std::cout << x.data() << "\n"; - x = x2; - std::cout << x.data() << " (after assign x=x2)\n"; - std::cout << x2.data() << "\n"; - Kokkos::finalize(); - return 0; -} diff --git a/test/test_mvector_cuda.cpp b/test/test_mvector_cuda.cpp index 26e7579..4e0a5a2 100644 --- a/test/test_mvector_cuda.cpp +++ b/test/test_mvector_cuda.cpp @@ -4,7 +4,7 @@ #include "smearing.hpp" #include "la/mvector.hpp" - +#ifdef __NLCGLIB__CUDA void run() { int n = 10; @@ -22,14 +22,18 @@ void run() vector_t copy(a_view); - double foo = 1; + double f = 1; Kokkos::parallel_for( "scale", Kokkos::RangePolicy(0, a_view.size()), KOKKOS_LAMBDA(int i) { - a_view(i) = foo / sqrt(a_view(i)); + a_view(i) = f / sqrt(a_view(i)); }); } - +#else +void run() +{ +} +#endif int main(int argc, char *argv[]) diff --git a/test/test_solver.cpp b/test/test_solver.cpp index 2f7eb64..d90c362 100644 --- a/test/test_solver.cpp +++ b/test/test_solver.cpp @@ -71,9 +71,11 @@ int main(int argc, char *argv[]) std::cout << "run on HOST" << "\n"; run_unmanaged(); +#ifdef __NLCGLIB__CUDA std::cout << "run on DEVICE" << "\n"; run_unmanaged(); +#endif // std::cout << "run non GPU" << "\n"; // run_unmanaged(); diff --git a/unit_tests/local/test_la_wrappers.cpp b/unit_tests/local/test_la_wrappers.cpp index d2431f7..cbd495d 100644 --- a/unit_tests/local/test_la_wrappers.cpp +++ b/unit_tests/local/test_la_wrappers.cpp @@ -110,7 +110,7 @@ TEST_F(CPUKokkosVectors, TransformCPU) std::cout << "\n"; } - +#ifdef __NLCGLIB__CUDA class GPUKokkosVectors : public ::testing::Test { public: @@ -268,7 +268,7 @@ TEST(EigenValues, EigHermitian) EXPECT_NEAR(wh(i), eigs[i], 1e-8); } } - +#endif /* CUDA */ int main(int argc, char *argv[]) diff --git a/unit_tests/local/test_solver_wrappers.cpp b/unit_tests/local/test_solver_wrappers.cpp index 257f166..3ac66ad 100644 --- a/unit_tests/local/test_solver_wrappers.cpp +++ b/unit_tests/local/test_solver_wrappers.cpp @@ -47,7 +47,13 @@ void TestSymSolve::SetUp() } // https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests + +#ifdef __NLCGLIB__CUDA using KokkosMemTypes = ::testing::Types; +#else +using KokkosMemTypes = ::testing::Types; +#endif + // TYPED_TEST_SUITE_P(TestSymSolve); TYPED_TEST_CASE(TestSymSolve, KokkosMemTypes);