diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 9abd8451..35208cff 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -35,6 +35,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -53,6 +55,8 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan1d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -78,6 +82,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -96,6 +102,8 @@ auto create_plan(const ExecutionSpace& exec_space, cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan2d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -121,6 +129,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + plan = std::make_unique(); cufftResult cufft_rt = cufftCreate(&(*plan)); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); @@ -141,6 +151,8 @@ auto create_plan(const ExecutionSpace& exec_space, 
cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan3d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -170,7 +182,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_cufft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +212,7 @@ auto create_plan(const ExecutionSpace& exec_space, out_extents.data(), ostride, odist, type, howmany); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlanMany failed"); + Kokkos::Profiling::popRegion(); return fft_size; } @@ -206,7 +221,9 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_cufft]"); cufftDestroy(*plan); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 83f0cb45..f047230c 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -13,42 +13,54 @@ namespace Impl { template inline void exec_plan(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecR2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, int /*direction*/, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecD2Z failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2R failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2D failed"); } template inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2C failed"); } template inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_cufft]"); cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index c94ed23e..c520c1d5 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -35,6 +35,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -53,6 +55,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan1d(&(*plan), nx, type, howmany); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan1d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -78,6 +82,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -96,6 +102,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan2d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -121,6 +129,8 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + 
Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + plan = std::make_unique(); hipfftResult hipfft_rt = hipfftCreate(&(*plan)); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); @@ -141,6 +151,8 @@ auto create_plan(const ExecutionSpace& exec_space, hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan3d failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -170,7 +182,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_hipfft]"); + const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +212,7 @@ auto create_plan(const ExecutionSpace& exec_space, out_extents.data(), ostride, odist, type, howmany); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlanMany failed"); + Kokkos::Profiling::popRegion(); return fft_size; } @@ -206,7 +221,9 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_hipfft]"); hipfftDestroy(*plan); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 6e131150..26fc6836 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -13,42 +13,54 @@ namespace Impl { template inline void exec_plan(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, int /*direction*/, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecR2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecD2Z failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2R failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2D failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2C failed"); } template inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_hipfft]"); hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 7b66522e..4ce21583 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -54,8 +54,10 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - const int rank = fft_rank; + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_fftw]"); + + const int rank = fft_rank; init_threads>( exec_space); @@ -109,6 +111,8 @@ auto create_plan(const ExecutionSpace& exec_space, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -116,11 +120,13 @@ template , std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_fftw]"); if constexpr (std::is_same_v) { fftwf_destroy_plan(*plan); } else { fftw_destroy_plan(*plan); } + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 4dfc04bb..147461b9 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -12,37 +12,49 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, fftwf_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_r2c(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, fftw_complex* odata, int /*direction*/, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_r2c(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, float* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft_c2r(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftw_complex* idata, double* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft_c2r(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftwf_execute_dft(plan, idata, odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType plan, fftw_complex* idata, fftw_complex* odata, int /*direction*/, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_fftw]"); fftw_execute_dft(plan, idata, odata); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index e1b115e9..fed385e0 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -109,6 +109,9 @@ auto create_plan(const ExecutionSpace& exec_space, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_rocfft]"); + constexpr auto type = KokkosFFT::Impl::transform_type::type(); @@ -198,6 +201,8 @@ auto create_plan(const ExecutionSpace& exec_space, KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_plan_description_destroy failed"); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -206,8 +211,12 @@ template = nullptr> void destroy_plan_and_info(std::unique_ptr& plan, InfoType& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_rocfft]"); + rocfft_execution_info_destroy(execution_info); rocfft_plan_destroy(*plan); + + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index 2c6d50b8..11f1a63e 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -14,8 +14,10 @@ namespace Impl { inline void exec_plan(rocfft_plan& plan, float* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for R2C failed"); } @@ -23,8 +25,10 @@ inline void 
exec_plan(rocfft_plan& plan, float* idata, inline void exec_plan(rocfft_plan& plan, double* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for D2Z failed"); } @@ -32,8 +36,10 @@ inline void exec_plan(rocfft_plan& plan, double* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, float* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2R failed"); } @@ -41,8 +47,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, double* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2D failed"); } @@ -50,8 +58,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2C failed"); } @@ 
-59,8 +69,10 @@ inline void exec_plan(rocfft_plan& plan, std::complex* idata, inline void exec_plan(rocfft_plan& plan, std::complex* idata, std::complex* odata, int /*direction*/, const rocfft_execution_info& execution_info) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_rocfft]"); rocfft_status status = rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + Kokkos::Profiling::popRegion(); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2Z failed"); } diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index fa9d232c..a1ead852 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -68,6 +68,8 @@ auto create_plan(const ExecutionSpace& exec_space, InViewType::rank() >= fft_rank, "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); + Kokkos::Profiling::pushRegion("KokkosFFT::create_plan[TPL_oneMKL]"); + auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, @@ -107,6 +109,8 @@ auto create_plan(const ExecutionSpace& exec_space, sycl::queue q = exec_space.sycl_queue(); plan->commit(q); + Kokkos::Profiling::popRegion(); + return fft_size; } @@ -116,6 +120,8 @@ template < std::nullptr_t> = nullptr> void destroy_plan_and_info(std::unique_ptr&, InfoType&) { // In oneMKL, plans are destroybed by destructor + Kokkos::Profiling::pushRegion("KokkosFFT::destroy_plan[TPL_oneMKL]"); + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index bd85ec7f..0ee67b2e 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -13,49 +13,61 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, std::complex* odata, int /*direction*/, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, double* idata, std::complex* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, float* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, double* odata, int /*direction*/, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } + Kokkos::Profiling::popRegion(); } template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) 
{ + Kokkos::Profiling::pushRegion("KokkosFFT::exec_plan[TPL_oneMKL]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { oneapi::mkl::dft::compute_backward(plan, idata, odata); } + Kokkos::Profiling::popRegion(); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index b23886ce..629f8560 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -35,11 +35,14 @@ void fft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "fft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axis, n); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT in backward direction @@ -65,11 +68,14 @@ void ifft(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 1, "ifft: View rank must be larger than or equal to 1"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ifft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axis, n); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT for real input @@ -103,9 +109,12 @@ void rfft(const ExecutionSpace& exec_space, const InViewType& in, "rfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::rfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); 
fft(exec_space, in, out, norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft @@ -140,9 +149,12 @@ void irfft(const ExecutionSpace& exec_space, const InViewType& in, "irfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft: OutViewType must be real"); + + Kokkos::Profiling::pushRegion("KokkosFFT::irfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); ifft(exec_space, in, out, norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief One dimensional FFT of a signal that has Hermitian symmetry @@ -178,6 +190,8 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, "hfft: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "hfft: OutViewType must be real"); + + Kokkos::Profiling::pushRegion("KokkosFFT::hfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -187,6 +201,7 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, InViewType in_conj; KokkosFFT::Impl::conjugate(exec_space, in, in_conj); irfft(exec_space, in_conj, out, new_norm, axis, n); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of hfft @@ -220,6 +235,8 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, "ihfft: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "ihfft: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ihfft"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axis_type<1>({axis})), "axes are invalid for in/out views"); auto new_norm = KokkosFFT::Impl::swap_direction(norm); @@ -227,6 +244,7 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, rfft(exec_space, in, out, new_norm, axis, n); KokkosFFT::Impl::conjugate(exec_space, out, out_conj); Kokkos::deep_copy(exec_space, out, out_conj); + 
Kokkos::Profiling::popRegion(); } // 2D FFT @@ -253,11 +271,14 @@ void fft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "fft2: View rank must be larger than or equal to 2"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT in backward direction @@ -283,11 +304,13 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, "and OutViewType."); static_assert(InViewType::rank() >= 2, "ifft2: View rank must be larger than or equal to 2"); + Kokkos::Profiling::pushRegion("KokkosFFT::ifft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Two dimensional FFT for real input @@ -321,9 +344,11 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, "rfft2: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfft2: OutViewType must be complex"); + Kokkos::Profiling::pushRegion("KokkosFFT::rfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fft2(exec_space, in, out, norm, axes, s); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfft2 @@ -357,9 +382,11 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, "irfft2: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfft2: OutViewType must be real"); + Kokkos::Profiling::pushRegion("KokkosFFT::irfft2"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); 
ifft2(exec_space, in, out, norm, axes, s); + Kokkos::Profiling::popRegion(); } // ND FFT @@ -398,11 +425,14 @@ void fftn( static_assert( InViewType::rank() >= DIM, "fftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::pushRegion("KokkosFFT::fftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::forward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of fftn @@ -441,11 +471,14 @@ void ifftn( static_assert( InViewType::rank() >= DIM, "ifftn: View rank must be larger than or equal to the Rank of FFT axes"); + + Kokkos::Profiling::pushRegion("KokkosFFT::ifftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); KokkosFFT::Plan plan(exec_space, in, out, KokkosFFT::Direction::backward, axes, s); plan.execute(in, out, norm); + Kokkos::Profiling::popRegion(); } /// \brief N-dimensional FFT for real input @@ -492,9 +525,12 @@ void rfftn( "rfftn: InViewType must be real"); static_assert(KokkosFFT::Impl::is_complex_v, "rfftn: OutViewType must be complex"); + + Kokkos::Profiling::pushRegion("KokkosFFT::rfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); fftn(exec_space, in, out, axes, norm, s); + Kokkos::Profiling::popRegion(); } /// \brief Inverse of rfftn @@ -541,9 +577,11 @@ void irfftn( "irfftn: InViewType must be complex"); static_assert(KokkosFFT::Impl::is_real_v, "irfftn: OutViewType must be real"); + Kokkos::Profiling::pushRegion("KokkosFFT::irfftn"); KOKKOSFFT_THROW_IF(!KokkosFFT::Impl::are_valid_axes(in, axes), "axes are invalid for in/out views"); ifftn(exec_space, in, out, axes, norm, s); + Kokkos::Profiling::popRegion(); } } // namespace KokkosFFT