Merge pull request #18 from CExA-project/fft-on-host-and-device

FFT on Host and Device
kokkos · Jan 5, 2024 · 66b979a · 66b979a
2 parents 0cdb8ff + 06cf61b
commit 66b979a
Show file tree

Hide file tree

Showing 21 changed files with 690 additions and 301 deletions.
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ubuntu-latest
 
     env:
-      backends: OPENMP CUDA
+      backends: OPENMP CUDA CUDA_HOST_DEVICE
       CUDA_ARCHITECTURES: AMPERE80
       CMAKE_CXX_COMPILER: /work/tpls/kokkos/bin/nvcc_wrapper
       container: nvidia_env
@@ -54,6 +54,15 @@ jobs:
             -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
             -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON
 
+    - name: Configure CMake for CUDA backend with HOST and DEVICE option
+      # Configure CMake in the 'build_CUDA_HOST_DEVICE' subdirectory with the host-and-device option enabled. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
+      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
+      run: |
+          docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_CUDA_HOST_DEVICE \
+            -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
+            -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON \
+            -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
+
     - name: Build
       # Build your program with the given configuration
       run: |
@@ -73,7 +82,7 @@ jobs:
     runs-on: ubuntu-latest
 
     env:
-      backend: HIP
+      backends: HIP HIP_HOST_DEVICE
       architecture: VEGA90A
       CMAKE_CXX_COMPILER: hipcc
       container: amd_env
@@ -98,11 +107,22 @@ jobs:
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
       # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
       run: |
-        docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_${{env.backend}} \
+        docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_HIP \
             -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
             -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON
 
+    - name: Configure CMake for HIP backend with HOST and DEVICE option
+      # Configure CMake in the 'build_HIP_HOST_DEVICE' subdirectory with the host-and-device option enabled. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
+      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
+      run: |
+        docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_HIP_HOST_DEVICE \
+            -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
+            -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON \
+            -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
+
     - name: Build
       # Build your program with the given configuration
       run: |
-        docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake --build build_${{env.backend}} --config ${{env.BUILD_TYPE}} -j 2
+        for backend in ${{ env.backends }}; do
+          docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake --build build_${backend} --config ${{env.BUILD_TYPE}} -j 2
+        done
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,6 +6,7 @@ list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_SOURCE_DIR}/cmake")
 
 # Options
 option(BUILD_EXAMPLES "Build kokkos-fft examples" ON)
+option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable fft on both host and device" OFF)
 
 find_package(Kokkos CONFIG)
 if(NOT kokkos_FOUND)

diff --git a/common/src/CMakeLists.txt b/common/src/CMakeLists.txt
@@ -8,9 +8,19 @@ target_link_libraries(common
 if(Kokkos_ENABLE_CUDA)
   find_package(CUDAToolkit REQUIRED COMPONENTS cufft)
   target_link_libraries(common INTERFACE CUDA::cufft)
+  if(KokkosFFT_ENABLE_HOST_AND_DEVICE)
+    find_package(FFTW MODULE REQUIRED)
+    target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)
+    target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE)
+  endif()
 elseif(Kokkos_ENABLE_HIP)
   find_package(hipfft REQUIRED)
   target_link_libraries(common INTERFACE hip::hipfft)
+  if(KokkosFFT_ENABLE_HOST_AND_DEVICE)
+    find_package(FFTW MODULE REQUIRED)
+    target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)
+    target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE)
+  endif()
 elseif(Kokkos_ENABLE_OPENMP)
   find_package(FFTW MODULE REQUIRED)
   target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)

diff --git a/common/src/KokkosFFT_Cuda_types.hpp b/common/src/KokkosFFT_Cuda_types.hpp
@@ -3,59 +3,173 @@
 
 #include <cufft.h>
 
+// Check the size of complex type
+static_assert(sizeof(cufftComplex) == sizeof(Kokkos::complex<float>));
+static_assert(alignof(cufftComplex) <= alignof(Kokkos::complex<float>));
+
+static_assert(sizeof(cufftDoubleComplex) == sizeof(Kokkos::complex<double>));
+static_assert(alignof(cufftDoubleComplex) <= alignof(Kokkos::complex<double>));
+
+#ifdef ENABLE_HOST_AND_DEVICE
+  #include <fftw3.h>
+  #include "KokkosFFT_utils.hpp"
+  static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex<float>));
+  static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex<float>));
+
+  static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex<double>));
+  static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex<double>));
+#endif
+
 namespace KokkosFFT {
 namespace Impl {
-  #define KOKKOS_FFT_FORWARD CUFFT_FORWARD
-  #define KOKKOS_FFT_BACKWARD CUFFT_INVERSE
-  #define KOKKOS_FFT_R2C CUFFT_R2C
-  #define KOKKOS_FFT_D2Z CUFFT_D2Z
-  #define KOKKOS_FFT_C2R CUFFT_C2R
-  #define KOKKOS_FFT_Z2D CUFFT_Z2D
-  #define KOKKOS_FFT_C2C CUFFT_C2C
-  #define KOKKOS_FFT_Z2Z CUFFT_Z2Z
-
-  struct FFTDataType {
-    using float32    = cufftReal;
-    using float64    = cufftDoubleReal;
-    using complex64  = cufftComplex;
-    using complex128 = cufftDoubleComplex;
+  enum class Direction {
+    Forward,
+    Backward,
   };
 
-  template <typename T>
-  struct FFTPlanType {
-    using type = cufftHandle;
-  };
-
-  using FFTResultType = cufftResult;
-  using TransformType = cufftType;
   using FFTDirectionType = int;
 
-  template <typename T1, typename T2>
-  struct transform_type {
-    static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
-  };
+  #ifdef ENABLE_HOST_AND_DEVICE
+    enum class FFTWTransformType {
+      R2C,
+      D2Z,
+      C2R,
+      Z2D,
+      C2C,
+      Z2Z
+    };
 
-  template <typename T1, typename T2>
-  struct transform_type<T1, Kokkos::complex<T2>> {
-    static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
-    static constexpr TransformType m_type = std::is_same_v<T1, float> ? KOKKOS_FFT_R2C : KOKKOS_FFT_D2Z;
-    static constexpr TransformType type() { return m_type; };
-  };
+    // Element types passed to the FFT library for a given execution space:
+    // the cuFFT typedefs when ExecutionSpace is Kokkos::Cuda, and the built-in
+    // / FFTW types for every other execution space.
+    template <typename ExecutionSpace>
+    struct FFTDataType {
+     private:
+      // Evaluated once so each alias below reads as a plain two-way choice.
+      static constexpr bool is_cuda = std::is_same_v<ExecutionSpace, Kokkos::Cuda>;
+
+     public:
+      using float32    = std::conditional_t<is_cuda, cufftReal, float>;
+      using float64    = std::conditional_t<is_cuda, cufftDoubleReal, double>;
+      using complex64  = std::conditional_t<is_cuda, cufftComplex, fftwf_complex>;
+      using complex128 = std::conditional_t<is_cuda, cufftDoubleComplex, fftw_complex>;
+    };
 
-  template <typename T1, typename T2>
-  struct transform_type<Kokkos::complex<T1>, T2> {
-    static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
-    static constexpr TransformType m_type = std::is_same_v<T2, float> ? KOKKOS_FFT_C2R : KOKKOS_FFT_Z2D;
-    static constexpr TransformType type() { return m_type; };
-  };
+    // Plan handle type for a given execution space and value type T.
+    // Kokkos::Cuda uses cufftHandle; any other execution space uses an FFTW
+    // plan, single precision (fftwf_plan) when the real type underlying T is
+    // float and double precision (fftw_plan) otherwise.
+    template <typename ExecutionSpace, typename T>
+    struct FFTPlanType {
+     private:
+      static constexpr bool is_cuda   = std::is_same_v<ExecutionSpace, Kokkos::Cuda>;
+      static constexpr bool is_single = std::is_same_v<KokkosFFT::Impl::real_type_t<T>, float>;
+
+     public:
+      using fftwHandle = std::conditional_t<is_single, fftwf_plan, fftw_plan>;
+      using type       = std::conditional_t<is_cuda, cufftHandle, fftwHandle>;
+    };
 
-  template <typename T1, typename T2>
-  struct transform_type<Kokkos::complex<T1>, Kokkos::complex<T2>> {
-    static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
-    static constexpr TransformType m_type = std::is_same_v<T1, float> ? KOKKOS_FFT_C2C : KOKKOS_FFT_Z2Z;
-    static constexpr TransformType type() { return m_type; };
-  };
+    template <typename ExecutionSpace>
+    using TransformType = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftType, FFTWTransformType>;
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type {
+      static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, T1, Kokkos::complex<T2>> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+
+      static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_R2C : CUFFT_D2Z;
+      static constexpr _TransformType m_cpu_type  = std::is_same_v<T1, float> ? FFTWTransformType::R2C : FFTWTransformType::D2Z;
+
+      static constexpr _TransformType type() {
+        if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
+          return m_cuda_type;
+        } else {
+          return m_cpu_type;
+        }
+      }
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, Kokkos::complex<T1>, T2> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+
+      static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_C2R : CUFFT_Z2D;
+      static constexpr _TransformType m_cpu_type  = std::is_same_v<T1, float> ? FFTWTransformType::C2R : FFTWTransformType::Z2D;
+
+      static constexpr _TransformType type() {
+        if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
+          return m_cuda_type;
+        } else {
+          return m_cpu_type;
+        }
+      }
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, Kokkos::complex<T1>, Kokkos::complex<T2>> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+
+      static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_C2C : CUFFT_Z2Z;
+      static constexpr _TransformType m_cpu_type  = std::is_same_v<T1, float> ? FFTWTransformType::C2C : FFTWTransformType::Z2Z;
+
+      static constexpr _TransformType type() {
+        if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
+          return m_cuda_type;
+        } else {
+          return m_cpu_type;
+        }
+      }
+    };
+
+    // Translates the backend-agnostic Direction into the integer direction
+    // flag of the FFT library chosen by ExecutionSpace: the cuFFT constants
+    // when ExecutionSpace is Kokkos::Cuda, the FFTW constants otherwise.
+    //
+    // ExecutionSpace: execution space the transform is planned for.
+    // direction:      Direction::Forward or Direction::Backward.
+    // Returns the matching library constant as an FFTDirectionType.
+    template <typename ExecutionSpace>
+    auto direction_type(Direction direction) {
+      // Identifiers that start with an underscore followed by an upper-case
+      // letter are reserved for the implementation, so the locals use plain
+      // lower-case names.
+      constexpr bool is_cuda = std::is_same_v<ExecutionSpace, Kokkos::Cuda>;
+      constexpr FFTDirectionType forward_flag  = is_cuda ? CUFFT_FORWARD : FFTW_FORWARD;
+      constexpr FFTDirectionType backward_flag = is_cuda ? CUFFT_INVERSE : FFTW_BACKWARD;
+      return direction == Direction::Forward ? forward_flag : backward_flag;
+    }
+  #else
+    template <typename ExecutionSpace>
+    struct FFTDataType {
+      using float32    = cufftReal;
+      using float64    = cufftDoubleReal;
+      using complex64  = cufftComplex;
+      using complex128 = cufftDoubleComplex;
+    };
+
+    template <typename ExecutionSpace, typename T>
+    struct FFTPlanType {
+      using type = cufftHandle;
+    };
+
+    template <typename ExecutionSpace>
+    using TransformType = cufftType;
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type {
+      static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, T1, Kokkos::complex<T2>> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+      static constexpr _TransformType m_type = std::is_same_v<T1, float> ? CUFFT_R2C : CUFFT_D2Z;
+      static constexpr _TransformType type() { return m_type; };
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, Kokkos::complex<T1>, T2> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+      static constexpr _TransformType m_type = std::is_same_v<T2, float> ? CUFFT_C2R : CUFFT_Z2D;
+      static constexpr _TransformType type() { return m_type; };
+    };
+
+    template <typename ExecutionSpace, typename T1, typename T2>
+    struct transform_type<ExecutionSpace, Kokkos::complex<T1>, Kokkos::complex<T2>> {
+      static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
+      using _TransformType = TransformType<ExecutionSpace>;
+      static constexpr _TransformType m_type = std::is_same_v<T1, float> ? CUFFT_C2C : CUFFT_Z2Z;
+      static constexpr _TransformType type() { return m_type; };
+    };
+
+    // cuFFT-only build: converts Direction into the cuFFT direction flag.
+    // ExecutionSpace is not used in the body; it keeps the call syntax
+    // identical to the host-and-device variant of this function.
+    template <typename ExecutionSpace>
+    auto direction_type(Direction direction) {
+      const bool is_forward = (direction == Direction::Forward);
+      return is_forward ? CUFFT_FORWARD : CUFFT_INVERSE;
+    }
+  #endif
 } // namespace Impl
-}; // namespace KokkosFFT
+} // namespace KokkosFFT
 
 #endif