Skip to content

Commit

Permalink
Merge pull request #18 from CExA-project/fft-on-host-and-device
Browse files Browse the repository at this point in the history
FFT on Host and Device
  • Loading branch information
yasahi-hpc authored Jan 5, 2024
2 parents 0cdb8ff + 06cf61b commit 66b979a
Show file tree
Hide file tree
Showing 21 changed files with 690 additions and 301 deletions.
28 changes: 24 additions & 4 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest

env:
backends: OPENMP CUDA
backends: OPENMP CUDA CUDA_HOST_DEVICE
CUDA_ARCHITECTURES: AMPERE80
CMAKE_CXX_COMPILER: /work/tpls/kokkos/bin/nvcc_wrapper
container: nvidia_env
Expand Down Expand Up @@ -54,6 +54,15 @@ jobs:
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
-DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON
- name: Configure CMake for CUDA backend with HOST and DEVICE option
# Configure CMake in the 'build_CUDA_HOST_DEVICE' subdirectory with KokkosFFT_ENABLE_HOST_AND_DEVICE=ON. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: |
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_CUDA_HOST_DEVICE \
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
-DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_${{env.CUDA_ARCHITECTURES}}=ON -DBUILD_TESTING=ON \
-DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
- name: Build
# Build your program with the given configuration
run: |
Expand All @@ -73,7 +82,7 @@ jobs:
runs-on: ubuntu-latest

env:
backend: HIP
backends: HIP HIP_HOST_DEVICE
architecture: VEGA90A
CMAKE_CXX_COMPILER: hipcc
container: amd_env
Expand All @@ -98,11 +107,22 @@ jobs:
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: |
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_${{env.backend}} \
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_HIP \
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
-DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON
- name: Configure CMake for HIP backend with HOST and DEVICE option
# Configure CMake in the 'build_HIP_HOST_DEVICE' subdirectory with KokkosFFT_ENABLE_HOST_AND_DEVICE=ON. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: |
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_HIP_HOST_DEVICE \
-DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \
-DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON \
-DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
- name: Build
# Build your program with the given configuration
run: |
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake --build build_${{env.backend}} --config ${{env.BUILD_TYPE}} -j 2
for backend in ${{ env.backends }}; do
docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake --build build_${backend} --config ${{env.BUILD_TYPE}} -j 2
done
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_SOURCE_DIR}/cmake")

# Options
# BUILD_EXAMPLES (default ON): "Build kokkos-fft examples".
option(BUILD_EXAMPLES "Build kokkos-fft examples" ON)
# KokkosFFT_ENABLE_HOST_AND_DEVICE (default OFF): opt-in switch to
# "Enable fft on both host and device".
option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable fft on both host and device" OFF)

find_package(Kokkos CONFIG)
if(NOT kokkos_FOUND)
Expand Down
10 changes: 10 additions & 0 deletions common/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,19 @@ target_link_libraries(common
if(Kokkos_ENABLE_CUDA)
find_package(CUDAToolkit REQUIRED COMPONENTS cufft)
target_link_libraries(common INTERFACE CUDA::cufft)
# Host-and-device mode on the CUDA backend: FFTW is needed in addition to cuFFT.
if(KokkosFFT_ENABLE_HOST_AND_DEVICE)
# MODULE mode: FFTW is located through a Find module; REQUIRED aborts
# configuration when it cannot be found.
find_package(FFTW MODULE REQUIRED)
# INTERFACE: the FFTW targets are propagated to everything linking `common`.
target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)
# Consumers of `common` are compiled with ENABLE_HOST_AND_DEVICE defined;
# KokkosFFT_Cuda_types.hpp tests this macro with #ifdef.
target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE)
endif()
elseif(Kokkos_ENABLE_HIP)
find_package(hipfft REQUIRED)
target_link_libraries(common INTERFACE hip::hipfft)
# Host-and-device mode on the HIP backend: FFTW is needed in addition to hipFFT.
if(KokkosFFT_ENABLE_HOST_AND_DEVICE)
# MODULE mode: FFTW is located through a Find module; REQUIRED aborts
# configuration when it cannot be found.
find_package(FFTW MODULE REQUIRED)
# INTERFACE: the FFTW targets are propagated to everything linking `common`.
target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)
# Consumers of `common` are compiled with ENABLE_HOST_AND_DEVICE defined.
target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE)
endif()
elseif(Kokkos_ENABLE_OPENMP)
find_package(FFTW MODULE REQUIRED)
target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP)
Expand Down
202 changes: 158 additions & 44 deletions common/src/KokkosFFT_Cuda_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,173 @@

#include <cufft.h>

// Check the size and alignment of the cuFFT complex types against
// Kokkos::complex: the sizes must be equal and the cuFFT alignment must not be
// stricter than that of the corresponding Kokkos::complex type.
static_assert(sizeof(cufftComplex) == sizeof(Kokkos::complex<float>));
static_assert(alignof(cufftComplex) <= alignof(Kokkos::complex<float>));

static_assert(sizeof(cufftDoubleComplex) == sizeof(Kokkos::complex<double>));
static_assert(alignof(cufftDoubleComplex) <= alignof(Kokkos::complex<double>));

// Host-and-device build only: include FFTW and apply the same size/alignment
// checks to the FFTW complex types (single and double precision).
#ifdef ENABLE_HOST_AND_DEVICE
#include <fftw3.h>
#include "KokkosFFT_utils.hpp"
static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex<float>));
static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex<float>));

static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex<double>));
static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex<double>));
#endif

namespace KokkosFFT {
namespace Impl {
// KOKKOS_FFT_* names mapped one-to-one onto the cuFFT constants:
// two transform directions followed by six transform kinds.
#define KOKKOS_FFT_FORWARD CUFFT_FORWARD
#define KOKKOS_FFT_BACKWARD CUFFT_INVERSE
#define KOKKOS_FFT_R2C CUFFT_R2C
#define KOKKOS_FFT_D2Z CUFFT_D2Z
#define KOKKOS_FFT_C2R CUFFT_C2R
#define KOKKOS_FFT_Z2D CUFFT_Z2D
#define KOKKOS_FFT_C2C CUFFT_C2C
#define KOKKOS_FFT_Z2Z CUFFT_Z2Z

struct FFTDataType {
using float32 = cufftReal;
using float64 = cufftDoubleReal;
using complex64 = cufftComplex;
using complex128 = cufftDoubleComplex;
enum class Direction {
Forward,
Backward,
};

// Plan type selector: `type` is cufftHandle regardless of the value type T.
template <typename T>
struct FFTPlanType {
using type = cufftHandle;
};

// Alias of cuFFT's result type.
using FFTResultType = cufftResult;
// Alias of cuFFT's transform-kind enum.
using TransformType = cufftType;
// Transform direction is carried as a plain int.
using FFTDirectionType = int;

// Primary template: used when neither T1 nor T2 is a Kokkos::complex (the
// partial specializations cover every pair involving Kokkos::complex).
// NOTE(review): the assertion fires only when T1 and T2 differ; for identical
// T1/T2 it passes and the struct has no type() member - confirm this is intended.
template <typename T1, typename T2>
struct transform_type {
static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
};
#ifdef ENABLE_HOST_AND_DEVICE
// Transform kinds returned by transform_type<...>::type() when the execution
// space is not Kokkos::Cuda. The names mirror the cuFFT constants CUFFT_R2C ... CUFFT_Z2Z.
enum class FFTWTransformType {
R2C, // (real, complex) pair, chosen when T1 is float
D2Z, // (real, complex) pair, chosen otherwise
C2R, // (complex, real) pair, chosen when T1 is float
Z2D, // (complex, real) pair, chosen otherwise
C2C, // (complex, complex) pair, chosen when T1 is float
Z2Z  // (complex, complex) pair, chosen otherwise
};

// Specialization for a (T1, Kokkos::complex<T2>) pair.
// T1 and T2 must be the same type; the kind is KOKKOS_FFT_R2C when T1 is
// float and KOKKOS_FFT_D2Z otherwise.
template <typename T1, typename T2>
struct transform_type<T1, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
static constexpr TransformType m_type = std::is_same_v<T1, float> ? KOKKOS_FFT_R2C : KOKKOS_FFT_D2Z;
// Accessor returning m_type.
static constexpr TransformType type() { return m_type; };
};
// Real and complex element types per execution space:
// the cuFFT types when ExecutionSpace is Kokkos::Cuda, otherwise plain
// float/double and the FFTW complex types.
template <typename ExecutionSpace>
struct FFTDataType {
using float32 = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftReal, float>;
using float64 = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftDoubleReal, double>;
using complex64 = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftComplex, fftwf_complex>;
using complex128 = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftDoubleComplex, fftw_complex>;
};

// Specialization for a (Kokkos::complex<T1>, T2) pair.
// T1 and T2 must be the same type; the kind is KOKKOS_FFT_C2R when T2 is
// float and KOKKOS_FFT_Z2D otherwise.
template <typename T1, typename T2>
struct transform_type<Kokkos::complex<T1>, T2> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
static constexpr TransformType m_type = std::is_same_v<T2, float> ? KOKKOS_FFT_C2R : KOKKOS_FFT_Z2D;
// Accessor returning m_type.
static constexpr TransformType type() { return m_type; };
};
// Plan handle type per execution space and value type T.
//   fftwHandle: fftwf_plan when real_type_t<T> is float, fftw_plan otherwise.
//   type:       cufftHandle for Kokkos::Cuda, fftwHandle for any other space.
// NOTE(review): real_type_t is presumably provided by KokkosFFT_utils.hpp - confirm.
template <typename ExecutionSpace, typename T>
struct FFTPlanType {
using fftwHandle = std::conditional_t<std::is_same_v<KokkosFFT::Impl::real_type_t<T>, float>, fftwf_plan, fftw_plan>;
using type = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftHandle, fftwHandle>;
};

// Specialization for a (Kokkos::complex<T1>, Kokkos::complex<T2>) pair.
// T1 and T2 must be the same type; the kind is KOKKOS_FFT_C2C when T1 is
// float and KOKKOS_FFT_Z2Z otherwise.
template <typename T1, typename T2>
struct transform_type<Kokkos::complex<T1>, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
static constexpr TransformType m_type = std::is_same_v<T1, float> ? KOKKOS_FFT_C2C : KOKKOS_FFT_Z2Z;
// Accessor returning m_type.
static constexpr TransformType type() { return m_type; };
};
// Transform-kind enum per execution space: cufftType for Kokkos::Cuda,
// FFTWTransformType for any other execution space.
template <typename ExecutionSpace>
using TransformType = std::conditional_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>, cufftType, FFTWTransformType>;

// Primary template: used when neither T1 nor T2 is a Kokkos::complex (the
// partial specializations cover every pair involving Kokkos::complex).
// NOTE(review): the assertion fires only when T1 and T2 differ; for identical
// T1/T2 it passes and the struct has no type() member - confirm this is intended.
template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type {
static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
};

template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, T1, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
using _TransformType = TransformType<ExecutionSpace>;

static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_R2C : CUFFT_D2Z;
static constexpr _TransformType m_cpu_type = std::is_same_v<T1, float> ? FFTWTransformType::R2C : FFTWTransformType::D2Z;

static constexpr _TransformType type() {
if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
return m_cuda_type;
} else {
return m_cpu_type;
}
}
};

template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, Kokkos::complex<T1>, T2> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
using _TransformType = TransformType<ExecutionSpace>;

static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_C2R : CUFFT_Z2D;
static constexpr _TransformType m_cpu_type = std::is_same_v<T1, float> ? FFTWTransformType::C2R : FFTWTransformType::Z2D;

static constexpr _TransformType type() {
if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
return m_cuda_type;
} else {
return m_cpu_type;
}
}
};

template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, Kokkos::complex<T1>, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
using _TransformType = TransformType<ExecutionSpace>;

static constexpr _TransformType m_cuda_type = std::is_same_v<T1, float> ? CUFFT_C2C : CUFFT_Z2Z;
static constexpr _TransformType m_cpu_type = std::is_same_v<T1, float> ? FFTWTransformType::C2C : FFTWTransformType::Z2Z;

static constexpr _TransformType type() {
if constexpr(std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
return m_cuda_type;
} else {
return m_cpu_type;
}
}
};

// Maps the backend-neutral Direction onto the direction constant of the
// library used for ExecutionSpace.
//
// Returns CUFFT_FORWARD / CUFFT_INVERSE when ExecutionSpace is Kokkos::Cuda and
// FFTW_FORWARD / FFTW_BACKWARD otherwise, as an FFTDirectionType value.
// Any Direction other than Direction::Forward is treated as backward.
template <typename ExecutionSpace>
auto direction_type(Direction direction) {
  // Local names avoid the leading-underscore-plus-uppercase form, which is
  // reserved for the implementation.
  constexpr bool is_cuda = std::is_same_v<ExecutionSpace, Kokkos::Cuda>;
  constexpr FFTDirectionType forward_dir = is_cuda ? CUFFT_FORWARD : FFTW_FORWARD;
  constexpr FFTDirectionType backward_dir = is_cuda ? CUFFT_INVERSE : FFTW_BACKWARD;
  return direction == Direction::Forward ? forward_dir : backward_dir;
}
#else
// Build without ENABLE_HOST_AND_DEVICE: the element types are the cuFFT types
// for every ExecutionSpace; the template parameter is not used in the body.
template <typename ExecutionSpace>
struct FFTDataType {
using float32 = cufftReal;
using float64 = cufftDoubleReal;
using complex64 = cufftComplex;
using complex128 = cufftDoubleComplex;
};

// Build without ENABLE_HOST_AND_DEVICE: the plan handle is cufftHandle for
// every ExecutionSpace and value type T; neither parameter is used in the body.
template <typename ExecutionSpace, typename T>
struct FFTPlanType {
using type = cufftHandle;
};

// Build without ENABLE_HOST_AND_DEVICE: the transform-kind enum is cufftType
// for every ExecutionSpace.
template <typename ExecutionSpace>
using TransformType = cufftType;

// Primary template: used when neither T1 nor T2 is a Kokkos::complex (the
// partial specializations cover every pair involving Kokkos::complex).
// NOTE(review): the assertion fires only when T1 and T2 differ; for identical
// T1/T2 it passes and the struct has no type() member - confirm this is intended.
template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type {
static_assert(std::is_same_v<T1, T2>, "Real to real transform is unavailable");
};

// Specialization for a (T1, Kokkos::complex<T2>) pair.
// T1 and T2 must be the same type; the kind is CUFFT_R2C when T1 is float and
// CUFFT_D2Z otherwise.
template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, T1, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
// Transform-kind enum for this execution space.
using _TransformType = TransformType<ExecutionSpace>;
static constexpr _TransformType m_type = std::is_same_v<T1, float> ? CUFFT_R2C : CUFFT_D2Z;
// Accessor returning m_type.
static constexpr _TransformType type() { return m_type; };
};

// Specialization for a (Kokkos::complex<T1>, T2) pair.
// T1 and T2 must be the same type; the kind is CUFFT_C2R for float and
// CUFFT_Z2D otherwise.
template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, Kokkos::complex<T1>, T2> {
  static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
  // Transform-kind enum for this execution space.
  using _TransformType = TransformType<ExecutionSpace>;
  // Precision is tested on T1, as in the other transform_type specializations;
  // the static_assert above makes T1 and T2 interchangeable here.
  static constexpr _TransformType m_type = std::is_same_v<T1, float> ? CUFFT_C2R : CUFFT_Z2D;
  // Accessor returning m_type.
  static constexpr _TransformType type() { return m_type; }
};

// Specialization for a (Kokkos::complex<T1>, Kokkos::complex<T2>) pair.
// T1 and T2 must be the same type; the kind is CUFFT_C2C when T1 is float and
// CUFFT_Z2Z otherwise.
template <typename ExecutionSpace, typename T1, typename T2>
struct transform_type<ExecutionSpace, Kokkos::complex<T1>, Kokkos::complex<T2>> {
static_assert(std::is_same_v<T1, T2>, "T1 and T2 should have the same precision");
// Transform-kind enum for this execution space.
using _TransformType = TransformType<ExecutionSpace>;
static constexpr _TransformType m_type = std::is_same_v<T1, float> ? CUFFT_C2C : CUFFT_Z2Z;
// Accessor returning m_type.
static constexpr _TransformType type() { return m_type; };
};

// Translates Direction into the matching cuFFT direction constant:
// CUFFT_FORWARD for Direction::Forward, CUFFT_INVERSE for anything else.
// ExecutionSpace does not influence the result in this build.
template <typename ExecutionSpace>
auto direction_type(Direction direction) {
  if (direction == Direction::Forward) {
    return CUFFT_FORWARD;
  }
  return CUFFT_INVERSE;
}
#endif
} // namespace Impl
}; // namespace KokkosFFT
} // namespace KokkosFFT

#endif
Loading

0 comments on commit 66b979a

Please sign in to comment.