diff --git a/examples/09_derivative/09_derivative.cpp b/examples/09_derivative/09_derivative.cpp new file mode 100644 index 00000000..d9da9104 --- /dev/null +++ b/examples/09_derivative/09_derivative.cpp @@ -0,0 +1,227 @@ +// SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) +constexpr int TILE0 = 4; +constexpr int TILE1 = 32; +#else +constexpr int TILE0 = 4; +constexpr int TILE1 = 4; +#endif + +using execution_space = Kokkos::DefaultExecutionSpace; +template +using View1D = Kokkos::View; +template +using View2D = Kokkos::View; +template +using View3D = Kokkos::View; + +void compute_derivative(const int nx, const int ny, const int nz, + double& seconds); + +// \brief Initialize the grid, wavenumbers, and the test function values +// u = sin(2 * x) + cos(3 * y) +// \tparam RealView1DType: Type for 1D grids in real space +// \tparam RealView3DType: Type for 3D values in real space +// \tparam ComplexView2DType: Type for 2D values in Fourier space +// +// \param x [out]: 1D grid in x direction +// \param y [out]: 1D grid in y direction +// \param ikx [out]: 2D grid in Fourier space for x direction +// \param iky [out]: 2D grid in Fourier space for y direction +// \param u [out]: 3D field in real space +template +void initialize(RealView1DType& x, RealView1DType& y, ComplexView2DType& ikx, + ComplexView2DType& iky, RealView3DType& u) { + using value_type = typename RealView1DType::non_const_value_type; + const auto pi = Kokkos::numbers::pi_v; + value_type Lx = 2.0 * pi, Ly = 2.0 * pi; + const int nx = u.extent(2), ny = u.extent(1), nz = u.extent(0); + value_type dx = Lx / static_cast(nx), + dy = Ly / static_cast(ny); + + // Initialize grids + auto h_x = Kokkos::create_mirror_view(x); + auto h_y = Kokkos::create_mirror_view(y); + for (int ix = 0; ix < nx; ++ix) h_x(ix) = static_cast(ix) * dx; + for (int iy = 0; iy < ny; ++iy) h_y(iy) = static_cast(iy) * dy; + + // Initialize wave numbers + const Kokkos::complex I(0.0, 1.0); // Imaginary unit + auto h_ikx = Kokkos::create_mirror_view(ikx); + auto h_iky = Kokkos::create_mirror_view(iky); + for (int iy = 0; iy < ny; ++iy) { + for (int ix = 0; ix < nx / 2; ++ix) { + h_ikx(iy, ix) = I * 2.0 * pi * static_cast(ix) / Lx; + } + } + + for (int iy = 0; iy < ny; ++iy) { + for (int ix = 0; ix < nx / 2 + 1; ++ix) { + auto tmp_iy = iy < ny / 2 ? iy : iy - ny; + h_iky(iy, ix) = I * 2.0 * pi * static_cast(tmp_iy) / Ly; + } + } + + // Initialize field + auto h_u = Kokkos::create_mirror_view(u); + for (int jz = 0; jz < nz; jz++) { + for (int jy = 0; jy < ny; jy++) { + for (int jx = 0; jx < nx; jx++) { + h_u(jz, jy, jx) = std::sin(2.0 * h_x(jx)) + std::cos(3.0 * h_y(jy)); + } + } + } + + Kokkos::deep_copy(x, h_x); + Kokkos::deep_copy(y, h_y); + Kokkos::deep_copy(ikx, h_ikx); + Kokkos::deep_copy(iky, h_iky); + Kokkos::deep_copy(u, h_u); +} + +// \brief Compute analytical solution of the derivative +// du/dx + du/dy = 2 * cos(2 * x) - 3 * sin(3 * y) +// \tparam RealView1DType: Type for 1D grids in real space +// \tparam RealView3DType: Type for 3D values in real space +// +// \param x [in]: 1D grid in x direction +// \param y [in]: 1D grid in y direction +// \param dudxy [out]: 3D fiels of the analytical derivative values +template +void analytical_solution(RealView1DType& x, RealView1DType& y, + RealView3DType& dudxy) { + // Copy grids to host + auto h_x = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), x); + auto h_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), y); + + // Initialize field + const int nx = dudxy.extent(2), ny = dudxy.extent(1), nz = dudxy.extent(0); + auto h_dudxy = Kokkos::create_mirror_view(dudxy); + for (int iz = 0; iz < nz; iz++) { + for (int iy = 0; iy < ny; iy++) { + for (int ix = 0; ix < nx; ix++) { + h_dudxy(iz, iy, ix) = + 2.0 * std::cos(2.0 * h_x(ix)) - 3.0 * std::sin(3.0 * h_y(iy)); + } + } + } + + Kokkos::deep_copy(dudxy, h_dudxy); +} + +// \brief Compute the derivative of a function using FFT-based methods and +// compare with the analytical solution +// \param nx [in]: Number of grid points in the x-direction +// \param ny [in]: Number of grid points in the y-direction +// \param nz [in]: Number of grid points in the z-direction +// \param seconds [out]: Time taken to compute the derivatives (in seconds) +void compute_derivative(const int nx, const int ny, const int nz, + double& seconds) { + // View types + using RealView1D = View1D; + using RealView3D = View3D; + using ComplexView2D = View2D>; + using ComplexView3D = View3D>; + + // KokkosFFT plan types + using ForwardPlanType = + KokkosFFT::Plan; + using BackwardPlanType = + KokkosFFT::Plan; + + // Declare grids + RealView1D x("x", nx), y("y", ny); + ComplexView2D ikx("ikx", ny, nx / 2 + 1), iky("iky", ny, nx / 2 + 1); + + // Variables to be transformed + RealView3D u("u", nz, ny, nx), dudxy("dudxy", nz, ny, nx); + ComplexView3D u_hat("u_hat", nz, ny, nx / 2 + 1); + + initialize(x, y, ikx, iky, u); + analytical_solution(x, y, dudxy); + + // MDRanges used in the kernels + using range2D_type = Kokkos::MDRangePolicy< + execution_space, + Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>>; + using tile2D_type = typename range2D_type::tile_type; + using point2D_type = typename range2D_type::point_type; + + execution_space exec; + + range2D_type range2d(exec, point2D_type{{0, 0}}, + point2D_type{{ny, nx / 2 + 1}}, + tile2D_type{{TILE0, TILE1}}); + + ForwardPlanType r2c_plan(exec, u, u_hat, KokkosFFT::Direction::forward, + KokkosFFT::axis_type<2>({-2, -1})); + BackwardPlanType c2r_plan(exec, u_hat, u, KokkosFFT::Direction::backward, + KokkosFFT::axis_type<2>({-2, -1})); + + // Start computation + Kokkos::Timer timer; + + // Forward transform u -> u_hat (=FFT (u)) + r2c_plan.execute(u, u_hat); + + // Compute derivatives by multiplications in Fourier space + Kokkos::parallel_for( + "ComputeDerivative", range2d, KOKKOS_LAMBDA(const int iy, const int ix) { + auto ikx_tmp = ikx(iy, ix), iky_tmp = iky(iy, ix); + for (int iz = 0; iz < nz; ++iz) { + u_hat(iz, iy, ix) = + (ikx_tmp * u_hat(iz, iy, ix) + iky_tmp * u_hat(iz, iy, ix)); + } + }); + + // Baclward transform u_hat -> u (=IFFT (u_hat)) + c2r_plan.execute(u_hat, u); // normalization is made here + exec.fence(); + seconds = timer.seconds(); + + // Check results + auto h_u = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), u); + auto h_dudxy = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), dudxy); + + double epsilon = 1.e-8; + for (int iz = 0; iz < nz; ++iz) { + for (int iy = 0; iy < ny; ++iy) { + for (int ix = 0; ix < nx; ++ix) { + if (std::abs(h_dudxy(iz, iy, ix)) <= epsilon) continue; + auto relative_error = std::abs(h_dudxy(iz, iy, ix) - h_u(iz, iy, ix)) / + std::abs(h_dudxy(iz, iy, ix)); + if (relative_error > epsilon) { + std::cerr << "Error: " << h_dudxy(iz, iy, ix) + << " != " << h_u(iz, iy, ix) << std::endl; + return; + } + } + } + } +} + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + { + const int nx = 128, ny = 128, nz = 128; + double seconds = 0.0; + compute_derivative(nx, ny, nz, seconds); + std::cout << "2D derivative with FFT took: " << seconds << " [s]" + << std::endl; + } + Kokkos::finalize(); + + return 0; +} diff --git a/examples/09_derivative/CMakeLists.txt b/examples/09_derivative/CMakeLists.txt new file mode 100644 index 00000000..d81fbc36 --- /dev/null +++ b/examples/09_derivative/CMakeLists.txt @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +add_executable(09_derivative 09_derivative.cpp) +target_link_libraries(09_derivative PUBLIC KokkosFFT::fft) diff --git a/examples/09_derivative/derivative.py b/examples/09_derivative/derivative.py new file mode 100644 index 00000000..c6e1ecf8 --- /dev/null +++ b/examples/09_derivative/derivative.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +import numpy as np +import time + +def initialize(nx: int, ny: int, nz: int): + """ + Initialize the grid, wavenumbers, and test function values. + u = sin(2 * x) + cos(3 * y) + + Parameters: + nx (int): Number of grid points in the x-direction. + ny (int): Number of grid points in the y-direction. + nz (int): Number of grid points in the z-direction (batch direction). + + Returns: + tuple: A tuple containing: + - X (np.ndarray): 2D array of x-coordinates on the grid. + - Y (np.ndarray): 2D array of y-coordinates on the grid. + - ikx (np.ndarray): 2D array of Fourier wavenumbers in the x-direction. + - iky (np.ndarray): 2D array of Fourier wavenumbers in the y-direction. + - u (np.ndarray): 3D array of test function values. + """ + Lx, Ly = 2*np.pi, 2*np.pi + + # Define and initialize the grid + x = np.linspace(0, 2*np.pi, nx, endpoint=False) + y = np.linspace(0, 2*np.pi, ny, endpoint=False) + + X, Y = np.meshgrid(x, y) + + # Initialize the wavenumbers + ikx = np.zeros((ny, nx//2+1), dtype=np.complex128) + iky = np.zeros((ny, nx//2+1), dtype=np.complex128) + + for iy in range(ny): + for ix in range(nx//2): + ikx[iy, ix] = 1j * 2 * np.pi / Lx * ix + + for iy in range(ny): + for ix in range(nx//2+1): + _iy = iy if iy < ny//2 else iy - ny + iky[iy, ix] = 1j * 2 * np.pi / Lx * _iy + + # Initialize the data + u = np.sin(2 * X) + np.cos(3 * Y) + u_batched = np.repeat(u[np.newaxis, :, :], nz, axis=0) + + return X, Y, ikx, iky, u_batched + +def analytical_solution(X: np.ndarray, Y: np.ndarray, nz: int) -> np.ndarray: + """ + Compute the analytical solution for the derivative of the test function: + dudxy = du/dx + du/dy = 2 * cos(2 * x) - 3 * sin(3 * y) + + Parameters: + X (np.ndarray): 2D array of x-coordinates on the grid. + Y (np.ndarray): 2D array of y-coordinates on the grid. + nz (int): Number of grid points in the z-direction (batch direction). + + Returns: + np.ndarray: 3D array of the analytical derivative values. + """ + dudxy = 2 * np.cos(2 * X) - 3 * np.sin(3 * Y) + return np.repeat(dudxy[np.newaxis, :, :], nz, axis=0) + +def compute_derivative(nx: int, ny: int, nz: int) -> np.float64: + """ + Compute the derivative of a function using FFT-based methods and + compare with the analytical solution. + + Parameters: + nx (int): Number of grid points in the x-direction. + ny (int): Number of grid points in the y-direction. + nz (int): Number of grid points in the z-direction (batch direction). + + Returns: + float: Time taken to compute the derivatives (in seconds). + """ + X, Y, ikx, iky, u = initialize(nx, ny, nz) + dudxy = analytical_solution(X, Y, nz) + + # Compute the derivative + start = time.time() + + # Forward transform u -> u_hat (=FFT (u)) + u_hat = np.fft.rfft2(u) + + # Compute derivatives by multiplications in Fourier space + u_hat = ikx * u_hat + iky * u_hat + + # Backward transform u_hat -> u (=IFFT (u_hat)) + u = np.fft.irfft2(u_hat) + seconds = time.time() - start + + np.testing.assert_allclose(u, dudxy, atol=1e-10) + + return seconds + +if __name__ == '__main__': + nx, ny, nz = 128, 128, 128 + seconds = compute_derivative(nx, ny, nz) + print(f"2D derivative with FFT took {seconds} [s]") + \ No newline at end of file diff --git a/examples/09_derivative/test_derivative.py b/examples/09_derivative/test_derivative.py new file mode 100644 index 00000000..b9a9a35b --- /dev/null +++ b/examples/09_derivative/test_derivative.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +import numpy as np +import pytest +from derivative import ( + initialize, + analytical_solution, + compute_derivative, +) + +@pytest.mark.parametrize("n", [16, 32]) +def test_initialize(n: int) -> None: + X, Y, ikx, iky, u = initialize(n, n, n) + + # Check shapes + assert X.shape == (n, n) + assert Y.shape == (n, n) + assert ikx.shape == (n, n//2+1) + assert iky.shape == (n, n//2+1) + assert u.shape == (n, n, n) + + # Check dx, dy + lx, ly = 2*np.pi, 2*np.pi + dx, dy = lx / n, ly / n + np.testing.assert_allclose(X[0, 1] - X[0, 0], dx) + np.testing.assert_allclose(Y[1, 0] - Y[0, 0], dx) + + # Check dikx, diky + dikx, diky = 1j * 2 * np.pi / lx, 1j * 2 * np.pi / ly + np.testing.assert_allclose(ikx[0, 1], dikx) + np.testing.assert_allclose(iky[1, 0], diky) + + # Check the Hermitian symmetry of iky + # iky is Hermitian, so iky[i, j] = -iky[-i, j] + for i in range(1, n//2): + np.testing.assert_allclose(iky[i], -iky[-i]) + + # Check it is a batch + u0 = u[0] + for i in range(1, n): + np.testing.assert_allclose(u[i], u0) + +@pytest.mark.parametrize("n", [16, 32]) +def test_analytical_solution(n: int) -> None: + X, Y, ikx, iky, u = initialize(n, n, n) + dudxy = analytical_solution(X, Y, n) + + # Check shapes + assert dudxy.shape == (n, n, n) + + # Check it is a batch + dudxy0 = dudxy[0] + for i in range(1, n): + np.testing.assert_allclose(dudxy[i], dudxy0) + +@pytest.mark.parametrize("n", [16, 32]) +def test_derivative(n: int) -> None: + # The following function fails if it is not correct + _ = compute_derivative(n, n, n) + \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 94b3c84a..501b0a3a 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(05_1DFFT_HOST_DEVICE) add_subdirectory(06_1DFFT_reuse_plans) add_subdirectory(07_unmanaged_views) add_subdirectory(08_inplace_FFT) +add_subdirectory(09_derivative)