From 2d2f9e92e8329aee2c148a0a0d891cff8a05dcb5 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Sun, 10 Nov 2024 19:54:01 +0900 Subject: [PATCH] Add advanced example --- .../09_hasegawa_wakatani.cpp | 565 ++++++++++++++++++ examples/09_HasegawaWakatani/CMakeLists.txt | 6 + .../09_HasegawaWakatani/hasegawa_wakatani.py | 232 +++++++ examples/CMakeLists.txt | 1 + 4 files changed, 804 insertions(+) create mode 100644 examples/09_HasegawaWakatani/09_hasegawa_wakatani.cpp create mode 100644 examples/09_HasegawaWakatani/CMakeLists.txt create mode 100644 examples/09_HasegawaWakatani/hasegawa_wakatani.py diff --git a/examples/09_HasegawaWakatani/09_hasegawa_wakatani.cpp b/examples/09_HasegawaWakatani/09_hasegawa_wakatani.cpp new file mode 100644 index 00000000..7a056fff --- /dev/null +++ b/examples/09_HasegawaWakatani/09_hasegawa_wakatani.cpp @@ -0,0 +1,565 @@ +// SPDX-FileCopyrightText: (C) The Kokkos-FFT development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) +constexpr int TILE0 = 4; +constexpr int TILE1 = 32; +#else +constexpr int TILE0 = 4; +constexpr int TILE1 = 4; +#endif + +using execution_space = Kokkos::DefaultExecutionSpace; +template +using View1D = Kokkos::View; +template +using View2D = Kokkos::View; +template +using View3D = Kokkos::View; + +// \brief A struct that manages grid in wavenumber space +struct Grid { + //! Grid in x direction (nkx * 2 + 1) + View1D m_kx; + + //! Grid in y direction (nky + 1) + View1D m_kyh; + + //! Grid in x and y direction (nky + 1, nkx * 2 + 1) + View2D m_ksq; + + Grid(int nx, int ny, double lx, double ly) { + int nkx = (nx - 2) / 3, nky = (ny - 2) / 3; + int nkx2 = nkx * 2 + 1, nkyh = nky + 1; + m_kx = View1D("m_kx", nkx2); + m_kyh = View1D("m_kyh", nkyh); + m_ksq = View2D("m_ksq", nkyh, nkx2); + + auto h_kx = Kokkos::create_mirror_view(m_kx); + auto h_kyh = Kokkos::create_mirror_view(m_kyh); + auto h_ksq = Kokkos::create_mirror_view(m_ksq); + + // [0, dkx, 2*dkx, ..., nkx * dkx, -nkx * dkx, ..., -dkx] + for (int ikx = 0; ikx < nkx + 1; ikx++) { + h_kx(ikx) = static_cast(ikx) / lx; + } + + for (int ikx = 1; ikx < nkx + 1; ikx++) { + h_kx(nkx2 - ikx) = -static_cast(ikx) / lx; + } + + // [0, dky, 2*dky, ..., nky * dky] + for (int iky = 0; iky < nkyh; iky++) { + h_kyh(iky) = static_cast(iky) / ly; + } + + // kx**2 + ky**2 + for (int iky = 0; iky < nkyh; iky++) { + for (int ikx = 0; ikx < nkx2; ikx++) { + h_ksq(iky, ikx) = h_kx(ikx) * h_kx(ikx) + h_kyh(iky) * h_kyh(iky); + } + } + + Kokkos::deep_copy(m_kx, h_kx); + Kokkos::deep_copy(m_kyh, h_kyh); + Kokkos::deep_copy(m_ksq, h_ksq); + } +}; + +// \brief A struct that manages physical quantities in wavenumber space +struct Variables { + View3D> m_fk; + View2D> m_pk; + + Variables(Grid& grid, double init_val = 0.001) { + double random_number = 1.0; + constexpr int nb_vars = 2; + const int nkx2 = grid.m_ksq.extent(1), nkyh = grid.m_ksq.extent(0); + m_fk = View3D>("m_fk", nb_vars, nkyh, nkx2); + m_pk = View2D>("m_pk", nkyh, nkx2); + + const Kokkos::complex I(0.0, 1.0); // Imaginary unit + auto h_fk = Kokkos::create_mirror_view(m_fk); + auto h_ksq = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), grid.m_ksq); + for (int iky = 0; iky < nkyh; iky++) { + for (int ikx = 0; ikx < nkx2; ikx++) { + h_fk(0, iky, ikx) = init_val / (1.0 + h_ksq(iky, ikx)) * + Kokkos::exp(I * 2.0 * M_PI * random_number); + h_fk(1, iky, ikx) = -h_fk(0, iky, ikx) * h_ksq(iky, ikx); + } + } + Kokkos::deep_copy(m_fk, h_fk); + } +}; + +// \brief A class that manages the time integration using +// the 4th order Runge-Kutta method +template +class RK4th { + using value_type = typename ViewType::non_const_value_type; + using float_type = KokkosFFT::Impl::base_floating_point_type; + using BufferViewType = View1D; + + const int m_order = 4; + const float_type m_h; + std::size_t m_array_size; + BufferViewType m_y, m_k1, m_k2, m_k3; + + public: + RK4th(const ViewType& y, float_type h) : m_h(h) { + m_array_size = y.size(); + m_y = BufferViewType("m_y", m_array_size); + m_k1 = BufferViewType("m_k1", m_array_size); + m_k2 = BufferViewType("m_k2", m_array_size); + m_k3 = BufferViewType("m_k3", m_array_size); + } + + auto order() { return m_order; } + + void advance(ViewType& dydt, ViewType& y, int step) { + auto h = m_h; + auto* y_data = y.data(); + const auto* dydt_data = dydt.data(); + if (step == 0) { + auto* y_copy_data = m_y.data(); + auto* k1_data = m_k1.data(); + Kokkos::parallel_for( + "rk_step0", + Kokkos::RangePolicy>( + execution_space(), 0, m_array_size), + KOKKOS_LAMBDA(const int& i) { + y_copy_data[i] = y_data[i]; + k1_data[i] = dydt_data[i] * h; + y_data[i] = y_copy_data[i] + k1_data[i] / 2.0; + }); + } else if (step == 1) { + const auto* y_copy_data = m_y.data(); + auto* k2_data = m_k2.data(); + Kokkos::parallel_for( + "rk_step1", + Kokkos::RangePolicy>( + execution_space(), 0, m_array_size), + KOKKOS_LAMBDA(const int& i) { + k2_data[i] = dydt_data[i] * h; + y_data[i] = y_copy_data[i] + k2_data[i] / 2.0; + }); + } else if (step == 2) { + const auto* y_copy_data = m_y.data(); + auto* k3_data = m_k3.data(); + Kokkos::parallel_for( + "rk_step2", + Kokkos::RangePolicy>( + execution_space(), 0, m_array_size), + KOKKOS_LAMBDA(const int& i) { + k3_data[i] = dydt_data[i] * h; + y_data[i] = y_copy_data[i] + k3_data[i]; + }); + } else if (step == 3) { + const auto* y_copy_data = m_y.data(); + const auto* k1_data = m_k1.data(); + const auto* k2_data = m_k2.data(); + const auto* k3_data = m_k3.data(); + Kokkos::parallel_for( + "rk_step3", + Kokkos::RangePolicy>( + execution_space(), 0, m_array_size), + KOKKOS_LAMBDA(const int& i) { + auto tmp_k4 = dydt_data[i] * h; + y_data[i] = y_copy_data[i] + (k1_data[i] + 2.0 * k2_data[i] + + 2.0 * k3_data[i] + tmp_k4) / + 6.0; + }); + } else { + throw std::runtime_error("step should be 0, 1, 2, or 3"); + } + } +}; + +// \brief Apply the reality condition in wavenumber space +// Force A to satisfy the following conditios +// A[i] == conj(A[-i]) and A[0] == 0 +// +// \tparam ViewType The type of the view +// \tparam MaskViewType The type of the mask view +// \param view View to be modified (2 * nkx + 1) or (nvars, 2 * nkx + 1) +// \param mask The mask view [0, 1, 1, ..., 1] (nkx + 1) +template +void realityCondition(const ViewType& view, const MaskViewType& mask) { + if constexpr (ViewType::rank() == 1) { + const int nk0 = (view.extent(0) - 1) / 2; + Kokkos::parallel_for( + "reality_condition", + Kokkos::RangePolicy>( + execution_space(), 0, nk0 + 1), + KOKKOS_LAMBDA(int i0) { + auto tmp_view = view(i0); + view(i0) = mask(i0) * tmp_view; + + int dst_idx = i0 == 0 ? 0 : 2 * nk0 + 1 - i0; + view(dst_idx) = mask(i0) * Kokkos::conj(tmp_view); + }); + } else if constexpr (ViewType::rank() == 2) { + const int nk0 = view.extent(0); + const int nk1 = (view.extent(1) - 1) / 2; + + Kokkos::parallel_for( + "reality_condition", + Kokkos::RangePolicy>( + execution_space(), 0, nk1 + 1), + KOKKOS_LAMBDA(int i1) { + for (int i0 = 0; i0 < nk0; i0++) { + auto tmp_view = view(i0, i1); + view(i0, i1) = mask(i1) * tmp_view; + + int dst_idx = i1 == 0 ? 0 : 2 * nk1 + 1 - i1; + view(i0, dst_idx) = mask(i1) * Kokkos::conj(tmp_view); + } + }); + } else { + throw std::runtime_error("rank should be 1 or 2"); + } +} + +// \brief A class that solves the Hasegawa-Wakatani equation using +// spectral methods +class HasegawaWakatani { + using OdeSolverType = RK4th>>; + using ForwardPlanType = KokkosFFT::Plan, + View3D>, 2>; + using BackwardPlanType = + KokkosFFT::Plan>, + View3D, 2>; + + // MDRanges used in the kernels + using range2D_type = Kokkos::MDRangePolicy< + execution_space, + Kokkos::Rank<2, Kokkos::Iterate::Default, Kokkos::Iterate::Default>>; + using tile2D_type = typename range2D_type::tile_type; + using point2D_type = typename range2D_type::point_type; + + // MDRanges used in the kernels + using range3D_type = Kokkos::MDRangePolicy< + execution_space, + Kokkos::Rank<3, Kokkos::Iterate::Default, Kokkos::Iterate::Default>>; + using tile3D_type = typename range3D_type::tile_type; + using point3D_type = typename range3D_type::point_type; + + using pair_type = std::pair; + + std::unique_ptr m_grid; + std::unique_ptr m_variables; + std::unique_ptr m_ode; + std::unique_ptr m_forward_plan; + std::unique_ptr m_backward_plan; + + View3D> m_dfkdt; + View3D> m_nonlinear_k; + View3D> m_forward_buffer; + View3D> m_backward_buffer; + View3D> m_ik_fg_all; + + View3D m_dfgdx_all; + View3D m_conv; + + View1D m_mask; + View1D m_adiabacity_factor; + View2D m_poisson_operator; + + const int m_nbiter; + const double m_ca = 3.0; + const double m_nu = 0.01; + const double m_eta = 5.0; + double m_norm_coef; + int m_nkx2, m_nkyh, m_nx, m_ny; + + public: + HasegawaWakatani(int nx, double lx, int nbiter, double dt) + : m_nbiter(nbiter), m_nx(nx), m_ny(nx) { + m_grid = std::make_unique(nx, nx, lx, lx); + m_variables = std::make_unique(*m_grid); + m_ode = std::make_unique(m_variables->m_fk, dt); + + m_nkx2 = m_grid->m_ksq.extent(1); + m_nkyh = m_grid->m_ksq.extent(0); + m_norm_coef = static_cast(m_nx * m_ny); + const int nkx = (m_nkx2 - 1) / 2; + constexpr int nb_vars = 2; + m_dfkdt = + View3D>("m_dfkdt", nb_vars, m_nkyh, m_nkx2); + m_nonlinear_k = View3D>("m_nonlinear_k", nb_vars, + m_nkyh, m_nkx2); + m_forward_buffer = View3D>( + "m_forward_buffer", nb_vars, m_ny, m_nx / 2 + 1); + m_backward_buffer = View3D>("m_backward_buffer", 6, + m_ny, m_nx / 2 + 1); + m_ik_fg_all = + View3D>("m_ik_fg_all", 6, m_nkyh, m_nkx2); + m_dfgdx_all = View3D("m_dfgdx_all", 6, m_ny, m_nx); + m_conv = View3D("m_conv", nb_vars, m_ny, m_nx); + m_mask = View1D("m_mask", nkx); + m_poisson_operator = View2D("m_poisson_operator", m_nkyh, m_nkx2); + m_adiabacity_factor = View1D("m_adiabacity_factor", m_nkyh); + + m_forward_plan = std::make_unique( + execution_space(), m_conv, m_forward_buffer, + KokkosFFT::Direction::forward, KokkosFFT::axis_type<2>({-2, -1})); + m_backward_plan = std::make_unique( + execution_space(), m_backward_buffer, m_dfgdx_all, + KokkosFFT::Direction::backward, KokkosFFT::axis_type<2>({-2, -1})); + auto h_mask = Kokkos::create_mirror_view(m_mask); + auto h_poisson_operator = Kokkos::create_mirror_view(m_poisson_operator); + auto h_adiabacity_factor = Kokkos::create_mirror_view(m_adiabacity_factor); + auto h_ksq = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), m_grid->m_ksq); + auto h_kyh = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), m_grid->m_kyh); + for (int ikx = 1; ikx < nkx; ikx++) { + h_mask(ikx) = 1.0; + } + + for (int iky = 0; iky < m_nkyh; iky++) { + h_adiabacity_factor(iky) = h_kyh(iky) * h_kyh(iky); + for (int ikx = 0; ikx < m_nkx2; ikx++) { + h_poisson_operator(iky, ikx) = + (ikx == 0 && iky == 0) ? 0.0 : -1.0 / (h_ksq(iky, ikx)); + } + } + Kokkos::deep_copy(m_mask, h_mask); + Kokkos::deep_copy(m_poisson_operator, h_poisson_operator); + Kokkos::deep_copy(m_adiabacity_factor, h_adiabacity_factor); + + auto rhok = Kokkos::subview(m_variables->m_fk, 1, Kokkos::ALL, Kokkos::ALL); + poisson(rhok, m_variables->m_pk); + auto sub_fk = Kokkos::subview(m_variables->m_fk, Kokkos::ALL, 0, + Kokkos::ALL); // ky == 0 component + auto sub_pk = Kokkos::subview(m_variables->m_pk, 0, + Kokkos::ALL); // ky == 0 component + realityCondition(sub_fk, m_mask); + realityCondition(sub_pk, m_mask); + } + ~HasegawaWakatani() = default; + + void run() { + for (int i = 0; i < m_nbiter; i++) { + solve(); + } + } + + void solve() { + for (int step = 0; step < m_ode->order(); step++) { + vorticity(m_variables->m_fk, m_variables->m_pk, m_dfkdt); + m_ode->advance(m_dfkdt, m_variables->m_fk, step); + + auto rhok = + Kokkos::subview(m_variables->m_fk, 1, Kokkos::ALL, Kokkos::ALL); + poisson(rhok, m_variables->m_pk); + + // ky == 0 component + auto sub_fk = + Kokkos::subview(m_variables->m_fk, Kokkos::ALL, 0, Kokkos::ALL); + auto sub_pk = Kokkos::subview(m_variables->m_pk, 0, Kokkos::ALL); + realityCondition(sub_fk, m_mask); + realityCondition(sub_pk, m_mask); + } + } + + template + void vorticity(const FViewType& fk, const PViewType& pk, + const dFViewType& dfkdt) { + poissonBracket(fk, pk, m_nonlinear_k); + + constexpr int nb_vars = 2; + auto nonlinear_k = m_nonlinear_k; + auto adiabacity_factor = m_adiabacity_factor; + auto kyh = m_grid->m_kyh; + auto ksq = m_grid->m_ksq; + const Kokkos::complex I(0.0, 1.0); // Imaginary unit + const double eta = m_eta, ca = m_ca, nu = m_nu; + + range2D_type range(point2D_type{{0, 0}}, point2D_type{{m_nkyh, m_nkx2}}, + tile2D_type{{TILE0, TILE1}}); + + Kokkos::parallel_for( + "vorticity", range, KOKKOS_LAMBDA(int iky, int ikx) { + auto tmp_pk = pk(iky, ikx); + auto tmp_kyh = kyh(iky); + auto tmp_adiabacity_factor = adiabacity_factor(iky); + auto tmp_k4 = ksq(iky, ikx) * ksq(iky, ikx); + for (int in = 0; in < nb_vars; in++) { + dfkdt(in, iky, ikx) = + -nonlinear_k(in, iky, ikx) - I * eta * tmp_kyh * tmp_pk - + ca * tmp_adiabacity_factor * (fk(in, iky, ikx) - tmp_pk) - + nu * fk(in, iky, ikx) * tmp_k4; + } + }); + } + + template + void poissonBracket(const FViewType& fk, const GViewType& gk, PViewType& pk) { + multiplication(fk, gk, m_ik_fg_all); + backwardFFT(m_ik_fg_all, m_dfgdx_all); + + // Convolution in real space + convolution(m_dfgdx_all, m_conv); + + // Forward FFT + forwardFFT(m_conv, pk); + + // ky == 0 component + auto sub_pk = Kokkos::subview(pk, Kokkos::ALL, 0, Kokkos::ALL); + realityCondition(pk, m_mask); + } + + template + void multiplication(const FViewType& fk, const GViewType& gk, + FGViewType& ik_fg_all) { + auto ikx_f = + Kokkos::subview(ik_fg_all, pair_type(0, 2), Kokkos::ALL, Kokkos::ALL); + auto iky_f = + Kokkos::subview(ik_fg_all, pair_type(2, 4), Kokkos::ALL, Kokkos::ALL); + auto ikx_g = Kokkos::subview(ik_fg_all, 4, Kokkos::ALL, Kokkos::ALL); + auto iky_g = Kokkos::subview(ik_fg_all, 5, Kokkos::ALL, Kokkos::ALL); + auto kx = m_grid->m_kx; + auto kyh = m_grid->m_kyh; + + const Kokkos::complex I(0.0, 1.0); // Imaginary unit + constexpr int nb_vars = 2; + + range2D_type range(point2D_type{{0, 0}}, point2D_type{{m_nkyh, m_nkx2}}, + tile2D_type{{TILE0, TILE1}}); + + Kokkos::parallel_for( + range, KOKKOS_LAMBDA(int iky, int ikx) { + const auto tmp_ikx = I * kx(ikx); + const auto tmp_iky = I * kyh(iky); + for (int in = 0; in < nb_vars; in++) { + const auto tmp_fk = fk(in, iky, ikx); + ikx_f(in, iky, ikx) = tmp_ikx * tmp_fk; + iky_f(in, iky, ikx) = tmp_iky * tmp_fk; + }; + const auto tmp_gk = gk(iky, ikx); + ikx_g(iky, ikx) = tmp_ikx * tmp_gk; + iky_g(iky, ikx) = tmp_iky * tmp_gk; + }); + } + + template + void forwardFFT(const InViewType& f, OutViewType& fk) { + m_forward_plan->execute(f, m_forward_buffer); + + auto forward_buffer = m_forward_buffer; + auto norm_coef = m_norm_coef; + int nkx2 = m_nkx2, nkx = (m_nkx2 - 1) / 2, ny = m_ny, nv = 2; + range3D_type range(point3D_type{{0, 0, 0}}, + point3D_type{{nv, m_nkyh, nkx + 1}}, + tile3D_type{{2, TILE0, TILE1}}); + + Kokkos::parallel_for( + "FFT_unpack", range, KOKKOS_LAMBDA(int iv, int iky, int ikx) { + fk(iv, iky, ikx) = forward_buffer(iv, iky, ikx) * norm_coef; + + int ikx_neg = nkx2 - ikx; + int iky_neg = (ny - iky), iky_nonzero = iky; + if (ikx == 0) { + ikx_neg = 0; + }; + if (iky == 0) { + iky_neg = ny - 1; + iky_nonzero = 1; + }; + fk(iv, iky_nonzero, ikx_neg) = + Kokkos::conj(forward_buffer(iv, iky_neg, ikx)) * norm_coef; + }); + } + + template + void backwardFFT(const InViewType& fk, OutViewType& f) { + auto backward_buffer = m_backward_buffer; + Kokkos::deep_copy(backward_buffer, 0.0); + int nkx2 = m_nkx2, nkx = (m_nkx2 - 1) / 2, ny = m_ny, nv = 6; + range3D_type range(point3D_type{{0, 0, 0}}, + point3D_type{{nv, m_nkyh, nkx + 1}}, + tile3D_type{{6, TILE0, TILE1}}); + + Kokkos::parallel_for( + "FFT_pack", range, KOKKOS_LAMBDA(int iv, int iky, int ikx) { + backward_buffer(iv, iky, ikx) = fk(iv, iky, ikx); + int ikx_neg = nkx2 - ikx; + int iky_neg = (ny - iky), iky_nonzero = iky; + if (ikx == 0) { + ikx_neg = 0; + }; + if (iky == 0) { + iky_neg = ny - 1; + iky_nonzero = 1; + }; + backward_buffer(iv, iky_neg, ikx) = + Kokkos::conj(fk(iv, iky_nonzero, ikx_neg)); + }); + + m_backward_plan->execute(backward_buffer, f); + } + + template + void convolution(const InViewType& dfgdx_all, OutViewType& conv) { + auto dfdx = + Kokkos::subview(dfgdx_all, pair_type(0, 2), Kokkos::ALL, Kokkos::ALL); + auto dfdy = + Kokkos::subview(dfgdx_all, pair_type(2, 4), Kokkos::ALL, Kokkos::ALL); + auto dgdx = Kokkos::subview(dfgdx_all, 4, Kokkos::ALL, Kokkos::ALL); + auto dgdy = Kokkos::subview(dfgdx_all, 5, Kokkos::ALL, Kokkos::ALL); + + range2D_type range(point2D_type{{0, 0}}, point2D_type{{m_ny, m_nx}}, + tile2D_type{{TILE0, TILE1}}); + + constexpr int nb_vars = 2; + Kokkos::parallel_for( + "convolution", range, KOKKOS_LAMBDA(int iy, int ix) { + const auto tmp_dgdx = dgdx(iy, ix); + const auto tmp_dgdy = dgdy(iy, ix); + for (int in = 0; in < nb_vars; in++) { + const auto tmp_dfdx = dfdx(in, iy, ix); + const auto tmp_dfdy = dfdy(in, iy, ix); + conv(in, iy, ix) = tmp_dfdx * tmp_dgdy - tmp_dfdy * tmp_dgdx; + }; + }); + } + + template + void poisson(const InViewType& rhok, OutViewType& phik) { + range2D_type range(point2D_type{{0, 0}}, point2D_type{{m_nkyh, m_nkx2}}, + tile2D_type{{TILE0, TILE1}}); + + auto poisson_operator = m_poisson_operator; + Kokkos::parallel_for( + "poisson", range, KOKKOS_LAMBDA(int iky, int ikx) { + phik(iky, ikx) = poisson_operator(iky, ikx) * rhok(iky, ikx); + }); + } +}; + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + { + int nx = 1024, nbiter = 100; + double lx = 10.0, dt = 0.005; + HasegawaWakatani model(nx, lx, nbiter, dt); + Kokkos::Timer timer; + model.run(); + Kokkos::fence(); + double seconds = timer.seconds(); + std::cout << "Elapsed time: " << seconds << " [s]" << std::endl; + } + Kokkos::finalize(); + + return 0; +} diff --git a/examples/09_HasegawaWakatani/CMakeLists.txt b/examples/09_HasegawaWakatani/CMakeLists.txt new file mode 100644 index 00000000..86d18539 --- /dev/null +++ b/examples/09_HasegawaWakatani/CMakeLists.txt @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +add_executable(09_hasegawa_wakatani 09_hasegawa_wakatani.cpp) +target_link_libraries(09_hasegawa_wakatani PUBLIC KokkosFFT::fft) diff --git a/examples/09_HasegawaWakatani/hasegawa_wakatani.py b/examples/09_HasegawaWakatani/hasegawa_wakatani.py new file mode 100644 index 00000000..1082b97d --- /dev/null +++ b/examples/09_HasegawaWakatani/hasegawa_wakatani.py @@ -0,0 +1,232 @@ +import argparse +import pathlib +import time +import numpy as np +import xarray as xr +from functools import partial +from dataclasses import dataclass + +@dataclass +class Grid: + nx: int + ny: int + lx: np.float64 + ly: np.float64 + + def __init__(self, nx, ny, lx, ly): + self.nx, self.ny = nx, ny + self.lx, self.ly = lx * np.pi, ly * np.pi + + nkx, nky = (self.nx-2)//3, (self.ny-2)//3 + self.nky, self.nkx2 = nky, nkx * 2 + 1 + + self.kx = np.linspace(-nkx, nkx+1, nkx*2+1, endpoint=False) / lx + self.ky = np.linspace(-nky, nky+1, nky*2+1, endpoint=False) / ly + + # Set zero-frequency component to the zero-th component + self.kx = np.fft.ifftshift(self.kx) + self.ky = np.fft.ifftshift(self.ky) + + # Half plane with ky >= 0. + self.kyh = self.ky[:nky+1] + + self.kx = np.expand_dims(self.kx, axis=0) + self.kyh = np.expand_dims(self.kyh, axis=1) + + KX, KY = np.meshgrid(self.kx, self.kyh) + self.ksq = KX**2 + KY**2 + + # Defined in [0:Nky, -Nkx:Nkx] + self.inv_ksq = 1. / (1. + self.ksq) + +@dataclass +class Variables: + fk: np.ndarray + pk: np.ndarray + + def __init__(self, grid, init_val=0.001): + #random_number = np.random.rand(*grid.inv_ksq.shape) + random_number = 1. + fk0 = init_val * grid.inv_ksq * np.exp(1j * 2. * np.pi * random_number) + fk1 = - fk0 * grid.ksq + + self.fk = np.stack([fk0, fk1]) + self.pk = np.zeros_like(fk0) + +class RKG4th: + order: int = 4 + h: np.float64 + def __init__(self, h): + self.y = None + self.k1 = None + self.k2 = None + self.k3 = None + self.k4 = None + self.h = h + + def advance(self, f, y, step): + y = np.asarray(y) + if step==0: + self.y = np.copy(y) + self.k1 = f(y) * self.h + y = self.y + self.k1/2 + elif step==1: + self.k2 = f(y) * self.h + y = self.y + self.k2/2 + elif step==2: + self.k3 = f(y) * self.h + y = self.y + self.k3 + elif step==3: + self.k4 = f(y) * self.h + y = self.y + (self.k1 + 2*self.k2 + 2*self.k3 + self.k4) / 6 + else: + raise ValueError('step should be 0, 1, 2, or 3') + + return y + +def realityCondition(A): + def realityCondition2D(A_col): + nky, nkx2 = A_col.shape + nkx = (nkx2-1)//2 + + conj = np.conj(A_col[0,1:nkx+1]).copy()[::-1] + A_col[0,nkx+1:] = conj + A_col[0,0] = 0. + 0.j + + return A_col + + if A.ndim == 2: + A = np.expand_dims(A, axis=0) + + tmp_A = [] + for A_col in A: + tmp_A.append( realityCondition2D(A_col) ) + + tmp_A = np.asarray(tmp_A) + + return np.squeeze(tmp_A) + +class HasegawaWakatani: + ca: np.float64 = 3. + nu: np.float64 = 0.01 + eta: np.float64 = 5. + it: int = 0 + + def __init__(self, nx, lx, nbiter, dt): + self.grid = Grid(nx=nx, ny=nx, lx=lx, ly=lx) + self.variables = Variables(grid=self.grid) + self.ode = RKG4th(h = dt) + self.nbiter = nbiter + + self.poisson_operator = -1. / self.grid.ksq + self.poisson_operator[0,0] = 0. + + self.adiabacity_factor = self.grid.kyh**2 + + self.variables.pk = self._poisson(self.variables.fk[1]) + self.variables.fk = realityCondition(self.variables.fk) + self.variables.pk = realityCondition(self.variables.pk) + + + + def run(self): + for _ in range(self.nbiter): + self._diag() + self._solve() + + def _diag(self): + pass + def _solve(self): + for step in range(self.ode.order): + vorticity = partial(self._vorticity, pk=self.variables.pk) + self.variables.fk = self.ode.advance(f=vorticity, + y=self.variables.fk, + step=step) + self.variables.pk = self._poisson(fk=self.variables.fk[1]) + + self.variables.fk = realityCondition(self.variables.fk) + self.variables.pk = realityCondition(self.variables.pk) + + def _vorticity(self, fk, pk): + phik = np.zeros_like(fk, dtype=np.complex128) + dfkdt = np.zeros_like(fk, dtype=np.complex128) + + for i in range(2): + phik[i] = self._poissonBracket(f=fk[i], g=pk) + dfkdt[i] = - phik[i] - 1j * self.eta * self.grid.kyh * pk \ + - self.ca * self.adiabacity_factor * (fk[i] - pk) \ + - self.nu * fk[i] * self.grid.ksq**2 + + return dfkdt + + def _poissonBracket(self, f, g): + ikx_f = 1j * self.grid.kx * f + iky_f = 1j * self.grid.kyh * f + ikx_g = 1j * self.grid.kx * g + iky_g = 1j * self.grid.kyh * g + + # Inverse FFT complex [ny, nx/2+1] => real [ny, nx] + dfdx = self._backwardFFT(ikx_f) + dfdy = self._backwardFFT(iky_f) + dgdx = self._backwardFFT(ikx_g) + dgdy = self._backwardFFT(iky_g) + + # Convolution in real space + conv = dfdx * dgdy - dfdy * dgdx + + # Forward FFT real [ny, nx] => [ny, nx/2+1] + poisson_bracket = self._forwardFFT(conv) + + # Reality condition + poisson_bracket = realityCondition(poisson_bracket) + + return poisson_bracket + + def _forwardFFT(self, f): + nky, nkx2 = self.grid.nky, self.grid.nkx2 + nkx = (nkx2-1)//2 + ny, nx = self.grid.ny, self.grid.nx + normalization_coefficient = nx*ny + + fk = np.fft.rfft2(f) + fk_buffer = np.zeros((nky+1, nkx2), dtype=np.complex128) + + fk_buffer[:,0:nkx+1] = fk[:nky+1,0:nkx+1] + for iy in range(1, nky+1): + for ix in range(0, -(nkx+1), -1): + fk_buffer[iy,ix] = np.conj(fk[ny-iy, -ix]) + + return fk_buffer * normalization_coefficient + + def _backwardFFT(self, fk): + nky, nkx2 = self.grid.nky, self.grid.nkx2 + nkx = (nkx2-1)//2 + ny, nx = self.grid.ny, self.grid.nx + + fk_buffer = np.zeros((ny,nx//2+1), dtype=np.complex128) + fk_buffer[:nky+1,0:nkx+1] = fk[:,0:nkx+1] + + for iy in range(1, nky+1): + for ix in range(0, -(nkx+1), -1): + fk_buffer[ny-iy, -ix] = np.conj(fk[iy,ix]) + + f = np.fft.irfft2(fk_buffer) + return f + + def _poisson(self, fk): + return self.poisson_operator * fk + +if __name__ == '__main__': + parser = argparse.ArgumentParser(add_help=True) + parser.add_argument('nx', nargs='?', type=int, default=1024) + parser.add_argument('lx', nargs='?', type=float, default=10.0) + parser.add_argument('nbiter', nargs='?', type=int, default=100) + parser.add_argument('dt', nargs='?', type=float, default=0.005) + args = parser.parse_args() + + model = HasegawaWakatani(**vars(args)) + start = time.time() + model.run() + seconds = time.time() - start + + print(f'Elapsed time: {seconds} [s]') \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 94b3c84a..6c09bd24 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(05_1DFFT_HOST_DEVICE) add_subdirectory(06_1DFFT_reuse_plans) add_subdirectory(07_unmanaged_views) add_subdirectory(08_inplace_FFT) +add_subdirectory(09_HasegawaWakatani) \ No newline at end of file