Skip to content

Commit

Permalink
Add labels to Kokkos kernels
Browse files (browse the repository at this point in the history)
  • Loading branch information
Yuuichi Asahi committed Sep 12, 2024
1 parent 47e3c13 commit a23676f
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 15 deletions.
3 changes: 2 additions & 1 deletion common/src/KokkosFFT_Helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ void roll(const ExecutionSpace& exec_space, ViewType& inout, axis_type<1> shift,
// shift2 == 0 means shift
if (shift2 == 0) {
Kokkos::parallel_for(
"KokkosFFT::roll",
Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<std::size_t>>(
exec_space, 0, len),
KOKKOS_LAMBDA(std::size_t i) {
Expand Down Expand Up @@ -106,7 +107,7 @@ void roll(const ExecutionSpace& exec_space, ViewType& inout, axis_type<2> shift,
int shift_02 = shift2.at(0), shift_12 = shift2.at(1);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1) {
"KokkosFFT::roll", range, KOKKOS_LAMBDA(int i0, int i1) {
if (i0 + shift_00 < n0 && i1 + shift_10 < n1) {
tmp(i0 + shift_00, i1 + shift_10) = inout(i0, i1);
}
Expand Down
1 change: 1 addition & 0 deletions common/src/KokkosFFT_normalization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ void normalize_impl(const ExecutionSpace& exec_space, ViewType& inout,
auto* data = inout.data();

Kokkos::parallel_for(
"KokkosFFT::normalize",
Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<std::size_t>>(
exec_space, 0, size),
KOKKOS_LAMBDA(const int& i) { data[i] *= coef; });
Expand Down
21 changes: 14 additions & 7 deletions common/src/KokkosFFT_padding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
auto n0 = std::min(_n0, in.extent(0));

Kokkos::parallel_for(
"KokkosFFT::crop_or_pad",
Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<std::size_t>>(
exec_space, 0, n0),
KOKKOS_LAMBDA(int i0) { out(i0) = in(i0); });
Expand Down Expand Up @@ -128,7 +129,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1) { out(i0, i1) = in(i0, i1); });
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1) { out(i0, i1) = in(i0, i1); });
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
Expand All @@ -155,7 +157,7 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
"KokkosFFT::crop_or_pad", range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
out(i0, i1, i2) = in(i0, i1, i2);
});
}
Expand Down Expand Up @@ -185,7 +187,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
out(i0, i1, i2, i3) = in(i0, i1, i2, i3);
});
}
Expand Down Expand Up @@ -216,7 +219,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
out(i0, i1, i2, i3, i4) = in(i0, i1, i2, i3, i4);
});
}
Expand Down Expand Up @@ -249,7 +253,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
out(i0, i1, i2, i3, i4, i5) = in(i0, i1, i2, i3, i4, i5);
});
}
Expand Down Expand Up @@ -283,7 +288,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
for (int i6 = 0; i6 < n6; i6++) {
out(i0, i1, i2, i3, i4, i5, i6) = in(i0, i1, i2, i3, i4, i5, i6);
}
Expand Down Expand Up @@ -320,7 +326,8 @@ void crop_or_pad_impl(const ExecutionSpace& exec_space, const InViewType& in,
);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::crop_or_pad", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
for (int i6 = 0; i6 < n6; i6++) {
for (int i7 = 0; i7 < n7; i7++) {
out(i0, i1, i2, i3, i4, i5, i6, i7) =
Expand Down
20 changes: 13 additions & 7 deletions common/src/KokkosFFT_transpose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
prep_transpose_view(in, out, _map);

Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); });
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); });
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
Expand All @@ -149,7 +150,7 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,

Kokkos::Array<int, 3> map = {_map[0], _map[1], _map[2]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
"KokkosFFT::transpose", range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
int _indices[rank] = {i0, i1, i2};
int _i0 = _indices[map[0]];
int _i1 = _indices[map[1]];
Expand Down Expand Up @@ -183,7 +184,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3) {
int _indices[rank] = {i0, i1, i2, i3};
int _i0 = _indices[map[0]];
int _i1 = _indices[map[1]];
Expand Down Expand Up @@ -219,7 +221,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3], _map[4]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4) {
int _indices[rank] = {i0, i1, i2, i3, i4};
int _i0 = _indices[map[0]];
int _i1 = _indices[map[1]];
Expand Down Expand Up @@ -258,7 +261,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2],
_map[3], _map[4], _map[5]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
int _indices[rank] = {i0, i1, i2, i3, i4, i5};
int _i0 = _indices[map[0]];
int _i1 = _indices[map[1]];
Expand Down Expand Up @@ -298,7 +302,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
for (int i6 = 0; i6 < n6; i6++) {
int _indices[rank] = {i0, i1, i2, i3, i4, i5, i6};
int _i0 = _indices[map[0]];
Expand Down Expand Up @@ -344,7 +349,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6], _map[7]};
Kokkos::parallel_for(
range, KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5) {
for (int i6 = 0; i6 < n6; i6++) {
for (int i7 = 0; i7 < n7; i7++) {
int _indices[rank] = {i0, i1, i2, i3, i4, i5, i6, i7};
Expand Down
1 change: 1 addition & 0 deletions common/src/KokkosFFT_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ void conjugate(const ExecutionSpace& exec_space, const InViewType& in,
auto* out_data = out.data();

Kokkos::parallel_for(
"KokkosFFT::conjugate",
Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<std::size_t>>(
exec_space, 0, size),
KOKKOS_LAMBDA(std::size_t i) { out_data[i] = Kokkos::conj(in_data[i]); });
Expand Down
2 changes: 2 additions & 0 deletions common/unit_test/Test_Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ bool allclose(const AViewType& a, const BViewType& b, double rtol = 1.e-5,

int error = 0;
Kokkos::parallel_reduce(
"KokkosFFT::Test::allclose",
Kokkos::RangePolicy<execution_space, Kokkos::IndexType<std::size_t>>{0,
n},
KOKKOS_LAMBDA(const int& i, int& err) {
Expand All @@ -44,6 +45,7 @@ void multiply(ViewType& x, T a) {
auto* ptr_x = x.data();

Kokkos::parallel_for(
"KokkosFFT::Test::multiply",
Kokkos::RangePolicy<execution_space, Kokkos::IndexType<std::size_t>>{0,
n},
KOKKOS_LAMBDA(const int& i) { ptr_x[i] = ptr_x[i] * a; });
Expand Down
2 changes: 2 additions & 0 deletions fft/unit_test/Test_Transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ void fft1(ViewType& in, ViewType& out) {
std::size_t L = in.size();

Kokkos::parallel_for(
"KokkosFFT::Test::fft1",
Kokkos::TeamPolicy<execution_space>(L, Kokkos::AUTO),
KOKKOS_LAMBDA(
const Kokkos::TeamPolicy<execution_space>::member_type& team_member) {
Expand Down Expand Up @@ -71,6 +72,7 @@ void ifft1(ViewType& in, ViewType& out) {
std::size_t L = in.size();

Kokkos::parallel_for(
"KokkosFFT::Test::ifft1",
Kokkos::TeamPolicy<execution_space>(L, Kokkos::AUTO),
KOKKOS_LAMBDA(
const Kokkos::TeamPolicy<execution_space>::member_type& team_member) {
Expand Down
2 changes: 2 additions & 0 deletions fft/unit_test/Test_Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ bool allclose(const AViewType& a, const BViewType& b, double rtol = 1.e-5,

int error = 0;
Kokkos::parallel_reduce(
"KokkosFFT::Test::allclose",
Kokkos::RangePolicy<execution_space, Kokkos::IndexType<std::size_t>>{0,
n},
KOKKOS_LAMBDA(const int& i, int& err) {
Expand All @@ -43,6 +44,7 @@ void multiply(ViewType& x, T a) {
auto* ptr_x = x.data();

Kokkos::parallel_for(
"KokkosFFT::Test::multiply",
Kokkos::RangePolicy<execution_space, Kokkos::IndexType<std::size_t>>{0,
n},
KOKKOS_LAMBDA(const int& i) { ptr_x[i] = ptr_x[i] * a; });
Expand Down

0 comments on commit a23676f

Please sign in to comment.