Skip to content

Commit

Permalink
Fix a bug in the KokkosV7 + update README
Browse files Browse the repository at this point in the history
  • Loading branch information
seyonglee committed Apr 30, 2024
1 parent 2f17b2d commit 697512e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 19 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ Here is the brief information on each version:
`src/kokkos_src_v6_2`: a variant of `src/kokkos_src_v6` in which the inner parallel_for
constructs are manually fused; the new best-performing version.

`src/kokkos_src_v7`: uses the same user data layouts and compute patterns as the P2R Kokkos version.

Basic instructions are given below.

#### Getting started
Expand Down Expand Up @@ -274,6 +276,9 @@ $ make COMPILER=nvcc MODE=kokkosv6 INCLUDE_DATA=1 USE_FMAD=1 USE_GPU=1 PREPIN_HO
$ make COMPILER=nvcc MODE=kokkosv6 INCLUDE_DATA=0 USE_FMAD=1 USE_GPU=1 PREPIN_HOSTMEM=1 #works only for NVIDIA GPUs
$ make COMPILER=nvcc MODE=kokkosv6_2 INCLUDE_DATA=0 USE_FMAD=1 USE_GPU=1 PREPIN_HOSTMEM=1 #works only for NVIDIA GPUs
$ make COMPILER=nvcc MODE=kokkosv5_2 INCLUDE_DATA=0 USE_FMAD=1 USE_GPU=1 KOKKOS_ARCH=AMPERE86
$ make COMPILER=nvcc MODE=kokkosv7 INCLUDE_DATA=0 USE_FMAD=1 USE_GPU=1
$ make COMPILER=gcc MODE=kokkosv7 USE_GPU=0 KOKKOS_ARCH=POWER9

$ make MODE=kokkosv6 clean
```

Expand Down
38 changes: 19 additions & 19 deletions src/kokkos_src_v7/propagate-toz-test_Kokkos_v7.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,26 +722,26 @@ KOKKOS_FUNCTION void propagateToZ(const MP6x6SF_<N> &inErr_, const MP6F_<N> &inP
const float sCosPsina = sinf(cosP*sina);
const float cCosPsina = cosf(cosP*sina);

//for (size_t i=0;i<6;++i) errorProp[N*PosInMtrx(i,i,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(0,0,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(1,1,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(2,2,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(3,3,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(4,4,6,N) + it] = 1.0f;
errorProp[N*PosInMtrx(5,5,6,N) + it] = 1.0f;
//for (size_t i=0;i<6;++i) errorProp[PosInMtrx(i,i,6,N) + it] = 1.0f;
errorProp[PosInMtrx(0,0,6,N) + it] = 1.0f;
errorProp[PosInMtrx(1,1,6,N) + it] = 1.0f;
errorProp[PosInMtrx(2,2,6,N) + it] = 1.0f;
errorProp[PosInMtrx(3,3,6,N) + it] = 1.0f;
errorProp[PosInMtrx(4,4,6,N) + it] = 1.0f;
errorProp[PosInMtrx(5,5,6,N) + it] = 1.0f;
//[Dec. 21, 2022] Added to have the same pattern as the cudauvm version.
errorProp[N*PosInMtrx(0,1,6,N) + it] = 0.0f;
errorProp[N*PosInMtrx(0,2,6,N) + it] = cosP*sinT*(sinP*cosa*sCosPsina-cosa)*icosT;
errorProp[N*PosInMtrx(0,3,6,N) + it] = cosP*sinT*deltaZ*cosa*(1.0f-sinP*sCosPsina)*(icosT*pt)-k*(cosP*sina-sinP*(1.0f-cCosPsina))*(pt*pt);
errorProp[N*PosInMtrx(0,4,6,N) + it] = (k*pt)*(-sinP*sina+sinP*sinP*sina*sCosPsina-cosP*(1.0f-cCosPsina));
errorProp[N*PosInMtrx(0,5,6,N) + it] = cosP*deltaZ*cosa*(1.0f-sinP*sCosPsina)*(icosT*icosT);
errorProp[N*PosInMtrx(1,2,6,N) + it] = cosa*sinT*(cosP*cosP*sCosPsina-sinP)*icosT;
errorProp[N*PosInMtrx(1,3,6,N) + it] = sinT*deltaZ*cosa*(cosP*cosP*sCosPsina+sinP)*(icosT*pt)-k*(sinP*sina+cosP*(1.0f-cCosPsina))*(pt*pt);
errorProp[N*PosInMtrx(1,4,6,N) + it] = (k*pt)*(-sinP*(1.0f-cCosPsina)-sinP*cosP*sina*sCosPsina+cosP*sina);
errorProp[N*PosInMtrx(1,5,6,N) + it] = deltaZ*cosa*(cosP*cosP*sCosPsina+sinP)*(icosT*icosT);
errorProp[N*PosInMtrx(4,2,6,N) + it] = -inPar_(iparIpt,it)*sinT*(icosTk);
errorProp[N*PosInMtrx(4,3,6,N) + it] = sinT*deltaZ*(icosTk);
errorProp[N*PosInMtrx(4,5,6,N) + it] = inPar_(iparIpt,it)*deltaZ*(icosT*icosTk);
errorProp[PosInMtrx(0,1,6,N) + it] = 0.0f;
errorProp[PosInMtrx(0,2,6,N) + it] = cosP*sinT*(sinP*cosa*sCosPsina-cosa)*icosT;
errorProp[PosInMtrx(0,3,6,N) + it] = cosP*sinT*deltaZ*cosa*(1.0f-sinP*sCosPsina)*(icosT*pt)-k*(cosP*sina-sinP*(1.0f-cCosPsina))*(pt*pt);
errorProp[PosInMtrx(0,4,6,N) + it] = (k*pt)*(-sinP*sina+sinP*sinP*sina*sCosPsina-cosP*(1.0f-cCosPsina));
errorProp[PosInMtrx(0,5,6,N) + it] = cosP*deltaZ*cosa*(1.0f-sinP*sCosPsina)*(icosT*icosT);
errorProp[PosInMtrx(1,2,6,N) + it] = cosa*sinT*(cosP*cosP*sCosPsina-sinP)*icosT;
errorProp[PosInMtrx(1,3,6,N) + it] = sinT*deltaZ*cosa*(cosP*cosP*sCosPsina+sinP)*(icosT*pt)-k*(sinP*sina+cosP*(1.0f-cCosPsina))*(pt*pt);
errorProp[PosInMtrx(1,4,6,N) + it] = (k*pt)*(-sinP*(1.0f-cCosPsina)-sinP*cosP*sina*sCosPsina+cosP*sina);
errorProp[PosInMtrx(1,5,6,N) + it] = deltaZ*cosa*(cosP*cosP*sCosPsina+sinP)*(icosT*icosT);
errorProp[PosInMtrx(4,2,6,N) + it] = -inPar_(iparIpt,it)*sinT*(icosTk);
errorProp[PosInMtrx(4,3,6,N) + it] = sinT*deltaZ*(icosTk);
errorProp[PosInMtrx(4,5,6,N) + it] = inPar_(iparIpt,it)*deltaZ*(icosT*icosTk);
}
//});
//
Expand Down

0 comments on commit 697512e

Please sign in to comment.