diff --git a/input/in.lj b/input/in.lj index 774c548..38ac995 100644 --- a/input/in.lj +++ b/input/in.lj @@ -5,7 +5,7 @@ atom_style atomic newton off lattice fcc 0.8442 -region box block 0 80 0 80 0 80 +region box block 0 4 0 4 0 4 create_box 1 box create_atoms 1 box mass 1 2.0 @@ -20,4 +20,4 @@ neigh_modify every 20 one 50 fix 1 all nve thermo 10 -run 100 +run 2 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c72806c..8c81768 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,14 +15,6 @@ if(ENABLE_MPI) target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_MPI) endif() -# Select a default set of options. We can export this as CMake options later -if (ENABLE_KOKKOS_REMOTE_SPACES) - target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) - target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) - #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) - target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) -endif() - target_include_directories(ExaMiniMD PRIVATE ${Kokkos_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${SUBDIRECTORIES}) target_link_libraries(ExaMiniMD PRIVATE $<$:MPI::MPI_CXX> Kokkos::kokkos $<$:Kokkos::kokkosremotespaces>) diff --git a/src/binning_types/binning_kksort.cpp b/src/binning_types/binning_kksort.cpp index a7fdefc..535ab85 100644 --- a/src/binning_types/binning_kksort.cpp +++ b/src/binning_types/binning_kksort.cpp @@ -71,7 +71,7 @@ namespace { void BinningKKSort::create_binning(T_X_FLOAT dx_in, T_X_FLOAT dy_in, T_X_FLOAT dz_in, int halo_depth, bool do_local, bool do_ghost, bool sort) { if(do_local||do_ghost) { nhalo = halo_depth; - std::pair range(do_local?0:system->N_local, + Kokkos::pair range(do_local?0:system->N_local, do_ghost?system->N_local+system->N_ghost:system->N_local); nbinx = T_INT(system->sub_domain_x/dx_in); diff --git a/src/comm_lib.cpp b/src/comm_lib.cpp new file mode 100644 index 0000000..359de92 --- /dev/null +++ b/src/comm_lib.cpp @@ -0,0 
+1,88 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//************************************************************************
+
+#include
+#include
+
+#if EXAMINIMD_ENABLE_MPI
+#include
+#endif
+
+#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES
+#include
+#endif
+
+// Bring up MPI and, when a Kokkos Remote Spaces backend is compiled in, the
+// matching (NV)SHMEM runtime. Does nothing when neither MPI nor remote spaces
+// is enabled. argc/argv arrive by value, so MPI's edits to them stay local.
+void comm_lib_init(int argc, char* argv[]) {
+#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES)
+  int mpi_thread_level_available;
+  int mpi_thread_level_required = MPI_THREAD_MULTIPLE;
+#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
+  mpi_thread_level_required = MPI_THREAD_SINGLE;
+#endif
+  MPI_Init_thread(&argc, &argv, mpi_thread_level_required, &mpi_thread_level_available);
+  assert(mpi_thread_level_available >= mpi_thread_level_required); // no-op under NDEBUG
+#ifdef KRS_ENABLE_SHMEMSPACE
+  // NOTE(review): an MPI_THREAD_* level is passed to OpenSHMEM here; confirm it matches SHMEM_THREAD_*.
+  shmem_init_thread(mpi_thread_level_required, &mpi_thread_level_available);
+  assert(mpi_thread_level_available >= mpi_thread_level_required);
+#endif
+#ifdef KRS_ENABLE_NVSHMEMSPACE
+  // NVSHMEM is initialized from the MPI communicator set up above.
+  MPI_Comm mpi_comm = MPI_COMM_WORLD;
+  nvshmemx_init_attr_t attr;
+  attr.mpi_comm = &mpi_comm;
+  nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);
+#endif
+#endif // EXAMINIMD_ENABLE_MPI || EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES
+}
+
+// Shut the runtimes down again: (NV)SHMEM first, MPI last. No-op without MPI/remote spaces.
+void comm_lib_finalize() {
+#if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES)
+#ifdef KRS_ENABLE_SHMEMSPACE
+  shmem_finalize();
+#endif
+#ifdef KRS_ENABLE_NVSHMEMSPACE
+  nvshmem_finalize();
+#endif
+  MPI_Finalize();
+#endif
+}
diff --git a/src/comm_lib.h b/src/comm_lib.h
new file mode 100644
index 0000000..8985856
--- /dev/null
+++ b/src/comm_lib.h
@@ -0,0 +1,47 @@
+//************************************************************************
+// ExaMiniMD v. 1.0
+// Copyright (2018) National Technology & Engineering Solutions of Sandia,
+// LLC (NTESS).
+//
+// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government
+// retains certain rights in this software.
+// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +//************************************************************************ + +#pragma once + +#ifndef COMM_INIT_H +#define COMM_INIT_H + +void comm_lib_init(int argc, char* argv[]); +void comm_lib_finalize(); + +#endif diff --git a/src/comm_types/CMakeLists.txt b/src/comm_types/CMakeLists.txt index cd13f8b..a4b281b 100644 --- a/src/comm_types/CMakeLists.txt +++ b/src/comm_types/CMakeLists.txt @@ -1,9 +1,3 @@ FILE(GLOB SRCS *.cpp) target_sources(ExaMiniMD PRIVATE ${SRCS}) - -if (!ENABLE_MPI AND !ENABLE_KOKKOS_REMOTE_SPACES) - # Skip MPI module - list(FILTER SRCS EXCLUDE REGEX ".*comm_mpi\\.cpp$") -endif() - target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/comm_types/comm_mpi.cpp b/src/comm_types/comm_mpi.cpp index 38703c1..4fed510 100644 --- a/src/comm_types/comm_mpi.cpp +++ b/src/comm_types/comm_mpi.cpp @@ -36,6 +36,8 @@ // Questions? Contact Christian R. Trott (crtrott@sandia.gov) //************************************************************************ +#include + #if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) #include @@ -197,14 +199,12 @@ void CommMPI::exchange() { s = *system; N_local = system->N_local; N_ghost = 0; - //printf("System A: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); Kokkos::parallel_for("CommMPI::exchange_self", Kokkos::RangePolicy >(0,N_local), *this); T_INT N_total_recv = 0; T_INT N_total_send = 0; - //printf("System B: %i %lf %lf %lf %i\n",s.N_local,s.x(21,0),s.x(21,1),s.x(21,2),s.type(21)); for(phase = 0; phase < 6; phase ++) { proc_num_send[phase] = 0; proc_num_recv[phase] = 0; @@ -390,7 +390,8 @@ void CommMPI::exchange_halo() { }; void CommMPI::update_halo() { -#ifndef SHMEMTESTS_USE_HALO + +#if !defined(SHMEMTESTS_USE_HALO) && defined(EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) return; #else Kokkos::Profiling::pushRegion("Comm::update_halo"); @@ -478,7 +479,13 @@ void CommMPI::update_force() { Kokkos::Profiling::popRegion(); }; -const 
char* CommMPI::name() { return "CommMPI"; } +const char* CommMPI::name() { + comm_name = std::string("CommMPI"); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + comm_name += "Distrib"; + #endif + return comm_name.c_str(); +} int CommMPI::process_rank() { return proc_rank; } int CommMPI::num_processes() { return proc_size; } diff --git a/src/comm_types/comm_mpi.h b/src/comm_types/comm_mpi.h index f1ba4dd..3515479 100644 --- a/src/comm_types/comm_mpi.h +++ b/src/comm_types/comm_mpi.h @@ -70,7 +70,6 @@ class CommMPI: public Comm { System s; // Owned Variables - int phase; // Communication Phase int proc_neighbors_recv[6]; // Neighbor for each phase int proc_neighbors_send[6]; // Neighbor for each phase @@ -81,6 +80,8 @@ class CommMPI: public Comm { int proc_rank; // My Process rank int proc_size; // Number of processes + std::string comm_name; + T_INT num_ghost[6]; T_INT ghost_offsets[6]; @@ -503,7 +504,9 @@ class CommMPI: public Comm { KOKKOS_INLINE_FUNCTION void operator() (const TagCreateGlobalIndecies, const T_INT& i) const { + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES s.global_index(i) = N_MAX_MASK * proc_rank + i; + #endif } const char* name(); diff --git a/src/comm_types/comm_serial.h b/src/comm_types/comm_serial.h index 386b3cd..dd11508 100644 --- a/src/comm_types/comm_serial.h +++ b/src/comm_types/comm_serial.h @@ -52,7 +52,6 @@ } #endif - #if !defined(MODULES_OPTION_CHECK) && !defined(COMM_MODULES_INSTANTIATION) #ifndef COMM_SERIAL_H #define COMM_SERIAL_H diff --git a/src/examinimd.cpp b/src/examinimd.cpp index 2d1b625..7ea9e4b 100644 --- a/src/examinimd.cpp +++ b/src/examinimd.cpp @@ -83,8 +83,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { #undef FORCE_MODULES_INSTANTIATION else comm->error("Invalid ForceType"); for(int line = 0; line < input->force_coeff_lines.extent(0); line++) { - //input->input_data.print_line(input->force_coeff_lines(line)); - //printf("init_coeff: %i %i\n",line,input->input_data.words_in_line(input->force_coeff_lines(line))); 
force->init_coeff(input->input_data.words_in_line(input->force_coeff_lines(line)), input->input_data.words[input->force_coeff_lines(line)]); } @@ -108,7 +106,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(neighbor) neighbor->comm_newton = input->comm_newton; - // system->print_particles(); if(system->do_print) { printf("Using: %s %s %s %s\n",force->name(),neighbor->name(),comm->name(),binning->name()); } @@ -160,7 +157,7 @@ void ExaMiniMD::init(int argc, char* argv[]) { } else { printf("\n"); printf("Step Temp E_pair TotEng CPU\n"); - printf(" %i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); + printf("%i %lf %lf %lf %lf\n",step,T,PE,PE+KE,0.0); } } } @@ -170,7 +167,6 @@ void ExaMiniMD::init(int argc, char* argv[]) { if(input->correctnessflag) check_correctness(step); - } void ExaMiniMD::run(int nsteps) { @@ -190,7 +186,6 @@ void ExaMiniMD::run(int nsteps) { // Timestep Loop for(int step = 1; step <= nsteps; step++ ) { - // Do first part of the verlet time step integration other_timer.reset(); integrator->initial_integrate(); @@ -221,11 +216,15 @@ void ExaMiniMD::run(int nsteps) { neighbor->create_neigh_list(system,binning,force->half_neigh,false); neigh_time += neigh_timer.seconds(); } else { - // Exchange Halo + // Exchange Halo data comm_timer.reset(); comm->update_halo(); comm_time += comm_timer.seconds(); } +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + // DefaultRemoteMemorySpace only exists in Kokkos Remote Spaces builds + Kokkos::Experimental::DefaultRemoteMemorySpace::fence(); +#endif // Zero out forces force_timer.reset(); @@ -260,7 +259,7 @@ void ExaMiniMD::run(int nsteps) { last_time = time; } else { double time = timer.seconds(); - printf(" %i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); + printf("%i %lf %lf %lf %lf\n",step, T, PE, PE+KE, timer.seconds()); last_time = time; } } diff --git a/src/force_types/CMakeLists.txt b/src/force_types/CMakeLists.txt index 9f43539..9c06a2c 100644 --- a/src/force_types/CMakeLists.txt +++ b/src/force_types/CMakeLists.txt @@ -1,11 +1,25 @@ FILE(GLOB SRCS *.cpp) - -#Skip lj_ideal, snap and cell +#Skip snap and cell #TODO: SNAP is 
outdates and should likely be removed all together as it is list(FILTER SRCS EXCLUDE REGEX ".*lj_cell\\.cpp$") -list(FILTER SRCS EXCLUDE REGEX ".*lj_idial_neigh\\.cpp$") list(FILTER SRCS EXCLUDE REGEX ".*snap_neigh\\.cpp$") -target_sources(ExaMiniMD PRIVATE ${SRCS}) +# Skip force-type module if Kokkos Remote Spaces is not enabled +if (ENABLE_KOKKOS_REMOTE_SPACES) + message(STATUS "Building with support for force_lj_neigh_distrib") + list(FILTER SRCS EXCLUDE REGEX ".*lj_neigh\\.cpp$") + target_compile_definitions(ExaMiniMD PRIVATE EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_SCALAR) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_HALO_LOCAL) + #target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_LOCAL_GLOBAL) + target_compile_definitions(ExaMiniMD PRIVATE SHMEMTESTS_USE_GLOBAL) +else() + #Otherwise exclude + list(FILTER SRCS EXCLUDE REGEX ".*distrib\\.cpp$") +endif() + + +target_sources(ExaMiniMD PRIVATE ${SRCS}) diff --git a/src/force_types/force_lj_neigh.cpp b/src/force_types/force_lj_neigh.cpp index 565964f..9f297ff 100644 --- a/src/force_types/force_lj_neigh.cpp +++ b/src/force_types/force_lj_neigh.cpp @@ -37,7 +37,6 @@ //************************************************************************ #include - #define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh #define FORCE_MODULES_TEMPLATE #include diff --git a/src/force_types/force_lj_neigh.h b/src/force_types/force_lj_neigh.h index 785a377..5737421 100644 --- a/src/force_types/force_lj_neigh.h +++ b/src/force_types/force_lj_neigh.h @@ -67,27 +67,16 @@ class ForceLJNeigh: public Force { private: int N_local,ntypes; t_x_const_rnd x; - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - t_x_shmem x_shmem; - t_x_shmem_local x_shmem_local; - #endif t_f f; t_f_atomic f_a; t_id id; - t_index global_index; t_type_const_rnd type; - - T_X_FLOAT domain_x, domain_y, 
domain_z; - int proc_rank; - Binning::t_bincount bin_count; Binning::t_binoffsets bin_offsets; T_INT nbinx,nbiny,nbinz,nhalo; int step; bool use_stackparams; - typedef Kokkos::View t_fparams; typedef Kokkos::View> t_fparams_rnd; @@ -126,8 +115,6 @@ class ForceLJNeigh: public Force { typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams; typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams; - struct TagCopyLocalXShmem {}; - ForceLJNeigh (char** args, System* system, bool half_neigh_); void init_coeff(int nargs, char** args); @@ -151,9 +138,6 @@ class ForceLJNeigh: public Force { KOKKOS_INLINE_FUNCTION void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagCopyLocalXShmem, const T_INT& i) const; - const char* name(); }; diff --git a/src/force_types/force_lj_neigh_distrib.cpp b/src/force_types/force_lj_neigh_distrib.cpp new file mode 100644 index 0000000..c286e43 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.cpp @@ -0,0 +1,43 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +//************************************************************************ + +#include +#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh +#define FORCE_MODULES_TEMPLATE +#include +#undef FORCE_MODULES_TEMPLATE diff --git a/src/force_types/force_lj_neigh_distrib.h b/src/force_types/force_lj_neigh_distrib.h new file mode 100644 index 0000000..d2e8a11 --- /dev/null +++ b/src/force_types/force_lj_neigh_distrib.h @@ -0,0 +1,164 @@ +//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. 
+// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//************************************************************************
+// LJ neighbor-list force module whose class additionally carries x_shmem position views.
+#ifdef MODULES_OPTION_CHECK
+  if( (strcmp(argv[i+1], "NEIGH_FULL") == 0) )
+    force_iteration_type = FORCE_ITER_NEIGH_FULL;
+  if( (strcmp(argv[i+1], "NEIGH_HALF") == 0) ) {
+    force_iteration_type = FORCE_ITER_NEIGH_HALF;
+  }
+#endif
+#ifdef FORCE_MODULES_INSTANTIATION
+  else if (input->force_type == FORCE_LJ) {
+    bool half_neigh = input->force_iteration_type == FORCE_ITER_NEIGH_HALF;
+    switch ( input->neighbor_type ) {
+      #define FORCETYPE_ALLOCATION_MACRO(NeighType) ForceLJNeigh(input->input_data.words[input->force_line],system,half_neigh)
+      #include
+      #undef FORCETYPE_ALLOCATION_MACRO
+    }
+  }
+#endif
+// Everything below is the class declaration; it is skipped in the two fragment modes above.
+#if !defined(MODULES_OPTION_CHECK) && \
+    !defined(FORCE_MODULES_INSTANTIATION)
+
+#ifndef FORCE_LJ_NEIGH_H
+#define FORCE_LJ_NEIGH_H
+#include
+// NOTE(review): the guard macro carries no _DISTRIB suffix; confirm it cannot clash with force_lj_neigh.h.
+template
+class ForceLJNeigh: public Force {
+private:
+  int N_local,ntypes;
+  t_x_const_rnd x;
+  t_x_shmem x_shmem;
+  t_x_shmem_local x_shmem_local;
+  t_index global_index;
+  t_f f;
+  t_f_atomic f_a;
+  t_id id;
+  t_type_const_rnd type;
+  // Box lengths (copied from System in compute()) and this process's MPI rank.
+  T_X_FLOAT domain_x, domain_y, domain_z;
+  int proc_rank;
+
+  Binning::t_bincount bin_count;
+  Binning::t_binoffsets bin_offsets;
+  T_INT nbinx,nbiny,nbinz,nhalo;
+  int step;
+  bool use_stackparams;
+
+  // LJ coefficient tables as Kokkos views (allocated only when !use_stackparams).
+  typedef Kokkos::View t_fparams;
+  typedef Kokkos::View> t_fparams_rnd;
+  t_fparams lj1,lj2,cutsq;
+  t_fparams_rnd rnd_lj1,rnd_lj2,rnd_cutsq;
+  // Fixed-size coefficient tables used when ntypes <= MAX_TYPES_STACKPARAMS.
+  T_F_FLOAT stack_lj1[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired space for 12 atom types
+  T_F_FLOAT stack_lj2[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  T_F_FLOAT stack_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  typedef typename NeighborClass::t_neigh_list t_neigh_list;
+  t_neigh_list neigh_list;
+
+public:
+  typedef T_V_FLOAT value_type;
+  // Empty tag types: each one selects the matching operator() overload declared below.
+  template
+  struct TagFullNeigh {};
+
+  template
+  struct TagHalfNeigh {};
+
+  template
+  struct TagFullNeighPE {};
+
+  template
+  struct TagHalfNeighPE {};
+
+  struct TagCopyLocalXShmem {};
+  // Range policies, one per tag (plain and *_stackparams flavours).
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe;
+
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_stackparams;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_stackparams;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_full_neigh_pe_stackparams;
+  typedef Kokkos::RangePolicy,Kokkos::IndexType > t_policy_half_neigh_pe_stackparams;
+
+  typedef Kokkos::RangePolicy > t_policy_compute_fill_xshmem;
+
+  ForceLJNeigh (char** args, System* system, bool half_neigh_);
+
+  void init_coeff(int nargs, char** args);
+
+  void compute(System* system, Binning* binning, Neighbor* neighbor );
+  T_F_FLOAT compute_energy(System* system, Binning* binning, Neighbor* neighbor);
+  // Per-index kernels dispatched through the tags above; the PE variants take a reduction argument PE.
+  template
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagFullNeigh, const T_INT& i) const;
+
+  template
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagHalfNeigh, const T_INT& i) const;
+
+  template
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const;
+
+  template
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagCopyLocalXShmem, const T_INT& i) const;
+
+  const char* name();
+};
+
+#define FORCE_MODULES_EXTERNAL_TEMPLATE
+#define FORCETYPE_DECLARE_TEMPLATE_MACRO(NeighType) ForceLJNeigh
+#include
+#undef FORCETYPE_DECLARE_TEMPLATE_MACRO
+#undef FORCE_MODULES_EXTERNAL_TEMPLATE
+#endif
+#endif
diff --git a/src/force_types/force_lj_neigh_distrib_impl.h b/src/force_types/force_lj_neigh_distrib_impl.h
new file mode 100644
index 0000000..ede53a7
--- /dev/null
+++ b/src/force_types/force_lj_neigh_distrib_impl.h
@@ -0,0 +1,430 @@
+//************************************************************************ +// ExaMiniMD v. 1.0 +// Copyright (2018) National Technology & Engineering Solutions of Sandia, +// LLC (NTESS). +// +// Under the terms of Contract DE-NA-0003525 with NTESS, the U.S. Government +// retains certain rights in this software. +// +// ExaMiniMD is licensed under 3-clause BSD terms of use: Redistribution and +// use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL NTESS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//************************************************************************
+
+#include
+#include
+
+// Build an LJ force object for `system`. Heap coefficient tables are allocated only
+// when there are more atom types than the fixed-size stack tables can hold.
+template
+ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) {
+  ntypes = system->ntypes;
+  use_stackparams = (ntypes <= MAX_TYPES_STACKPARAMS);
+  if (!use_stackparams) {
+    lj1 = t_fparams("ForceLJNeigh::lj1",ntypes,ntypes);
+    lj2 = t_fparams("ForceLJNeigh::lj2",ntypes,ntypes);
+    cutsq = t_fparams("ForceLJNeigh::cutsq",ntypes,ntypes);
+  }
+  nbinx = nbiny = nbinz = 0;
+  N_local = 0;
+  nhalo = 0;
+  step = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
+}
+
+// Read one coefficient line: args[1]/args[2] are 1-based atom types, args[3] epsilon,
+// args[4] sigma, args[5] cutoff. NOTE(review): nargs is never checked against these reads.
+template
+void ForceLJNeigh::init_coeff(int nargs, char** args) {
+  step = 0;
+
+  int one_based_type = 1;
+  int t1 = atoi(args[1])-one_based_type;
+  int t2 = atoi(args[2])-one_based_type;
+  double eps = atof(args[3]);
+  double sigma = atof(args[4]);
+  double cut = atof(args[5]);
+
+  if (use_stackparams) {
+    // Stack tables: every type pair receives the coefficients of the current line.
+    for (int i = 0; i < ntypes; i++) {
+      for (int j = 0; j < ntypes; j++) {
+        stack_lj1[i][j] = 48.0 * eps * pow(sigma,12.0);
+        stack_lj2[i][j] = 24.0 * eps * pow(sigma,6.0);
+        stack_cutsq[i][j] = cut*cut;
+      }
+    }
+  } else {
+    // Heap tables: only the (t1,t2) and (t2,t1) entries change, edited through host mirrors.
+    t_fparams::HostMirror h_lj1 = Kokkos::create_mirror_view(lj1);
+    t_fparams::HostMirror h_lj2 = Kokkos::create_mirror_view(lj2);
+    t_fparams::HostMirror h_cutsq = Kokkos::create_mirror_view(cutsq);
+    Kokkos::deep_copy(h_lj1,lj1);
+    Kokkos::deep_copy(h_lj2,lj2);
+    Kokkos::deep_copy(h_cutsq,cutsq);
+
+    h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0);
+    h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0);
+    h_lj1(t2,t1) = h_lj1(t1,t2);
+    h_lj2(t2,t1) = h_lj2(t1,t2);
+    h_cutsq(t1,t2) = cut*cut;
+    h_cutsq(t2,t1) = cut*cut;
+
+    Kokkos::deep_copy(lj1,h_lj1);
+    Kokkos::deep_copy(lj2,h_lj2);
+    Kokkos::deep_copy(cutsq,h_cutsq);
+
+    rnd_lj1 = lj1;
+    rnd_lj2 = lj2;
+    rnd_cutsq = cutsq;
+  }
+}
+
+// Compute LJ forces on all local atoms. Unless SHMEMTESTS_USE_HALO is set, the fill kernel
+// (compute_fill_xshmem) runs first, bracketed by remote-memory fences, before the force kernel.
+template
+void ForceLJNeigh::compute(System* system, Binning* binning, Neighbor* neighbor_ ) {
+  // Set internal data handles
+  NeighborClass* neighbor = (NeighborClass*) neighbor_;
+  neigh_list = neighbor->get_neigh_list();
+  N_local = system->N_local;
+  x = system->x;
+  x_shmem = system->x_shmem;
+  x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1));
+  f = system->f;
+  f_a = system->f;
+  type = system->type;
+  id = system->id;
+  global_index = system->global_index;
+
+  domain_x = system->domain_x;
+  domain_y = system->domain_y;
+  domain_z = system->domain_z;
+
+  #ifndef SHMEMTESTS_USE_HALO
+  Kokkos::Experimental::DefaultRemoteMemorySpace::fence();
+  Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", t_policy_compute_fill_xshmem(0,system->N_local), *this);
+  Kokkos::fence();
+  Kokkos::Experimental::DefaultRemoteMemorySpace().fence();
+  #endif
+
+  if (use_stackparams) {
+    if(half_neigh)
+      Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this);
+    else
+      Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh_stackparams(0, system->N_local), *this);
+  } else {
+    if(half_neigh)
+      Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh(0, system->N_local), *this);
+    else
+      Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this);
+  }
+  Kokkos::fence();
+  Kokkos::Experimental::DefaultRemoteMemorySpace::fence();
+  step++;
+}
+
+// Reduce the potential energy over all local atoms; the return type matches the class declaration.
+// NOTE(review): x_shmem, global_index and domain_* are not refreshed here, only in compute().
+template
+T_F_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* binning, Neighbor* neighbor_ ) {
+  // Set internal data handles
+  NeighborClass* neighbor = (NeighborClass*) neighbor_;
+  neigh_list = neighbor->get_neigh_list();
+  MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
+  N_local = system->N_local;
+  x = system->x;
+  f = system->f;
+  f_a = system->f;
+  type = system->type;
+  id = system->id;
+  T_V_FLOAT energy = 0.0;
+
+  if (use_stackparams) {
+    if(half_neigh)
+      Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe_stackparams(0, system->N_local), *this, energy);
+    else
+      Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe_stackparams(0, system->N_local), *this, energy);
+  } else {
+    if(half_neigh)
+      Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_half_neigh_pe(0, system->N_local), *this, energy);
+    else
+      Kokkos::parallel_reduce("ForceLJNeigh::compute_energy", t_policy_full_neigh_pe(0, system->N_local), *this, energy);
+  }
+  Kokkos::fence();
+  Kokkos::Experimental::DefaultRemoteMemorySpace::fence();
+
+  step++;
+  return energy;
+}
+
+// Module name; reflects whether the half or the full neighbor list is in use.
+template
+const char* ForceLJNeigh::name() { return half_neigh?"ForceLJNeighHalf":"ForceLJNeighFull"; }
+
+// Force on atom i from its full neighbor list. Neighbor positions come from x or x_shmem,
+// depending on the SHMEMTESTS_* mode. No I/O in here: the loop body runs once per neighbor pair.
+template
+template
+KOKKOS_INLINE_FUNCTION
+void ForceLJNeigh::operator() (TagFullNeigh, const T_INT& i) const {
+  const T_F_FLOAT x_i = x(i,0);
+  const T_F_FLOAT y_i = x(i,1);
+  const T_F_FLOAT z_i = x(i,2);
+  const int type_i = type(i);
+
+  typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i);
+  const int num_neighs = neighs_i.get_num_neighs();
+
+  T_F_FLOAT fxi = 0.0;
+  T_F_FLOAT fyi = 0.0;
+  T_F_FLOAT fzi = 0.0;
+
+  for(int jj = 0; jj < num_neighs; jj++) {
+    T_INT j = neighs_i(jj);
+    const T_INDEX jg = global_index(j);
+    // Fetch neighbor j's position with the compiled-in access mode; an unselected mode is a build error.
+    #ifdef SHMEMTESTS_USE_SCALAR
+      #if defined(SHMEMTESTS_USE_HALO)
+      const T_X_FLOAT xj_shmem = x(j,0);
+      const T_X_FLOAT yj_shmem = x(j,1);
+      const T_X_FLOAT zj_shmem = x(j,2);
+      #elif defined(SHMEMTESTS_USE_HALO_LOCAL)
+      const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0);
+      const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1);
+      const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2);
+      #elif defined(SHMEMTESTS_USE_LOCAL_GLOBAL)
+      const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0);
+      const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1);
+      const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2);
+      #elif defined(SHMEMTESTS_USE_GLOBAL)
+      const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0);
+      const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1);
+      const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2);
+      #else
+        #error "Unknown configuration"
+      #endif
+    #else
+      #ifdef SHMEMTESTS_USE_GLOBAL
+      const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK);
+      const T_X_FLOAT xj_shmem = posj_shmem.x;
+      const T_X_FLOAT yj_shmem = posj_shmem.y;
+      const T_X_FLOAT zj_shmem = posj_shmem.z;
+      #else
+        #error "Unknown configuration"
+      #endif
+    #endif
+
+    #ifdef SHMEMTESTS_USE_HALO
+    const T_F_FLOAT dx = x_i - x(j,0);
+    const T_F_FLOAT dy = y_i - x(j,1);
+    const T_F_FLOAT dz = z_i - x(j,2);
+    #else
+    // Wrap each component by one box length when it exceeds half the box; fabs keeps this in floating point.
+    T_F_FLOAT dx = x_i - xj_shmem;
+    if (fabs(dx) > domain_x/2) dx += (dx < 0) ? domain_x : -domain_x;
+    T_F_FLOAT dy = y_i - yj_shmem;
+    if (fabs(dy) > domain_y/2) dy += (dy < 0) ? domain_y : -domain_y;
+    T_F_FLOAT dz = z_i - zj_shmem;
+    if (fabs(dz) > domain_z/2) dz += (dz < 0) ? domain_z : -domain_z;
+    #endif
+
+    const int type_j = type(j);
+    const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
+
+    const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j);
+
+    if( rsq < cutsq_ij ) {
+      const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j);
+      const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j);
+
+      T_F_FLOAT r2inv = 1.0/rsq;
+      T_F_FLOAT r6inv = r2inv*r2inv*r2inv;
+      T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv;
+      fxi += dx*fpair;
+      fyi += dy*fpair;
+      fzi += dz*fpair;
+    }
+  }
+
+  f(i,0) += fxi;
+  f(i,1) += fyi;
+  f(i,2) += fzi;
+}
+
+template
+template
+KOKKOS_INLINE_FUNCTION
+void ForceLJNeigh::operator() (TagHalfNeigh, const T_INT& i) const {
+  const T_F_FLOAT x_i = x(i,0);
+  const T_F_FLOAT y_i = x(i,1);
+  const T_F_FLOAT z_i = x(i,2);
+  const int type_i = type(i);
+
+  typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i);
+
+  const int num_neighs = neighs_i.get_num_neighs();
+
+  T_F_FLOAT fxi = 0.0;
+  T_F_FLOAT fyi = 0.0;
+  T_F_FLOAT fzi = 0.0;
+  for(int jj = 0; jj < num_neighs; jj++) {
+    T_INT j = neighs_i(jj);
+    const T_F_FLOAT dx = x_i - x(j,0);
+    const T_F_FLOAT dy = y_i - x(j,1);
+    const T_F_FLOAT dz = z_i - x(j,2);
+
+    const int type_j = type(j);
+    const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
+
+    const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j);
+
+    if( rsq < 
cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fpair = (r6inv * (lj1_ij*r6inv - lj2_ij)) * r2inv; + fxi += dx*fpair; + fyi += dy*fpair; + fzi += dz*fpair; + f_a(j,0) -= dx*fpair; + f_a(j,1) -= dy*fpair; + f_a(j,2) -= dz*fpair; + } + } + f_a(i,0) += fxi; + f_a(i,1) += fyi; + f_a(i,2) += fzi; + +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagFullNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + PE += 0.5*r6inv * (0.5*lj1_ij*r6inv - lj2_ij) / 6.0; // optimize later + + if (shift_flag) { + T_F_FLOAT r2invc = 1.0/cutsq_ij; + T_F_FLOAT r6invc = r2invc*r2invc*r2invc; + PE -= 0.5*r6invc * (0.5*lj1_ij*r6invc - lj2_ij) / 6.0; // optimize later + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagHalfNeighPE, const T_INT& i, T_V_FLOAT& PE) const { + const T_F_FLOAT x_i = 
x(i,0); + const T_F_FLOAT y_i = x(i,1); + const T_F_FLOAT z_i = x(i,2); + const int type_i = type(i); + const bool shift_flag = true; + + typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i); + + const int num_neighs = neighs_i.get_num_neighs(); + + for(int jj = 0; jj < num_neighs; jj++) { + T_INT j = neighs_i(jj); + const T_F_FLOAT dx = x_i - x(j,0); + const T_F_FLOAT dy = y_i - x(j,1); + const T_F_FLOAT dz = z_i - x(j,2); + + const int type_j = type(j); + const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + + const T_F_FLOAT cutsq_ij = STACKPARAMS?stack_cutsq[type_i][type_j]:rnd_cutsq(type_i,type_j); + + if( rsq < cutsq_ij ) { + const T_F_FLOAT lj1_ij = STACKPARAMS?stack_lj1[type_i][type_j]:rnd_lj1(type_i,type_j); + const T_F_FLOAT lj2_ij = STACKPARAMS?stack_lj2[type_i][type_j]:rnd_lj2(type_i,type_j); + + T_F_FLOAT r2inv = 1.0/rsq; + T_F_FLOAT r6inv = r2inv*r2inv*r2inv; + T_F_FLOAT fac; + if(j +KOKKOS_INLINE_FUNCTION +void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { + printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); + #ifdef SHMEMTESTS_USE_SCALAR + x_shmem_local(i,0) = x(i,0); + x_shmem_local(i,1) = x(i,1); + x_shmem_local(i,2) = x(i,2); + #else + double3 pos = {x(i,0),x(i,1),x(i,2)}; + x_shmem_local(i) = pos; + #endif +} + diff --git a/src/force_types/force_lj_neigh_impl.h b/src/force_types/force_lj_neigh_impl.h index 4740dc1..19e3fbf 100644 --- a/src/force_types/force_lj_neigh_impl.h +++ b/src/force_types/force_lj_neigh_impl.h @@ -38,10 +38,6 @@ #include -#ifdef EXAMINIMD_ENABLE_MPI -#include -#endif - template ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half_neigh_):Force(args,system,half_neigh_) { ntypes = system->ntypes; @@ -55,11 +51,6 @@ ForceLJNeigh::ForceLJNeigh(char** args, System* system, bool half N_local = 0; nhalo = 0; step = 0; - #if defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) || defined (EXAMINIMD_ENABLE_MPI) - MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); - #else - proc_rank=0; - 
#endif } template @@ -88,7 +79,6 @@ void ForceLJNeigh::init_coeff(int nargs, char** args) { Kokkos::deep_copy(h_lj1,lj1); Kokkos::deep_copy(h_lj2,lj2); Kokkos::deep_copy(h_cutsq,cutsq); - h_lj1(t1,t2) = 48.0 * eps * pow(sigma,12.0); h_lj2(t1,t2) = 24.0 * eps * pow(sigma,6.0); h_lj1(t2,t1) = h_lj1(t1,t2); @@ -111,30 +101,13 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - + N_local = system->N_local; x = system->x; - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - x_shmem = system->x_shmem; - x_shmem_local = t_x_shmem_local(x_shmem.data(),x_shmem.extent(1)); - #endif - f = system->f; f_a = system->f; type = system->type; id = system->id; - global_index = system->global_index; - - domain_x = system->domain_x; - domain_y = system->domain_y; - domain_z = system->domain_z; - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - Kokkos::parallel_for("ForceLJNeigh::compute_fill_xshmem", Kokkos::RangePolicy(0,system->N_local), *this); - Kokkos::fence(); - Kokkos::Experimental::DefaultRemoteMemorySpace().fence();; - #endif if (use_stackparams) { if(half_neigh) Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_half_neigh_stackparams(0, system->N_local), *this); @@ -147,10 +120,7 @@ void ForceLJNeigh::compute(System* system, Binning* binning, Neig Kokkos::parallel_for("ForceLJNeigh::compute", t_policy_full_neigh(0, system->N_local), *this); } Kokkos::fence(); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - Kokkos::Experimental::DefaultRemoteMemorySpace::fence();; - #endif - //x_shmem = t_x_shmem(); + step++; } @@ -159,9 +129,7 @@ T_V_FLOAT ForceLJNeigh::compute_energy(System* system, Binning* b // Set internal data handles NeighborClass* neighbor = (NeighborClass*) neighbor_; neigh_list = neighbor->get_neigh_list(); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - 
MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); - #endif + N_local = system->N_local; x = system->x; f = system->f; @@ -208,59 +176,10 @@ void ForceLJNeigh::operator() (TagFullNeigh, const T for(int jj = 0; jj < num_neighs; jj++) { T_INT j = neighs_i(jj); - //printf("Neigh: %i %i %li %li %i\n",i,j,global_index(i),global_index(j),j>N_local?1:0); - - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - - const T_INDEX jg = global_index(j); - #ifdef SHMEMTESTS_USE_SCALAR - #ifdef SHMEMTESTS_USE_HALO - const T_X_FLOAT xj_shmem = x(j,0);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = x(j,1);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = x(j,2);//x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_HALO_LOCAL - const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x(j,0):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x(j,1):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x(j,2):x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_LOCAL_GLOBAL - const T_X_FLOAT xj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+0]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+1]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = jg/N_MAX_MASK==proc_rank?x_shmem.data()[j*3+2]:x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #ifdef SHMEMTESTS_USE_GLOBAL - const T_X_FLOAT xj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,0); - const T_X_FLOAT yj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,1); - const T_X_FLOAT zj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK,2); - #endif - #else - #ifdef SHMEMTESTS_USE_GLOBAL - const double3 posj_shmem = x_shmem(jg/N_MAX_MASK,jg%N_MAX_MASK); - const T_X_FLOAT xj_shmem = posj_shmem.x; - const T_X_FLOAT yj_shmem = posj_shmem.y; - const T_X_FLOAT zj_shmem = posj_shmem.z; - #endif - #endif - - //printf("DATA: %i 
%i %i %lf %lf %lf %i %li %i\n",id(i),jj,(int)jg,xj_shmem,yj_shmem,zj_shmem,(int)jg/N_MAX_MASK,N_MAX_MASK,int(jg%N_MAX_MASK)); - T_F_FLOAT dx = abs(x_i - xj_shmem)>domain_x/2? - (x_i-xj_shmem<0?x_i-xj_shmem+domain_x:x_i-xj_shmem-domain_x) - :x_i-xj_shmem; - T_F_FLOAT dy = abs(y_i - yj_shmem)>domain_y/2? - (y_i-yj_shmem<0?y_i-yj_shmem+domain_y:y_i-yj_shmem-domain_y) - :y_i-yj_shmem; - T_F_FLOAT dz = abs(z_i - zj_shmem)>domain_z/2? - (z_i-zj_shmem<0?z_i-zj_shmem+domain_z:z_i-zj_shmem-domain_z) - :z_i-zj_shmem; - #else //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES const T_F_FLOAT dx = x_i - x(j,0); const T_F_FLOAT dy = y_i - x(j,1); const T_F_FLOAT dz = z_i - x(j,2); - #endif -// if((abs(dx_shmem-dx)>1e-10) || (abs(dy_shmem-dy)>1e-10) || (abs(dz_shmem-dz)>1e-10)) -// printf("Neigh: %i %i %li %li %i : %lf %lf %lf %lf %lf\n",i,j,global_index(i),global_index(j),j>N_local?1:0,x(j,0),xj_shmem,domain_x,dx,dx_shmem); const int type_j = type(j); const T_F_FLOAT rsq = dx*dx + dy*dy + dz*dz; @@ -421,20 +340,3 @@ void ForceLJNeigh::operator() (TagHalfNeighPE, const } } - -template -KOKKOS_INLINE_FUNCTION -void ForceLJNeigh::operator() (TagCopyLocalXShmem, const T_INT& i) const { - //printf("CopyLocal: %i %lf %lf %lf\n",i,x(i,0),x(i,1),x(i,2)); - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - #ifdef SHMEMTESTS_USE_SCALAR - x_shmem_local(i,0) = x(i,0); - x_shmem_local(i,1) = x(i,1); - x_shmem_local(i,2) = x(i,2); - #else - double3 pos = {x(i,0),x(i,1),x(i,2)}; - x_shmem_local(i) = pos; - #endif - #endif //EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES -} - diff --git a/src/force_types/force_snap_neigh_impl.h b/src/force_types/force_snap_neigh_impl.h index c1faa3a..3f8abe5 100644 --- a/src/force_types/force_snap_neigh_impl.h +++ b/src/force_types/force_snap_neigh_impl.h @@ -203,7 +203,7 @@ void ForceSNAP::compute(System* system, Binning* binning, Neighbo //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); int vector_length = 8; int team_size_max = 
Kokkos::TeamPolicy<>(nlocal,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU int team_size = 20;//max_neighs; if(team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; diff --git a/src/input.cpp b/src/input.cpp index 336cad7..799e04a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -373,8 +373,7 @@ void Input::check_lammps_command(int line) { force_type = FORCE_LJ; force_cutoff = atof(input_data.words[line][2]); force_line = line; - } - if(strcmp(input_data.words[line][1],"snap")==0) { + }else if(strcmp(input_data.words[line][1],"snap")==0) { known = true; force_type = FORCE_SNAP; force_cutoff = 4.73442;// atof(input_data.words[line][2]); @@ -505,6 +504,7 @@ void Input::create_lattice(Comm* comm) { } } } + system->N_local = n; system->N = n; int global_n_max = n; @@ -522,40 +522,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - T_FLOAT ytmp = lattice_constant * (iy+lattice_offset_y); - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - T_FLOAT xtmp = lattice_constant * (ix+lattice_offset_x); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - global_n_max = n; - comm->reduce_max_int(&global_n_max,1); - system->grow(global_n_max); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. 
uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { T_FLOAT ztmp = lattice_constant * (iz+lattice_offset_z); for(T_INT iy=iy_start; iy<=iy_end; iy++) { @@ -658,42 +626,8 @@ void Input::create_lattice(Comm* comm) { // zero out momentum of the whole system afterwards, to eliminate // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { - for(T_INT iy=iy_start; iy<=iy_end; iy++) { - for(T_INT ix=ix_start; ix<=ix_end; ix++) { - for(int k = 0; k<4; k++) { - T_FLOAT xtmp = lattice_constant * (1.0*ix+basis[k][0]); - T_FLOAT ytmp = lattice_constant * (1.0*iy+basis[k][1]); - T_FLOAT ztmp = lattice_constant * (1.0*iz+basis[k][2]); - if((xtmp >= s.sub_domain_lo_x) && - (ytmp >= s.sub_domain_lo_y) && - (ztmp >= s.sub_domain_lo_z) && - (xtmp < s.sub_domain_hi_x) && - (ytmp < s.sub_domain_hi_y) && - (ztmp < s.sub_domain_hi_z) ) { - n++; - } - } - } - } - } - global_n_max = n; - comm->reduce_max_int(&global_n_max,1); - system->grow(global_n_max); - System s = *system; - h_x = Kokkos::create_mirror_view(s.x); - h_v = Kokkos::create_mirror_view(s.v); - h_q = Kokkos::create_mirror_view(s.q); - h_type = Kokkos::create_mirror_view(s.type); - h_id = Kokkos::create_mirror_view(s.id); - n = 0; - // Initialize system using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. - // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - for(T_INT iz=iz_start; iz<=iz_end; iz++) { for(T_INT iy=iy_start; iy<=iy_end; iy++) { for(T_INT ix=ix_start; ix<=ix_end; ix++) { @@ -730,12 +664,8 @@ void Input::create_lattice(Comm* comm) { if(system->do_print) printf("Atoms: %i %i\n",system->N,system->N_local); } - // Initialize velocity using the equivalent of the LAMMPS - // velocity geom option, i.e. uniform random kinetic energies. 
- // zero out momentum of the whole system afterwards, to eliminate - // drift (bad for energy statistics) - { // Scope s + { //Scope System s = *system; T_FLOAT total_mass = 0.0; T_FLOAT total_momentum_x = 0.0; diff --git a/src/main.cpp b/src/main.cpp index 9efcf4c..b879154 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -59,7 +59,6 @@ int main(int argc, char* argv[]) { ExaMiniMD examinimd; examinimd.init(argc,argv); examinimd.run(examinimd.input->nsteps); - // examinimd.check_correctness(); examinimd.print_performance(); examinimd.shutdown(); diff --git a/src/modules_force.h b/src/modules_force.h index 526d89e..e780654 100644 --- a/src/modules_force.h +++ b/src/modules_force.h @@ -38,10 +38,15 @@ // Include Module header files for force #include +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES +#include +#else #include +#endif +#include +#include -//#include -//#include +// SNAP is outdated and likely subject to removal //#ifndef KOKKOS_ENABLE_OPENMPTARGET //#include //#endif diff --git a/src/neighbor_types/neighbor_2d.cpp b/src/neighbor_types/neighbor_2d.cpp index d0b8364..8a54522 100644 --- a/src/neighbor_types/neighbor_2d.cpp +++ b/src/neighbor_types/neighbor_2d.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct Neighbor2D; #endif template struct Neighbor2D; diff --git a/src/neighbor_types/neighbor_csr.cpp b/src/neighbor_types/neighbor_csr.cpp index 429d657..cc5f09a 100644 --- a/src/neighbor_types/neighbor_csr.cpp +++ b/src/neighbor_types/neighbor_csr.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct NeighborCSR; #endif template struct NeighborCSR; diff --git a/src/neighbor_types/neighbor_csr.h b/src/neighbor_types/neighbor_csr.h index df27ac0..68593c1 100644 --- a/src/neighbor_types/neighbor_csr.h +++ b/src/neighbor_types/neighbor_csr.h @@ -427,7 +427,7 @@ class NeighborCSR: public Neighbor { // Create actual CSR NeighList neigh_list = t_neigh_list( - Kokkos::View( neighs, 
Kokkos::pair(0,total_num_neighs)), + Kokkos::View( neighs, Kokkos::pair(0,total_num_neighs)), Kokkos::View( neigh_offsets, Kokkos::pair(0,N_local+1))); } diff --git a/src/neighbor_types/neighbor_csr_map_constr.cpp b/src/neighbor_types/neighbor_csr_map_constr.cpp index f79435b..adf9a5e 100644 --- a/src/neighbor_types/neighbor_csr_map_constr.cpp +++ b/src/neighbor_types/neighbor_csr_map_constr.cpp @@ -38,7 +38,7 @@ #include -#ifdef EMD_ENABLE_GPU +#ifdef EXAMINIMD_HAS_GPU template struct NeighborCSRMapConstr; #endif template struct NeighborCSRMapConstr; diff --git a/src/system.cpp b/src/system.cpp index 744c53c..c0d7590 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -52,7 +52,9 @@ System::System() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -61,7 +63,7 @@ System::System() { sub_domain_hi_x = sub_domain_hi_y = sub_domain_hi_z = 0.0; sub_domain_lo_x = sub_domain_lo_y = sub_domain_lo_z = 0.0; mvv2e = boltz = dt = 0.0; -#if defined(EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) + #if defined (EXAMINIMD_ENABLE_MPI) || defined (EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES) int proc_rank; MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); do_print = proc_rank == 0; @@ -76,7 +78,9 @@ void System::init() { v = t_v("System::v",N_max); f = t_f("System::f",N_max); id = t_id("System::id",N_max); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index("System::global_index",N_max); + #endif type = t_type("System::type",N_max); q = t_q("System::q",N_max); mass = t_mass("System::mass",ntypes); @@ -91,7 +95,9 @@ void System::destroy() { v = t_v(); f = t_f(); id = t_id(); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index = t_index(); + #endif type = t_type(); q = t_q(); mass = t_mass(); @@ -105,18 +111,34 @@ void System::grow(T_INT N_new) { Kokkos::resize(v,N_max); // Velocities Kokkos::resize(f,N_max); // Forces 
Kokkos::resize(id,N_max); // Id + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES Kokkos::resize(global_index,N_max); // Id + #endif Kokkos::resize(type,N_max); // Particle Type Kokkos::resize(q,N_max); // Charge - #ifdef EXAMINIMD_ENABLE_USE_KOKKOS_REMOTE_SPACES + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES int num_ranks; MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); + x_shmem = t_x_shmem("X_shmem", num_ranks, N_max); // Positions (distrib) #endif } } +void System::print_particles_from_device_data() +{ + printf("Print all particles (GPU): \n"); + printf(" Owned: %d\n",N_local); + Kokkos::parallel_for("print_particles_2", N_local, KOKKOS_LAMBDA(int i){ + printf(" %d %lf %lf %lf | %lf %lf %lf | %lf %lf %lf | %d %e\n",i, + double(x(i,0)),double(x(i,1)),double(x(i,2)), + double(v(i,0)),double(v(i,1)),double(v(i,2)), + double(f(i,0)),double(f(i,1)),double(f(i,2)), + type(i),q(i)); + }); + Kokkos::fence(); +} + void System::print_particles() { printf("Print all particles: \n"); printf(" Owned: %d\n",N_local); diff --git a/src/system.h b/src/system.h index 1be1da3..0dae9d4 100644 --- a/src/system.h +++ b/src/system.h @@ -67,18 +67,19 @@ class System { // Per Particle Property t_x x; // Positions + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES + t_x_shmem x_shmem; // Positions + #endif t_v v; // Velocities t_f f; // Forces t_type type; // Particle Type t_id id; // Particle ID - t_index global_index; // Index for PGAS indexing - - t_q q; // Charge - #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES - t_x_shmem x_shmem; + t_index global_index; // Index for distibuted view indexing #endif + + t_q q; // Charge // Per Type Property t_mass mass; @@ -114,7 +115,9 @@ class System { p.q = q(i); p.id = id(i); p.type = type(i); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES p.global_index = global_index(i); + #endif return p; } @@ -125,7 +128,9 @@ class System { q(i) = p.q; id(i) = p.id; type(i) = p.type; + #ifdef 
EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index(i) = p.global_index; + #endif } KOKKOS_INLINE_FUNCTION @@ -139,7 +144,9 @@ class System { type(dest) = type(src); id(dest) = id(src); q(dest) = q(src); + #ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES global_index(dest) = global_index(src); + #endif } KOKKOS_INLINE_FUNCTION @@ -150,5 +157,6 @@ class System { } void print_particles(); + void print_particles_from_device_data(); }; #endif diff --git a/src/types.h b/src/types.h index b0b17a4..e476863 100644 --- a/src/types.h +++ b/src/types.h @@ -65,7 +65,9 @@ enum {NEIGH_NONE, NEIGH_CSR, NEIGH_CSR_MAPCONSTR, NEIGH_2D}; // Input File Type enum {INPUT_LAMMPS}; +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES enum INDEX_TYPE: int64_t { N_MAX_MASK = 1024*1024*1024 }; +#endif // Macros to work around the fact that std::max/min is not available on GPUs #define MAX(a,b) (a>b?a:b) @@ -126,8 +128,10 @@ typedef Kokkos::View> t_type_const_rnd; // Type (int is enough as type) typedef Kokkos::View t_id; // ID typedef Kokkos::View t_id_const; // ID +#ifdef EXAMINIMD_ENABLE_KOKKOS_REMOTE_SPACES typedef Kokkos::View t_index; // ID typedef Kokkos::View t_index_const; // ID +#endif typedef Kokkos::View t_q; // Charge typedef Kokkos::View t_q_const; // Charge @@ -202,7 +206,7 @@ t_scalar3 operator * } #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) -#define EMD_ENABLE_GPU +#define EXAMINIMD_HAS_GPU #endif #endif