Merge pull request #653 from streeve/fft_benchmark
Update Cajita benchmarks
streeve authored Aug 18, 2023
2 parents 092aa5d + 5a15d4c commit fe7d01a
Showing 3 changed files with 220 additions and 161 deletions.
benchmark/cajita/Cajita_FastFourierTransformPerformance.cpp: 49 additions & 31 deletions
@@ -31,8 +31,9 @@ using namespace Cajita;
 // Performance test.
 template <class Device>
 void performanceTest( std::ostream& stream,
-                      std::vector<double> grid_sizes_per_dim, MPI_Comm comm,
-                      const std::string& test_prefix )
+                      const Cajita::DimBlockPartitioner<3> partitioner,
+                      std::vector<double> grid_sizes_per_dim_per_rank,
+                      MPI_Comm comm, const std::string& test_prefix )
 {
     using exec_space = typename Device::execution_space;
     using memory_space = typename Device::memory_space;
@@ -42,35 +43,37 @@ void performanceTest( std::ostream& stream,
     std::array<double, 3> global_high_corner = { 1.0, 1.0, 1.0 };
     std::array<bool, 3> is_dim_periodic = { true, true, true };

-    int num_grid_size_per_dim = grid_sizes_per_dim.size();
+    int num_grid_size = grid_sizes_per_dim_per_rank.size();

     // number of runs in test loops
     int num_runs = 10;

-    // get MPI rank
-    int comm_rank;
-    MPI_Comm_rank( comm, &comm_rank );
-
     // create timers
     Cabana::Benchmark::Timer setup_timer( test_prefix + "setup",
-                                          num_grid_size_per_dim );
+                                          num_grid_size );

     Cabana::Benchmark::Timer transforms_timer( test_prefix + "transforms",
-                                               num_grid_size_per_dim );
+                                               num_grid_size );
     // loop over the grid sizes
-    for ( int p = 0; p < num_grid_size_per_dim; ++p )
+    for ( int p = 0; p < num_grid_size; ++p )
     {
-        double cell_size = 1.0 / grid_sizes_per_dim[p];
+        auto ranks_per_dim = partitioner.ranksPerDimension( comm, { 0, 0, 0 } );
+
+        std::array<int, 3> num_cell;
+        for ( int d = 0; d < 3; ++d )
+        {
+            num_cell[d] = grid_sizes_per_dim_per_rank[p] * ranks_per_dim[d];
+        }
         auto global_mesh = createUniformGlobalMesh(
-            global_low_corner, global_high_corner, cell_size );
+            global_low_corner, global_high_corner, num_cell );

         // Create the global grid
-        DimBlockPartitioner<3> partitioner;
         auto global_grid =
             createGlobalGrid( comm, global_mesh, is_dim_periodic, partitioner );

         // Create a local grid
-        auto local_grid = createLocalGrid( global_grid, 0 );
+        int halo_width = 0;
+        auto local_grid = createLocalGrid( global_grid, halo_width );
         auto owned_space = local_grid->indexSpace( Own(), Cell(), Local() );
         auto ghosted_space = local_grid->indexSpace( Ghost(), Cell(), Local() );

@@ -106,21 +109,19 @@ void performanceTest( std::ostream& stream,

         setup_timer.stop( p );

-        transforms_timer.start( p );
-
         // Loop over number of runs
         for ( int t = 0; t < num_runs; ++t )
         {
+            transforms_timer.start( p );
             fft->forward( *lhs, Experimental::FFTScaleFull() );
             fft->reverse( *lhs, Experimental::FFTScaleNone() );
+            transforms_timer.stop( p );
         }
-
-        transforms_timer.stop( p );
     }

-    outputResults( stream, "grid_size_per_dim", grid_sizes_per_dim, setup_timer,
-                   comm );
-    outputResults( stream, "grid_size_per_dim", grid_sizes_per_dim,
+    outputResults( stream, "grid_size_per_dim", grid_sizes_per_dim_per_rank,
+                   setup_timer, comm );
+    outputResults( stream, "grid_size_per_dim", grid_sizes_per_dim_per_rank,
                    transforms_timer, comm );

     stream << std::flush;
@@ -152,10 +153,10 @@ int main( int argc, char* argv[] )

     // Declare the grid size per dimension
     // currently, testing 3dims+symmetric
-    std::vector<double> grid_sizes_per_dim = { 16, 32 };
+    std::vector<double> grid_sizes_per_dim_per_rank = { 16, 32 };
     if ( run_type == "large" )
     {
-        grid_sizes_per_dim = { 16, 32, 64, 128 };
+        grid_sizes_per_dim_per_rank = { 16, 32, 64, 128 };
     }

     // Get the name of the output file.
@@ -164,23 +165,38 @@ int main( int argc, char* argv[] )
     // Barrier before continuing
     MPI_Barrier( MPI_COMM_WORLD );

-    // Get comm rank;
+    // Get comm rank and size;
     int comm_rank;
     MPI_Comm_rank( MPI_COMM_WORLD, &comm_rank );
+    int comm_size;
+    MPI_Comm_size( MPI_COMM_WORLD, &comm_size );

+    // Get partitioner
+    Cajita::DimBlockPartitioner<3> partitioner;
+    // Get ranks per dimension
+    std::array<int, 3> ranks_per_dimension =
+        partitioner.ranksPerDimension( MPI_COMM_WORLD, { 0, 0, 0 } );
+
     // Open the output file on rank 0.
     std::fstream file;
-    if ( 0 == comm_rank )
-        file.open( filename, std::fstream::out );

-    // Output file header
+    // Output problem details.
     if ( 0 == comm_rank )
     {
+        file.open( filename + "_" + std::to_string( comm_size ),
+                   std::fstream::out );
         file << "\n";
         file << "Cajita FFT Performance Benchmark"
              << "\n";
         file << "----------------------------------------------"
              << "\n";
+        file << "MPI Ranks: " << comm_size << "\n";
+        file << "MPI Cartesian Dim Ranks: (" << ranks_per_dimension[0] << ", "
+             << ranks_per_dimension[1] << ", " << ranks_per_dimension[2]
+             << ")\n";
+        file << "----------------------------------------------"
+             << "\n";
         file << "\n";
+        file << std::flush;
     }

@@ -194,11 +210,13 @@ int main( int argc, char* argv[] )
     // Don't run twice on the CPU if only host enabled.
     if ( !std::is_same<device_type, host_device_type>{} )
     {
-        performanceTest<device_type>( file, grid_sizes_per_dim, MPI_COMM_WORLD,
-                                      "device_default_" );
+        performanceTest<device_type>( file, partitioner,
+                                      grid_sizes_per_dim_per_rank,
+                                      MPI_COMM_WORLD, "device_default_" );
     }
-    performanceTest<host_device_type>( file, grid_sizes_per_dim, MPI_COMM_WORLD,
-                                       "host_default_" );
+    performanceTest<host_device_type>( file, partitioner,
+                                       grid_sizes_per_dim_per_rank,
+                                       MPI_COMM_WORLD, "host_default_" );

     // Close the output file on rank 0.
     file.close();
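The benchmark now sizes the mesh per rank: each entry of grid_sizes_per_dim_per_rank is multiplied by the partitioner's rank decomposition, so the global problem grows with the number of ranks (weak scaling). Below is a minimal standalone sketch of that arithmetic; it is not part of this commit. The Cajita.hpp header name, the main()/MPI boilerplate, and the 32-cells-per-rank value are assumptions for illustration, while DimBlockPartitioner, ranksPerDimension, and the num_cell computation follow the diff above.

#include <Cajita.hpp>

#include <mpi.h>

#include <array>
#include <iostream>

int main( int argc, char* argv[] )
{
    MPI_Init( &argc, &argv );
    {
        // Same partitioner type the benchmark now receives as an argument.
        Cajita::DimBlockPartitioner<3> partitioner;

        // Let MPI pick the Cartesian rank decomposition (0 = no constraint).
        std::array<int, 3> ranks_per_dim =
            partitioner.ranksPerDimension( MPI_COMM_WORLD, { 0, 0, 0 } );

        // Hypothetical per-rank grid size; the benchmark loops over 16 and 32
        // (plus 64 and 128 for the "large" run type).
        int grid_size_per_dim_per_rank = 32;

        // Global cell count grows with the ranks in each dimension, so the
        // per-rank work stays roughly constant as more ranks are added.
        std::array<int, 3> num_cell;
        for ( int d = 0; d < 3; ++d )
            num_cell[d] = grid_size_per_dim_per_rank * ranks_per_dim[d];

        int comm_rank;
        MPI_Comm_rank( MPI_COMM_WORLD, &comm_rank );
        if ( 0 == comm_rank )
            std::cout << "Global cells: " << num_cell[0] << " x " << num_cell[1]
                      << " x " << num_cell[2] << "\n";
    }
    MPI_Finalize();
    return 0;
}

On 4 ranks, MPI typically factors the decomposition as 2 x 2 x 1, so the 32-cells-per-rank case above becomes a 64 x 64 x 32 global grid.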
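The timed kernel is a forward transform with Experimental::FFTScaleFull followed by a reverse transform with Experimental::FFTScaleNone. Assuming the usual convention that full scaling applies a 1/N factor on the forward transform while no scaling leaves the reverse transform unnormalized, the pair is a round trip, so each timed repetition presumably leaves lhs unchanged up to round-off. A plain 1-D DFT sketch of that identity, with no Cajita, heFFTe, or MPI dependencies:

#include <complex>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const double pi = 3.14159265358979323846;
    std::vector<std::complex<double>> x = { 1.0, 2.0, 3.0, 4.0 };
    const std::size_t n = x.size();

    // Forward DFT with "full" (1/N) scaling.
    std::vector<std::complex<double>> X( n );
    for ( std::size_t k = 0; k < n; ++k )
        for ( std::size_t j = 0; j < n; ++j )
            X[k] += x[j] * std::polar( 1.0, -2.0 * pi * k * j / n ) /
                    static_cast<double>( n );

    // Reverse DFT with no scaling.
    std::vector<std::complex<double>> y( n );
    for ( std::size_t j = 0; j < n; ++j )
        for ( std::size_t k = 0; k < n; ++k )
            y[j] += X[k] * std::polar( 1.0, 2.0 * pi * k * j / n );

    // Prints values close to the original input 1 2 3 4.
    for ( std::size_t j = 0; j < n; ++j )
        std::cout << y[j].real() << " ";
    std::cout << "\n";
    return 0;
}

Any C++11 compiler will do; the output is 1 2 3 4 up to floating-point round-off, which is the same reason the benchmark can apply the forward/reverse pair repeatedly to the same array.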
