Skip to content

Commit

Permalink
Merge branch 'cl2hpp-default' into 'master'
Browse files Browse the repository at this point in the history
cl2.hpp compatibility for all benchmarks

See merge request pc2/HPCC_FPGA!58
  • Loading branch information
Mellich committed Oct 8, 2021
2 parents 48e0386 + c993b1a commit b678d6d
Show file tree
Hide file tree
Showing 13 changed files with 43 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ default:
tags:
- jacamar
before_script:
- module load intelFPGA_pro/20.4.0 bittware_520n/20.4.0_max intel devel/CMake/3.15.3-GCCcore-8.3.0
- module load intelFPGA_pro/21.1.0 bittware_520n/20.4.0_max intel devel/CMake/3.15.3-GCCcore-8.3.0

###
#
Expand Down
1 change: 0 additions & 1 deletion FFT/src/host/execution.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ SOFTWARE.
#include <vector>

/* External library headers */
#include "CL/cl.hpp"
#include "parameters.h"
#include "fft_benchmark.hpp"

Expand Down
6 changes: 3 additions & 3 deletions FFT/src/host/execution_default.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ namespace bm_execution {
for (uint r =0; r < config.programSettings->numRepetitions; r++) {
auto startCalculation = std::chrono::high_resolution_clock::now();
for (int r=0; r < config.programSettings->kernelReplications; r++) {
fetchQueues[r].enqueueTask(fetchKernels[r]);
fftQueues[r].enqueueTask(fftKernels[r]);
fetchQueues[r].enqueueNDRangeKernel(fetchKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1));
fftQueues[r].enqueueNDRangeKernel(fftKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1));
#ifdef XILINX_FPGA
storeQueues[r].enqueueTask(storeKernels[r]);
storeQueues[r].enqueueNDRangeKernel(storeKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1));
#endif
}
for (int r=0; r < config.programSettings->kernelReplications; r++) {
Expand Down
40 changes: 20 additions & 20 deletions LINPACK/src/host/execution_types/execution_iec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
err =kernels.back().back().setArg(3, config.programSettings->matrixSize / config.programSettings->blockSize);
ASSERT_CL(err)
all_events.back().emplace_back();
err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &all_events.back().back());
err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &all_events.back().back());
ASSERT_CL(err)


Expand Down Expand Up @@ -216,11 +216,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co

if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) {
all_events.back().emplace_back();
err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
ASSERT_CL(err)
}
else {
err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
ASSERT_CL(err)
}

Expand Down Expand Up @@ -253,11 +253,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co

if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) {
all_events.back().emplace_back();
err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
ASSERT_CL(err)
}
else {
err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
ASSERT_CL(err)
}
network_layer_op_flags[0] |= LEFT_BLOCK;
Expand Down Expand Up @@ -325,7 +325,7 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
err = kernels.back().back().setArg(1, network_forward_flags);
ASSERT_CL(err)

err = network_queues_bottomright.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = network_queues_bottomright.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
ASSERT_CL(err)
}
// Create the network kernel for down -> top direction
Expand All @@ -344,11 +344,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co

if (std::distance(it,network_layer_op_flags.end()) == 1) {
all_events.back().emplace_back();
err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
ASSERT_CL(err)
}
else {
err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
ASSERT_CL(err)
}

Expand All @@ -368,11 +368,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co

if (std::distance(it,network_layer_op_flags.end()) == 1) {
all_events.back().emplace_back();
err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
ASSERT_CL(err)
}
else {
err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
ASSERT_CL(err)
}

Expand Down Expand Up @@ -418,16 +418,16 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
// this is the last task that will be enqueued in this queue, so create an event
all_events.back().emplace_back();
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
//err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events, &(all_events.back().back()));
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
//err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events, &(all_events.back().back()));
}
else {
#ifndef NDEBUG
std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner L " << block_row << "," << block_col << std::endl;
#endif
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
//err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events);
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
//err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events);
}
current_update++;
current_replication = (current_replication + 1) % config.programSettings->kernelReplications;
Expand Down Expand Up @@ -463,14 +463,14 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
// this is the last task that will be enqueued in this queue, so create an event
all_events.back().emplace_back();
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
}
else {
#ifndef NDEBUG
std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner T " << block_row << "," << block_col << std::endl;
#endif
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
}
ASSERT_CL(err)
current_update++;
Expand Down Expand Up @@ -518,14 +518,14 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
// this is the last task that will be enqueued in this queue, so create an event
all_events.back().emplace_back();
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back()));
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back()));
}
else {
#ifndef NDEBUG
std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner " << block_row << "," << block_col << std::endl;
#endif
// Distribute the workload over all available matrix multiplication kernels
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end())))));
err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end())))));
}

ASSERT_CL(err)
Expand Down Expand Up @@ -563,13 +563,13 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
// // this is the last task that will be enqueued in this queue, so create an event
// all_events.back().emplace_back();
// // Distribute the workload over all available matrix multiplication kernels
// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
// current_update = 0;
// current_replication++;
// }
// else {
// // Distribute the workload over all available matrix multiplication kernels
// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))));
// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))));
// current_update++;
// }
}
Expand Down
Loading

0 comments on commit b678d6d

Please sign in to comment.