diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 461c1f9e..b4834341 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,7 +9,7 @@ default: tags: - jacamar before_script: - - module load intelFPGA_pro/20.4.0 bittware_520n/20.4.0_max intel devel/CMake/3.15.3-GCCcore-8.3.0 + - module load intelFPGA_pro/21.1.0 bittware_520n/20.4.0_max intel devel/CMake/3.15.3-GCCcore-8.3.0 ### # diff --git a/FFT/src/host/execution.h b/FFT/src/host/execution.h index 01989309..2d588ded 100644 --- a/FFT/src/host/execution.h +++ b/FFT/src/host/execution.h @@ -28,7 +28,6 @@ SOFTWARE. #include /* External library headers */ -#include "CL/cl.hpp" #include "parameters.h" #include "fft_benchmark.hpp" diff --git a/FFT/src/host/execution_default.cpp b/FFT/src/host/execution_default.cpp index 614560ae..59a81f87 100644 --- a/FFT/src/host/execution_default.cpp +++ b/FFT/src/host/execution_default.cpp @@ -176,10 +176,10 @@ namespace bm_execution { for (uint r =0; r < config.programSettings->numRepetitions; r++) { auto startCalculation = std::chrono::high_resolution_clock::now(); for (int r=0; r < config.programSettings->kernelReplications; r++) { - fetchQueues[r].enqueueTask(fetchKernels[r]); - fftQueues[r].enqueueTask(fftKernels[r]); + fetchQueues[r].enqueueNDRangeKernel(fetchKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1)); + fftQueues[r].enqueueNDRangeKernel(fftKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1)); #ifdef XILINX_FPGA - storeQueues[r].enqueueTask(storeKernels[r]); + storeQueues[r].enqueueNDRangeKernel(storeKernels[r], cl::NullRange, cl::NDRange(1), cl::NDRange(1)); #endif } for (int r=0; r < config.programSettings->kernelReplications; r++) { diff --git a/LINPACK/src/host/execution_types/execution_iec.hpp b/LINPACK/src/host/execution_types/execution_iec.hpp index ea426799..da17307f 100644 --- a/LINPACK/src/host/execution_types/execution_iec.hpp +++ b/LINPACK/src/host/execution_types/execution_iec.hpp @@ -185,7 +185,7 @@ calculate(const 
hpcc_base::ExecutionSettings&co err =kernels.back().back().setArg(3, config.programSettings->matrixSize / config.programSettings->blockSize); ASSERT_CL(err) all_events.back().emplace_back(); - err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &all_events.back().back()); + err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &all_events.back().back()); ASSERT_CL(err) @@ -216,11 +216,11 @@ calculate(const hpcc_base::ExecutionSettings&co if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) { all_events.back().emplace_back(); - err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); ASSERT_CL(err) } else { - err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) } @@ -253,11 +253,11 @@ calculate(const hpcc_base::ExecutionSettings&co if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) { all_events.back().emplace_back(); - err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, 
cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); ASSERT_CL(err) } else { - err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) } network_layer_op_flags[0] |= LEFT_BLOCK; @@ -325,7 +325,7 @@ calculate(const hpcc_base::ExecutionSettings&co err = kernels.back().back().setArg(1, network_forward_flags); ASSERT_CL(err) - err = network_queues_bottomright.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = network_queues_bottomright.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) } // Create the network kernel for down -> top direction @@ -344,11 +344,11 @@ calculate(const hpcc_base::ExecutionSettings&co if (std::distance(it,network_layer_op_flags.end()) == 1) { all_events.back().emplace_back(); - err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); ASSERT_CL(err) } else { - err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = network_queues_top.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), 
&(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) } @@ -368,11 +368,11 @@ calculate(const hpcc_base::ExecutionSettings&co if (std::distance(it,network_layer_op_flags.end()) == 1) { all_events.back().emplace_back(); - err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); ASSERT_CL(err) } else { - err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = network_queues_left.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) } @@ -418,16 +418,16 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); - //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events, &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), 
cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events, &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner L " << block_row << "," << block_col << std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); - //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); + //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events); } current_update++; current_replication = (current_replication + 1) % config.programSettings->kernelReplications; @@ -463,14 +463,14 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << 
config.programSettings->torus_col << " Inner T " << block_row << "," << block_col << std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); } ASSERT_CL(err) current_update++; @@ -518,14 +518,14 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner " << block_row << "," << block_col << std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end()))))); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end()))))); } ASSERT_CL(err) @@ -563,13 
+563,13 @@ calculate(const hpcc_base::ExecutionSettings&co // // this is the last taks that will be enqueued in this queue, so create an event // all_events.back().emplace_back(); // // Distribute the workload over all available matrix multiplication kernels -// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); +// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); // current_update = 0; // current_replication++; // } // else { // // Distribute the workload over all available matrix multiplication kernels -// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); +// err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); // current_update++; // } } diff --git a/LINPACK/src/host/execution_types/execution_pcie.hpp b/LINPACK/src/host/execution_types/execution_pcie.hpp index 895dbd86..14d7a769 100644 --- a/LINPACK/src/host/execution_types/execution_pcie.hpp +++ b/LINPACK/src/host/execution_types/execution_pcie.hpp @@ -209,7 +209,7 @@ calculate(const hpcc_base::ExecutionSettings&co err =kernels.back().back().setArg(5, config.programSettings->matrixSize / config.programSettings->blockSize); ASSERT_CL(err) all_events.back().emplace_back(); - err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = lu_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, 
cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) // read back result of LU calculation so it can be distributed err = lu_queues.back().enqueueReadBuffer(Buffer_lu2, CL_TRUE, 0, @@ -259,7 +259,7 @@ calculate(const hpcc_base::ExecutionSettings&co err = kernels.back().back().setArg(6, config.programSettings->matrixSize / config.programSettings->blockSize); ASSERT_CL(err) - err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = top_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) { @@ -306,7 +306,7 @@ calculate(const hpcc_base::ExecutionSettings&co err = kernels.back().back().setArg(6, config.programSettings->matrixSize / config.programSettings->blockSize); ASSERT_CL(err) - err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = left_queues.back().enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); ASSERT_CL(err) if (tops + 1 == (config.programSettings->matrixSize / config.programSettings->blockSize)) { @@ -391,16 +391,16 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); - //err = 
inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events, &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events, &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner L " << block_row << "," << block_col << std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); - //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &communication_events); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); + //err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &communication_events); } current_update++; current_replication = (current_replication + 1) % config.programSettings->kernelReplications; @@ -436,14 +436,14 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = 
inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner T " << block_row << "," << block_col << std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(all_events.end())))); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(all_events.end())))); } ASSERT_CL(err) current_update++; @@ -491,14 +491,14 @@ calculate(const hpcc_base::ExecutionSettings&co // this is the last taks that will be enqueued in this queue, so create an event all_events.back().emplace_back(); // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back())); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end())))), &(all_events.back().back())); } else { #ifndef NDEBUG std::cout << "Torus " << config.programSettings->torus_row << "," << config.programSettings->torus_col << " Inner " << block_row << "," << block_col << 
std::endl; #endif // Distribute the workload over all available matrix multiplication kernels - err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange, &(*std::prev(std::prev(std::prev(all_events.end()))))); + err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1), &(*std::prev(std::prev(std::prev(all_events.end()))))); } ASSERT_CL(err) diff --git a/LINPACK/src/host/linpack_benchmark.cpp b/LINPACK/src/host/linpack_benchmark.cpp index 8d5aaad2..3409e35f 100644 --- a/LINPACK/src/host/linpack_benchmark.cpp +++ b/LINPACK/src/host/linpack_benchmark.cpp @@ -197,6 +197,7 @@ linpack::LinpackBenchmark::generateInputData() { std::mt19937 gen(this->mpi_comm_rank); std::uniform_real_distribution<> dis(0.0, 1.0); d->norma = 0.0; + d->normb = 0.0; /* Generate a matrix by using pseudo random number in the range (0,1) */ diff --git a/PTRANS/tests/test_host_functionality.cpp b/PTRANS/tests/test_host_functionality.cpp index 1733cf15..0f7c64a0 100644 --- a/PTRANS/tests/test_host_functionality.cpp +++ b/PTRANS/tests/test_host_functionality.cpp @@ -3,7 +3,6 @@ // #include "gtest/gtest.h" #include "parameters.h" -#include "CL/cl.hpp" #include "test_program_settings.h" #include "gmock/gmock-matchers.h" #include "transpose_benchmark.hpp" diff --git a/PTRANS/tests/test_transpose_data_handlers.cpp b/PTRANS/tests/test_transpose_data_handlers.cpp index 9b666bc2..f8531615 100644 --- a/PTRANS/tests/test_transpose_data_handlers.cpp +++ b/PTRANS/tests/test_transpose_data_handlers.cpp @@ -3,7 +3,6 @@ // #include "gtest/gtest.h" #include "parameters.h" -#include "CL/cl.hpp" #include "test_program_settings.h" #include "gmock/gmock-matchers.h" #include "transpose_benchmark.hpp" diff --git a/RandomAccess/src/host/execution.h b/RandomAccess/src/host/execution.h index 45308bee..88cf6736 100644 --- a/RandomAccess/src/host/execution.h +++ 
b/RandomAccess/src/host/execution.h @@ -27,8 +27,6 @@ SOFTWARE. #include /* External library headers */ -#include "CL/cl.hpp" - #include "parameters.h" #include "random_access_benchmark.hpp" diff --git a/RandomAccess/src/host/execution_single.cpp b/RandomAccess/src/host/execution_single.cpp index 24ce9802..486234bf 100644 --- a/RandomAccess/src/host/execution_single.cpp +++ b/RandomAccess/src/host/execution_single.cpp @@ -30,8 +30,6 @@ SOFTWARE. #include /* External library headers */ -#include "CL/cl.hpp" - #ifdef INTEL_FPGA #include "CL/cl_ext_intelfpga.h" #endif @@ -170,7 +168,7 @@ namespace bm_execution { #pragma omp barrier #pragma omp for nowait for (int r = 0; r < config.programSettings->kernelReplications; r++) { - compute_queue[r].enqueueTask(accesskernel[r]); + compute_queue[r].enqueueNDRangeKernel(accesskernel[r], cl::NullRange, cl::NDRange(1)); } #pragma omp for for (int r = 0; r < config.programSettings->kernelReplications; r++) { diff --git a/STREAM/src/host/execution.hpp b/STREAM/src/host/execution.hpp index e493cb0b..70d6f948 100644 --- a/STREAM/src/host/execution.hpp +++ b/STREAM/src/host/execution.hpp @@ -29,7 +29,6 @@ SOFTWARE. #include /* External library headers */ -#include "CL/cl.hpp" #include "parameters.h" #include "stream_benchmark.hpp" diff --git a/STREAM/src/host/execution_default.cpp b/STREAM/src/host/execution_default.cpp index d58e4e3d..71a4d04f 100644 --- a/STREAM/src/host/execution_default.cpp +++ b/STREAM/src/host/execution_default.cpp @@ -29,7 +29,6 @@ SOFTWARE. 
#include /* External library headers */ -#include "CL/cl.hpp" #include "CL/opencl.h" #ifdef INTEL_FPGA @@ -143,7 +142,7 @@ namespace bm_execution { } startExecution = std::chrono::high_resolution_clock::now(); for (int i=0; i<config.programSettings->kernelReplications; i++) { - ASSERT_CL(command_queues[i].enqueueTask(test_kernels[i])); + ASSERT_CL(command_queues[i].enqueueNDRangeKernel(test_kernels[i], cl::NullRange, cl::NDRange(1))); } for (int i=0; i<config.programSettings->kernelReplications; i++) { ASSERT_CL(command_queues[i].finish()); @@ -229,7 +228,7 @@ namespace bm_execution { std::vector<cl::Event> copy_start_events({copy_user_event}); std::vector<cl::Event> copy_events(config.programSettings->kernelReplications); for (int i = 0; i < config.programSettings->kernelReplications; i++) { - command_queues[i].enqueueTask(copy_kernels[i], &copy_start_events, &copy_events[i]); + command_queues[i].enqueueNDRangeKernel(copy_kernels[i], cl::NullRange, cl::NDRange(1), cl::NDRange(1), &copy_start_events, &copy_events[i]); } cl::UserEvent scale_user_event(*config.context, &err); @@ -237,7 +236,7 @@ namespace bm_execution { std::vector<cl::Event> scale_start_events({scale_user_event}); std::vector<cl::Event> scale_events(config.programSettings->kernelReplications); for (int i = 0; i < config.programSettings->kernelReplications; i++) { - command_queues[i].enqueueTask(scale_kernels[i], &scale_start_events, &scale_events[i]); + command_queues[i].enqueueNDRangeKernel(scale_kernels[i], cl::NullRange, cl::NDRange(1), cl::NDRange(1), &scale_start_events, &scale_events[i]); } cl::UserEvent add_user_event(*config.context, &err); @@ -245,7 +244,7 @@ namespace bm_execution { std::vector<cl::Event> add_start_events({add_user_event}); std::vector<cl::Event> add_events(config.programSettings->kernelReplications); for (int i = 0; i < config.programSettings->kernelReplications; i++) { - command_queues[i].enqueueTask(add_kernels[i], &add_start_events, &add_events[i]); + command_queues[i].enqueueNDRangeKernel(add_kernels[i], cl::NullRange, cl::NDRange(1), cl::NDRange(1), &add_start_events, &add_events[i]); } 
cl::UserEvent triad_user_event(*config.context, &err); @@ -253,7 +252,7 @@ namespace bm_execution { std::vector<cl::Event> triad_start_events({triad_user_event}); std::vector<cl::Event> triad_events(config.programSettings->kernelReplications); for (int i = 0; i < config.programSettings->kernelReplications; i++) { - command_queues[i].enqueueTask(triad_kernels[i], &triad_start_events, &triad_events[i]); + command_queues[i].enqueueNDRangeKernel(triad_kernels[i], cl::NullRange, cl::NDRange(1), cl::NDRange(1), &triad_start_events, &triad_events[i]); } startExecution = std::chrono::high_resolution_clock::now(); diff --git a/cmake/general_benchmark_build_setup.cmake b/cmake/general_benchmark_build_setup.cmake index 15dfd9b1..64aa8d0a 100644 --- a/cmake/general_benchmark_build_setup.cmake +++ b/cmake/general_benchmark_build_setup.cmake @@ -13,7 +13,7 @@ endif() if(DEFINED USE_DEPRECATED_HPP_HEADER) set(header_default ${USE_DEPRECATED_HPP_HEADER}) else() - set(header_default Yes) + set(header_default No) endif() if(DEFINED COMMUNICATION_TYPE_SUPPORT_ENABLED)