From e9a398d752ca7422a797edbe712f5d569d58459e Mon Sep 17 00:00:00 2001 From: Simone Balducci <93096843+sbaldu@users.noreply.github.com> Date: Fri, 19 Jul 2024 00:36:13 +0200 Subject: [PATCH] Reduce binding code boilerplate (#44) * Substitute `run*` functions with a single template * Template `mainRun` functions * Remove repeted includes --- .../alpaka/BindingModules/binding_cpu.cc | 141 +---- .../alpaka/BindingModules/binding_cpu_tbb.cc | 141 +---- .../alpaka/BindingModules/binding_gpu_cuda.cc | 144 +---- .../alpaka/BindingModules/binding_gpu_hip.cc | 142 +---- .../alpaka/BindingModules/binding_kernels.cc | 3 - CLUEstering/alpaka/CLUE/ConvolutionalKernel.h | 6 - CLUEstering/alpaka/CLUE/Run.h | 553 +----------------- CLUEstering/alpaka/DataFormats/Points.h | 6 - .../alpaka/DataFormats/alpaka/TilesAlpaka.h | 7 - 9 files changed, 80 insertions(+), 1063 deletions(-) diff --git a/CLUEstering/alpaka/BindingModules/binding_cpu.cc b/CLUEstering/alpaka/BindingModules/binding_cpu.cc index 5ba368e0..643b8e2c 100644 --- a/CLUEstering/alpaka/BindingModules/binding_cpu.cc +++ b/CLUEstering/alpaka/BindingModules/binding_cpu.cc @@ -2,15 +2,11 @@ #include #include -#include "../CLUE/CLUEAlgoAlpaka.h" #include "../CLUE/Run.h" -#include "../DataFormats/Points.h" -#include "../DataFormats/alpaka/PointsAlpaka.h" #include #include #include -#include namespace alpaka_serial_sync { void listDevices(const std::string& backend) { @@ -27,17 +23,18 @@ namespace alpaka_serial_sync { } } + template std::vector> mainRun(float dc, float rhoc, float outlier, int pPBin, const std::vector>& coords, const std::vector& weights, - const FlatKernel& kernel, + const Kernel& kernel, int Ndim, size_t block_size, size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); + const auto dev_acc = alpaka::getDevByIdx(device_id); // Create the queue Queue queue_(dev_acc); @@ -45,140 +42,34 @@ namespace alpaka_serial_sync { // Running the clustering algorithm // switch (Ndim) { [[unlikely]] case (1): - return run1( + return run<1, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (2): - return run2( + return run<2, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (3): - return run3( + return run<3, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (4): - return run4( + return run<4, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (5): - return run5( + return run<5, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (6): - return run6( + return run<6, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (7): - return run7( + return run<7, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (8): - return run8( + return run<8, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (9): - return run9( + return run<9, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const ExponentialKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const GaussianKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( + return run<10, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] default: std::cout << "This library only works up to 10 dimensions\n"; @@ -202,7 +93,7 @@ namespace alpaka_serial_sync { const FlatKernel&, int, size_t, - size_t>(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); } }; // namespace alpaka_serial_sync diff --git a/CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc b/CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc index a5b34b49..b09195fd 100644 --- a/CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc +++ b/CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc @@ -2,15 +2,11 @@ #include #include -#include "../CLUE/CLUEAlgoAlpaka.h" #include "../CLUE/Run.h" -#include "../DataFormats/Points.h" -#include "../DataFormats/alpaka/PointsAlpaka.h" #include #include #include -#include namespace alpaka_tbb_async { void listDevices(const std::string& backend) { @@ -27,17 +23,18 @@ namespace alpaka_tbb_async { } } + template std::vector> mainRun(float dc, float rhoc, float outlier, int pPBin, const std::vector>& coords, const std::vector& weights, - const FlatKernel& kernel, + const Kernel& kernel, int Ndim, size_t block_size, size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); + const auto dev_acc = alpaka::getDevByIdx(device_id); // Create the queue Queue queue_(dev_acc); @@ -45,140 +42,34 @@ namespace alpaka_tbb_async { // Running the clustering algorithm // switch (Ndim) { [[unlikely]] case (1): - return run1( + return run<1, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (2): - return run2( + return run<2, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (3): - return run3( + return run<3, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (4): - return run4( + return run<4, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (5): - return run5( + return run<5, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (6): - return run6( + return run<6, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (7): - return run7( + return run<7, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (8): - return run8( + return run<8, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (9): - return run9( + return run<9, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const ExponentialKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const GaussianKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( + return run<10, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] default: std::cout << "This library only works up to 10 dimensions\n"; @@ -200,7 +91,7 @@ namespace alpaka_tbb_async { const FlatKernel&, int, size_t, - size_t>(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); } }; // namespace alpaka_tbb_async diff --git a/CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc b/CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc index 275b3d99..4efbddcf 100644 --- a/CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc +++ b/CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc @@ -1,16 +1,11 @@ #include #include -#include "../CLUE/CLUEAlgoAlpaka.h" #include "../CLUE/Run.h" -#include "../DataFormats/Points.h" -#include "../DataFormats/alpaka/PointsAlpaka.h" -#include "../AlpakaCore/initialise.h" #include #include #include -#include using cms::alpakatools::initialise; @@ -29,125 +24,18 @@ namespace alpaka_cuda_async { } } + template std::vector> mainRun(float dc, float rhoc, float outlier, int pPBin, const std::vector>& coords, const std::vector& weights, - const FlatKernel& kernel, + const Kernel& kernel, int Ndim, size_t block_size, size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - /* initialise(); */ - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const ExponentialKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const GaussianKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); + const auto dev_acc = alpaka::getDevByIdx(device_id); // Create the queue Queue queue_(dev_acc); @@ -155,34 +43,34 @@ namespace alpaka_cuda_async { // Running the clustering algorithm // switch (Ndim) { [[unlikely]] case (1): - return run1( + return run<1, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (2): - return run2( + return run<2, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (3): - return run3( + return run<3, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (4): - return run4( + return run<4, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (5): - return run5( + return run<5, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (6): - return run6( + return run<6, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (7): - return run7( + return run<7, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (8): - return run8( + return run<8, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (9): - return run9( + return run<9, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (10): - return run10( + return run<10, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] default: std::cout << "This library only works up to 10 dimensions\n"; @@ -204,7 +92,7 @@ namespace alpaka_cuda_async { const FlatKernel&, int, size_t, - size_t>(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); } }; // namespace alpaka_cuda_async diff --git a/CLUEstering/alpaka/BindingModules/binding_gpu_hip.cc b/CLUEstering/alpaka/BindingModules/binding_gpu_hip.cc index 6a5fb247..2702473b 100644 --- a/CLUEstering/alpaka/BindingModules/binding_gpu_hip.cc +++ b/CLUEstering/alpaka/BindingModules/binding_gpu_hip.cc @@ -1,16 +1,11 @@ #include #include -#include "../CLUE/CLUEAlgoAlpaka.h" #include "../CLUE/Run.h" -#include "../DataFormats/Points.h" -#include "../DataFormats/alpaka/PointsAlpaka.h" -#include "../AlpakaCore/initialise.h" #include #include #include -#include namespace alpaka_rocm_async { void listDevices(const std::string& backend) { @@ -27,17 +22,18 @@ namespace alpaka_rocm_async { } } + template std::vector> mainRun(float dc, float rhoc, float outlier, int pPBin, const std::vector>& coords, const std::vector& weights, - const FlatKernel& kernel, + const Kernel& kernel, int Ndim, size_t block_size, size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); + const auto dev_acc = alpaka::getDevByIdx(device_id); // Create the queue Queue queue_(dev_acc); @@ -45,140 +41,34 @@ namespace alpaka_rocm_async { // Running the clustering algorithm // switch (Ndim) { [[unlikely]] case (1): - return run1( + return run<1, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (2): - return run2( + return run<2, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[likely]] case (3): - return run3( + return run<3, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (4): - return run4( + return run<4, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (5): - return run5( + return run<5, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (6): - return run6( + return run<6, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (7): - return run7( + return run<7, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (8): - return run8( + return run<8, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (9): - return run9( + return run<9, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const ExponentialKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] default: - std::cout << "This library only works up to 10 dimensions\n"; - return {}; - } - } - - std::vector> mainRun(float dc, - float rhoc, - float outlier, - int pPBin, - const std::vector>& coords, - const std::vector& weights, - const GaussianKernel& kernel, - int Ndim, - size_t block_size, - size_t device_id) { - auto const dev_acc = alpaka::getDevByIdx(device_id); - - // Create the queue - Queue queue_(dev_acc); - - // Running the clustering algorithm // - switch (Ndim) { - [[unlikely]] case (1): - return run1( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (2): - return run2( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[likely]] case (3): - return run3( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (4): - return run4( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (5): - return run5( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (6): - return run6( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (7): - return run7( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (8): - return run8( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (9): - return run9( - dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); - [[unlikely]] case (10): - return run10( + return run<10, Kernel>( dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size); [[unlikely]] default: std::cout << "This library only works up to 10 dimensions\n"; @@ -202,7 +92,7 @@ namespace alpaka_rocm_async { const FlatKernel&, int, size_t, - size_t>(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); m.def("mainRun", pybind11::overload_cast(&mainRun), + size_t>(&mainRun), "mainRun"); } }; // namespace alpaka_rocm_async diff --git a/CLUEstering/alpaka/BindingModules/binding_kernels.cc b/CLUEstering/alpaka/BindingModules/binding_kernels.cc index 0b6602c6..6f4ad54c 100644 --- a/CLUEstering/alpaka/BindingModules/binding_kernels.cc +++ b/CLUEstering/alpaka/BindingModules/binding_kernels.cc @@ -1,12 +1,9 @@ -#include - #include "../CLUE/ConvolutionalKernel.h" #include #include #include -#include PYBIND11_MODULE(CLUE_Convolutional_Kernels, m) { m.doc() = "Binding of the convolutional kernels used in the CLUE algorithm."; diff --git a/CLUEstering/alpaka/CLUE/ConvolutionalKernel.h b/CLUEstering/alpaka/CLUE/ConvolutionalKernel.h index 1deba188..2f7cdf4b 100644 --- a/CLUEstering/alpaka/CLUE/ConvolutionalKernel.h +++ b/CLUEstering/alpaka/CLUE/ConvolutionalKernel.h @@ -2,12 +2,6 @@ #define convolutional_kernels_h #include -#include -#include -#include -#include -#include - #include class FlatKernel { diff --git a/CLUEstering/alpaka/CLUE/Run.h b/CLUEstering/alpaka/CLUE/Run.h index 9ba16d94..9afb962e 100644 --- a/CLUEstering/alpaka/CLUE/Run.h +++ b/CLUEstering/alpaka/CLUE/Run.h @@ -2,547 +2,26 @@ #define run_h #include + #include "CLUEAlgoAlpaka.h" -#include "ConvolutionalKernel.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { - std::vector> run1(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<1> h_points(coordinates, weight); - PointsAlpaka<1> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run1(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<1> h_points(coordinates, weight); - PointsAlpaka<1> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run1(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<1> h_points(coordinates, weight); - PointsAlpaka<1> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run2(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<2> h_points(coordinates, weight); - PointsAlpaka<2> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run2(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<2> h_points(coordinates, weight); - PointsAlpaka<2> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run2(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<2> h_points(coordinates, weight); - PointsAlpaka<2> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run3(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<3> h_points(coordinates, weight); - PointsAlpaka<3> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run3(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<3> h_points(coordinates, weight); - PointsAlpaka<3> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run3(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<3> h_points(coordinates, weight); - PointsAlpaka<3> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run4(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<4> h_points(coordinates, weight); - PointsAlpaka<4> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run4(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<4> h_points(coordinates, weight); - PointsAlpaka<4> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run4(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<4> h_points(coordinates, weight); - PointsAlpaka<4> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run5(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<5> h_points(coordinates, weight); - PointsAlpaka<5> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run5(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<5> h_points(coordinates, weight); - PointsAlpaka<5> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run5(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<5> h_points(coordinates, weight); - PointsAlpaka<5> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run6(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<6> h_points(coordinates, weight); - PointsAlpaka<6> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run6(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<6> h_points(coordinates, weight); - PointsAlpaka<6> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run6(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<6> h_points(coordinates, weight); - PointsAlpaka<6> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run7(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<7> h_points(coordinates, weight); - PointsAlpaka<7> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run7(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<7> h_points(coordinates, weight); - PointsAlpaka<7> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run7(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<7> h_points(coordinates, weight); - PointsAlpaka<7> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run8(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<8> h_points(coordinates, weight); - PointsAlpaka<8> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run8(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<8> h_points(coordinates, weight); - PointsAlpaka<8> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run8(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<8> h_points(coordinates, weight); - PointsAlpaka<8> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run9(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<9> h_points(coordinates, weight); - PointsAlpaka<9> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run9(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<9> h_points(coordinates, weight); - PointsAlpaka<9> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run9(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<9> h_points(coordinates, weight); - PointsAlpaka<9> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run10(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const FlatKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<10> h_points(coordinates, weight); - PointsAlpaka<10> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run10(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const ExponentialKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<10> h_points(coordinates, weight); - PointsAlpaka<10> d_points(queue_, weight.size()); - - return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); - } - - std::vector> run10(float dc, - float rhoc, - float outlier, - int pPBin, - std::vector> const& coordinates, - std::vector const& weight, - const GaussianKernel& kernel, - Queue queue_, - size_t block_size) { - CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); - - // Create the host and device points - Points<10> h_points(coordinates, weight); - PointsAlpaka<10> d_points(queue_, weight.size()); + template + std::vector> run(float dc, + float rhoc, + float outlier, + int pPBin, + const std::vector>& coordinates, + const std::vector& weight, + const Kernel& kernel, + Queue queue_, + size_t block_size) { + CLUEAlgoAlpaka algo(dc, rhoc, outlier, pPBin, queue_); + + // Create the host and device points + Points h_points(coordinates, weight); + PointsAlpaka d_points(queue_, weight.size()); return algo.make_clusters(h_points, d_points, kernel, queue_, block_size); } diff --git a/CLUEstering/alpaka/DataFormats/Points.h b/CLUEstering/alpaka/DataFormats/Points.h index 9dc05dbb..8c532e40 100644 --- a/CLUEstering/alpaka/DataFormats/Points.h +++ b/CLUEstering/alpaka/DataFormats/Points.h @@ -2,12 +2,6 @@ #define points_h #include "alpaka/AlpakaVecArray.h" -#include "alpaka/PointsAlpaka.h" -#include -#include -#include -#include -#include #include using cms::alpakatools::VecArray; diff --git a/CLUEstering/alpaka/DataFormats/alpaka/TilesAlpaka.h b/CLUEstering/alpaka/DataFormats/alpaka/TilesAlpaka.h index 7764e414..96c69abf 100644 --- a/CLUEstering/alpaka/DataFormats/alpaka/TilesAlpaka.h +++ b/CLUEstering/alpaka/DataFormats/alpaka/TilesAlpaka.h @@ -5,14 +5,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include -#include #include "../../AlpakaCore/alpakaConfig.h" #include "../../AlpakaCore/alpakaMemory.h"