Skip to content

Commit

Permalink
Fix and improve setup of alpaka tiles (cms-patatrack#38)
Browse files Browse the repository at this point in the history
* Setup temporary Tiles using `host_buffer`

* Rework members of TilesAlpaka as private

* Add resize of tiles outer VecArray

* Change default values for tile number and depths

* Set default `ppbin` to 128

* Add non-const `data()` to VecArray

* Add `KernelResetTiles`

* Setup Tiles directly on device

* Define `CoordinateExtremes` class

* Make `min_max` and `tile_size` private

* Add test for multiple backends results

* Remove unneeded alpaka wait
  • Loading branch information
sbaldu authored Jul 23, 2024
1 parent a6aceea commit 9dac229
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 45 deletions.
4 changes: 2 additions & 2 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
self.elapsed_time = 0.

def set_params(self, dc: float, rhoc: float,
outlier: float, ppbin: int = 10) -> None:
outlier: float, ppbin: int = 128) -> None:
self.dc_ = dc
self.rhoc = rhoc
self.outlier = outlier
Expand Down Expand Up @@ -1215,7 +1215,7 @@ def to_csv(self, output_folder: str, file_name: str) -> None:
df_.to_csv(out_path,index=False)

if __name__ == "__main__":
c = clusterer(0.4,5,1.)
c = clusterer(0.8, 5, 1.)
c.read_data('./blob.csv')
c.input_plotter()
c.run_clue(backend="cpu serial", verbose=True)
Expand Down
77 changes: 50 additions & 27 deletions CLUEstering/alpaka/CLUE/CLUEAlgoAlpaka.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_,
size_t block_size);
std::size_t block_size);

private:
float dc_;
Expand All @@ -73,18 +73,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
void setup(const Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
Queue queue_,
size_t block_size);
std::size_t block_size);

// Construction of the tiles
void calculate_tile_size(TilesAlpaka<Ndim>& h_tiles, const Points<Ndim>& h_points);
void calculate_tile_size(CoordinateExtremes<Ndim>& min_max,
float* tile_sizes,
const Points<Ndim>& h_points,
uint32_t nPerDim);
};

// Private methods
template <typename TAcc, uint8_t Ndim>
void CLUEAlgoAlpaka<TAcc, Ndim>::calculate_tile_size(TilesAlpaka<Ndim>& h_tiles,
const Points<Ndim>& h_points) {
void CLUEAlgoAlpaka<TAcc, Ndim>::calculate_tile_size(CoordinateExtremes<Ndim>& min_max,
float* tile_sizes,
const Points<Ndim>& h_points,
uint32_t nPerDim) {
for (size_t dim{}; dim != Ndim; ++dim) {
float tileSize;
const float dimMax{
(*std::max_element(h_points.m_coords.begin(),
h_points.m_coords.end(),
Expand All @@ -98,13 +102,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
return vec1[dim] < vec2[dim];
}))[dim]};

VecArray<float, 2> temp;
temp.push_back_unsafe(dimMin);
temp.push_back_unsafe(dimMax);
h_tiles.min_max[dim] = temp;
tileSize = (dimMax - dimMin) / h_tiles.nPerDim();
min_max.min(dim) = dimMin;
min_max.max(dim) = dimMax;

h_tiles.tile_size[dim] = tileSize;
const float tileSize{(dimMax - dimMin) / nPerDim};
tile_sizes[dim] = tileSize;
}
}

Expand All @@ -117,6 +119,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
cms::alpakatools::VecArray<int32_t, max_followers>[]>(queue_, reserve);

// Copy to the public pointers
m_tiles = (*d_tiles).data();
m_seeds = (*d_seeds).data();
m_followers = (*d_followers).data();
}
Expand All @@ -125,14 +128,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
void CLUEAlgoAlpaka<TAcc, Ndim>::setup(const Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
Queue queue_,
size_t block_size) {
// Create temporary tiles object
TilesAlpaka<Ndim> temp;
calculate_tile_size(temp, h_points);
temp.resizeTiles();
std::size_t block_size) {
// calculate the number of tiles and their size
const auto nTiles{std::ceil(h_points.n / static_cast<float>(pointsPerTile_))};
const auto nPerDim{std::ceil(std::pow(nTiles, 1. / Ndim))};

CoordinateExtremes<Ndim> min_max;
float tile_size[Ndim];
calculate_tile_size(min_max, tile_size, h_points, nPerDim);

const auto device = alpaka::getDev(queue_);
alpaka::memcpy(
queue_,
cms::alpakatools::make_device_view(device, (*d_tiles)->minMax(), 2 * Ndim),
cms::alpakatools::make_host_view(min_max.data(), 2 * Ndim));
alpaka::memcpy(
queue_,
cms::alpakatools::make_device_view(device, (*d_tiles)->tileSize(), Ndim),
cms::alpakatools::make_host_view(tile_size, Ndim));

const Idx tiles_grid_size = cms::alpakatools::divide_up_by(nTiles, block_size);
const auto tiles_working_div =
cms::alpakatools::make_workdiv<Acc1D>(tiles_grid_size, block_size);
alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(
tiles_working_div, KernelResetTiles{}, m_tiles, nTiles, nPerDim));

alpaka::memcpy(queue_, *d_tiles, cms::alpakatools::make_host_view(temp));
m_tiles = (*d_tiles).data();
alpaka::memcpy(
queue_,
d_points.coords,
Expand All @@ -144,8 +165,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
alpaka::memset(queue_, (*d_seeds), 0x00);

// Define the working division
Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
const auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(
working_div, KernelResetFollowers{}, m_followers, h_points.n));
Expand All @@ -159,37 +180,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_,
size_t block_size) {
std::size_t block_size) {
setup(h_points, d_points, queue_, block_size);

const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(
working_div, KernelFillTiles(), d_points.view(), m_tiles, h_points.n));
working_div, KernelFillTiles{}, d_points.view(), m_tiles, h_points.n));

alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(working_div,
KernelCalculateLocalDensity(),
KernelCalculateLocalDensity{},
m_tiles,
d_points.view(),
kernel,
/* m_domains.data(), */
dc_,
h_points.n));

alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(working_div,
KernelCalculateNearestHigher(),
KernelCalculateNearestHigher{},
m_tiles,
d_points.view(),
/* m_domains.data(), */
outlierDeltaFactor_,
dc_,
h_points.n));

alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(working_div,
KernelFindClusters<Ndim>(),
KernelFindClusters<Ndim>{},
m_seeds,
m_followers,
d_points.view(),
Expand All @@ -204,7 +227,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, block_size);
alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(working_div_seeds,
KernelAssignClusters<Ndim>(),
KernelAssignClusters<Ndim>{},
m_seeds,
m_followers,
d_points.view()));
Expand Down
16 changes: 16 additions & 0 deletions CLUEstering/alpaka/CLUE/CLUEAlpakaKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define CLUE_Alpaka_Kernels_h

#include <alpaka/core/Common.hpp>
#include <chrono>
#include <cstdint>

#include "../AlpakaCore/alpakaWorkDiv.h"
Expand All @@ -21,6 +22,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
template <uint8_t Ndim>
using PointsView = typename PointsAlpaka<Ndim>::PointsAlpakaView;

/// Kernel functor that prepares a TilesAlpaka object for a new run:
/// it records the requested tile counts and empties the tiles.
struct KernelResetTiles {
  /// @param acc     accelerator handle forwarded to the work-division helpers
  /// @param tiles   tiles object to reset
  /// @param nTiles  number of tiles; forwarded to resizeTiles and used as the
  ///                element count of the clearing loop
  /// @param nPerDim number of tiles per dimension; forwarded to resizeTiles
  template <typename TAcc, uint8_t Ndim>
  ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                TilesAlpaka<Ndim>* tiles,
                                uint32_t nTiles,
                                uint32_t nPerDim) const {
    // The bookkeeping update is guarded by once_per_grid so that it is not
    // repeated by every thread (presumably a single thread passes the check).
    const bool updates_tile_counts = cms::alpakatools::once_per_grid(acc);
    if (updates_tile_counts) {
      tiles->resizeTiles(nTiles, nPerDim);
    }

    // Clear every tile index in [0, nTiles) that the helper hands to this thread.
    auto reset_tile = [&](uint32_t tile_idx) -> void { tiles->clear(tile_idx); };
    cms::alpakatools::for_each_element_in_grid(acc, nTiles, reset_tile);
  }
};

struct KernelResetFollowers {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(const TAcc& acc,
Expand Down Expand Up @@ -66,6 +81,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
// query N_{dc_}(i)

VecArray<float, Ndim> coords_j{dev_points->coords[j]};

float dist_ij_sq{0.f};
for (int dim{}; dim != Ndim; ++dim) {
dist_ij_sq += (coords_j[dim] - coords_i[dim]) * (coords_j[dim] - coords_i[dim]);
Expand Down
1 change: 1 addition & 0 deletions CLUEstering/alpaka/DataFormats/alpaka/AlpakaVecArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ namespace cms::alpakatools {
inline constexpr const T& operator[](int i) const { return m_data[i]; }
inline constexpr void reset() { m_size = 0; }
inline constexpr int capacity() const { return maxSize; }
inline constexpr T* data() { return m_data; }
inline constexpr T const* data() const { return m_data; }
inline constexpr void resize(int size) { m_size = size; }
inline constexpr bool empty() const { return 0 == m_size; }
Expand Down
56 changes: 43 additions & 13 deletions CLUEstering/alpaka/DataFormats/alpaka/TilesAlpaka.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,64 @@
#include <cstdint>
#include <stdint.h>

#include "../../AlpakaCore/alpakaWorkDiv.h"
#include "../../AlpakaCore/alpakaConfig.h"
#include "../../AlpakaCore/alpakaMemory.h"
#include "AlpakaVecArray.h"

using cms::alpakatools::VecArray;

constexpr uint32_t max_tile_depth{1 << 10};
constexpr uint32_t max_n_tiles{1 << 10};
constexpr uint32_t max_n_tiles{1 << 15};

namespace ALPAKA_ACCELERATOR_NAMESPACE {

/// Minimum and maximum coordinate of each of the Ndim dimensions, kept in a
/// single flat array of 2 * Ndim floats laid out as
/// [min_0, max_0, min_1, max_1, ...].
///
/// The defaulted constructor does not initialise the values; they must be
/// written through min()/max() or data() before being read.
template <uint8_t Ndim>
class CoordinateExtremes {
public:
  CoordinateExtremes() = default;

  /// Pointer to the first of the 2 * Ndim stored values.
  ALPAKA_FN_HOST_ACC float* data() { return m_data; }
  ALPAKA_FN_HOST_ACC const float* data() const { return m_data; }

  /// Lower bound of dimension `dim` (stored at even positions).
  ALPAKA_FN_HOST_ACC float& min(int dim) { return m_data[2 * dim]; }
  ALPAKA_FN_HOST_ACC float min(int dim) const { return m_data[2 * dim]; }

  /// Upper bound of dimension `dim` (stored right after the lower bound).
  ALPAKA_FN_HOST_ACC float& max(int dim) { return m_data[2 * dim + 1]; }
  ALPAKA_FN_HOST_ACC float max(int dim) const { return m_data[2 * dim + 1]; }

private:
  float m_data[2 * Ndim];
};

template <uint8_t Ndim>
class TilesAlpaka {
public:
TilesAlpaka()
: n_tiles{1000}, n_tiles_per_dim{static_cast<int>(std::pow(1000, 1. / Ndim))} {};
TilesAlpaka() = default;

// Public member
VecArray<VecArray<float, 2>, Ndim> min_max;
VecArray<float, Ndim> tile_size;
ALPAKA_FN_HOST_ACC inline constexpr const float* minMax() const {
return min_max.data();
}
ALPAKA_FN_HOST_ACC inline constexpr float* minMax() { return min_max.data(); }

// Public methods
void resizeTiles() { m_tiles.resize(n_tiles); }
ALPAKA_FN_HOST_ACC inline constexpr const float* tileSize() const {
return tile_size;
}
ALPAKA_FN_HOST_ACC inline constexpr float* tileSize() { return tile_size; }

// getter
int nPerDim() const { return n_tiles_per_dim; }
ALPAKA_FN_HOST_ACC void resizeTiles(std::size_t nTiles, int nPerDim) {
this->n_tiles = nTiles;
this->n_tiles_per_dim = nPerDim;

this->m_tiles.resize(nTiles);
}

template <typename TAcc>
ALPAKA_FN_HOST_ACC inline constexpr int getBin(const TAcc& acc,
float coord_,
int dim_) const {
int coord_Bin{(int)((coord_ - min_max[dim_][0]) / tile_size[dim_])};
int coord_Bin{(int)((coord_ - min_max.min(dim_)) / tile_size[dim_])};

// Address the cases of underflow and overflow and underflow
// Address the cases of underflow and overflow
coord_Bin = alpaka::math::min(acc, coord_Bin, n_tiles_per_dim - 1);
coord_Bin = alpaka::math::max(acc, coord_Bin, 0);

Expand Down Expand Up @@ -90,20 +114,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

ALPAKA_FN_HOST_ACC inline constexpr auto size() { return n_tiles; }

ALPAKA_FN_HOST_ACC inline constexpr int nPerDim() const { return n_tiles_per_dim; }

ALPAKA_FN_HOST_ACC inline constexpr void clear() {
for (int i{}; i < n_tiles; ++i) {
m_tiles[i].reset();
}
}

ALPAKA_FN_HOST_ACC inline constexpr void clear(uint32_t i) { m_tiles[i].reset(); }

ALPAKA_FN_HOST_ACC inline constexpr VecArray<uint32_t, max_tile_depth>& operator[](
int globalBinId) {
return m_tiles[globalBinId];
}

private:
size_t n_tiles;
std::size_t n_tiles;
int n_tiles_per_dim;
CoordinateExtremes<Ndim> min_max;
float tile_size[Ndim];
VecArray<VecArray<uint32_t, max_tile_depth>, max_n_tiles> m_tiles;
};
} // namespace ALPAKA_ACCELERATOR_NAMESPACE
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from setuptools import setup

__version__ = "2.2.4"
__version__ = "2.2.5"

this_directory = Path(__file__).parent
long_description = (this_directory/'README.md').read_text()
Expand Down
4 changes: 2 additions & 2 deletions tests/test_partial_dimensional_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ def test_square_box(square, box):
Compare the clustering of a 2D square with that of a 3D box
clustered using only two dimensions
'''
c1 = clue.clusterer(.4, 2., 1.6)
c1 = clue.clusterer(1., 2., 1.6)
c1.read_data(square)
c1.run_clue()

c2 = clue.clusterer(.4, 2., 1.6)
c2 = clue.clusterer(1., 2., 1.6)
c2.read_data(box)
c2.run_clue(dimensions=[0, 1])

Expand Down

0 comments on commit 9dac229

Please sign in to comment.