Skip to content

Commit

Permalink
Feature partial dimension clustering (cms-patatrack#19)
Browse files Browse the repository at this point in the history
* Update to version `2.0.1`

* Allow clustering with only certain dimensions

* Fix typo

* Put dimension partitioner in a private method

* Add tests for dimension partitioner

* Write docstring

* Fix typo

* Add test

* Add new test datasets

* Adding docstrings

* Add checks
  • Loading branch information
sbaldu authored Mar 4, 2024
1 parent 20d7f41 commit 98b13bd
Show file tree
Hide file tree
Showing 5 changed files with 10,173 additions and 14 deletions.
49 changes: 36 additions & 13 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,11 +549,34 @@ def list_devices(self, backend: str = "all") -> None:
raise ValueError("Invalid backend. The allowed choices for the"
+ " backend are: all, cpu serial, cpu tbb and gpu cuda.")

def _partial_dimension_dataset(self, dimensions: list):
    """
    Build a reduced dataset restricted to the requested dimensions.

    The columns of `self.clust_data.coords` listed in `dimensions` are
    gathered, in the order given, into a new array. `run_clue` uses this
    when a subset of dimensions is requested, so that the algorithm can be
    executed in a lower dimensional space.

    Parameters
    ----------
    dimensions : list
        Indices of the dimensions to keep.

    Returns
    -------
    np.ndarray
        Array holding only the coordinates of the selected dimensions.
    """

    # Transposing lets each dimension be addressed as a single row.
    per_dimension = self.clust_data.coords.T
    selected = [per_dimension[index] for index in dimensions]

    # Stack the chosen dimensions, then transpose back to the input layout.
    stacked = np.array(selected)
    return stacked.T

def run_clue(self,
backend: str = "cpu serial",
block_size: int = 1024,
device_id: int = 0,
verbose: bool = False) -> None:
verbose: bool = False,
dimensions: Union[list, None] = None) -> None:
"""
Executes the CLUE clustering algorithm.
Expand Down Expand Up @@ -582,38 +605,38 @@ def run_clue(self,
None
"""

if dimensions is None:
data = self.clust_data.coords
else:
data = self._partial_dimension_dataset(dimensions)
start = time.time_ns()
if backend == "cpu serial":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
data, self.clust_data.weight, self.kernel,
self.clust_data.n_dim, block_size, device_id)
elif backend == "cpu tbb":
if tbb_found:
cluster_id_is_seed = cpu_tbb.mainRun(self.dc_, self.rhoc, self.outlier,
self.ppbin, self.clust_data.coords,
self.clust_data.weight, self.kernel,
self.clust_data.n_dim, block_size,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
else:
print("TBB module not found. Please re-compile the library and try again.")

elif backend == "gpu cuda":
if cuda_found:
cluster_id_is_seed = gpu_cuda.mainRun(self.dc_, self.rhoc, self.outlier,
self.ppbin, self.clust_data.coords,
self.clust_data.weight, self.kernel,
self.clust_data.n_dim, block_size,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
else:
print("CUDA module not found. Please re-compile the library and try again.")

elif backend == "gpu hip":
if hip_found:
cluster_id_is_seed = gpu_hip.mainRun(self.dc_, self.rhoc, self.outlier,
self.ppbin, self.clust_data.coords,
self.clust_data.weight, self.kernel,
self.clust_data.n_dim, block_size,
self.ppbin, data, self.clust_data.weight,
self.kernel, self.clust_data.n_dim, block_size,
device_id)
else:
print("HIP module not found. Please re-compile the library and try again.")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from setuptools import setup

__version__ = "2.0.0"
__version__ = "2.0.1"
this_directory = Path(__file__).parent
long_description = (this_directory/'README.md').read_text()

Expand Down
Loading

0 comments on commit 98b13bd

Please sign in to comment.