Skip to content

Commit

Permalink
HDBSCAN and KMeans API improvements for improving CPU interoperability (#6181)
Browse files Browse the repository at this point in the history

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - William Hicks (https://github.com/wphicks)

URL: #6181
  • Loading branch information
dantegd authored Dec 24, 2024
1 parent b58e307 commit 7731ba2
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
6 changes: 6 additions & 0 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ from cuml.common import input_to_cuml_array
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals.import_utils import has_hdbscan
Expand Down Expand Up @@ -782,6 +783,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
self.n_rows = n_rows
self.n_cols = n_cols

if GlobalSettings().accelerator_active:
self._raw_data = self.X_m.to_output("numpy")

cdef uintptr_t _input_ptr = X_m.ptr

IF GPUBUILD == 1:
Expand Down Expand Up @@ -1133,6 +1137,8 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
self.condensed_tree_._raw_tree
self._cpu_model.single_linkage_tree_ = \
self.single_linkage_tree_._linkage
if hasattr(self, "_raw_data"):
self._cpu_model._raw_data = self._raw_data
if self.gen_min_span_tree:
self._cpu_model.minimum_spanning_tree_ = \
self.minimum_spanning_tree_._mst
Expand Down
6 changes: 5 additions & 1 deletion python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ class KMeans(UniversalBase,
Compute k-means clustering with X.

"""
self._n_features_out = self.n_clusters
if self.init == 'preset':
check_cols = self.n_features_in_
check_dtype = self.dtype
Expand All @@ -302,6 +303,8 @@ class KMeans(UniversalBase,
else None),
check_dtype=check_dtype)

self.feature_names_in_ = _X_m.index

IF GPUBUILD == 1:

cdef uintptr_t input_ptr = _X_m.ptr
Expand Down Expand Up @@ -704,4 +707,5 @@ class KMeans(UniversalBase,

def get_attr_names(self):
return ['cluster_centers_', 'labels_', 'inertia_',
'n_iter_', 'n_features_in_', '_n_threads']
'n_iter_', 'n_features_in_', '_n_threads',
"feature_names_in_", "_n_features_out"]
5 changes: 4 additions & 1 deletion python/cuml/cuml/manifold/umap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -577,11 +577,13 @@ class UMAP(UniversalBase,
convert_format=False)
self.n_rows, self.n_dims = self._raw_data.shape
self.sparse_fit = True
self._sparse_data = True
if self.build_algo == "nn_descent":
raise ValueError("NN Descent does not support sparse inputs")

# Handle dense inputs
else:
self._sparse_data = False
if data_on_host:
convert_to_mem_type = MemoryType.host
else:
Expand Down Expand Up @@ -908,6 +910,7 @@ class UMAP(UniversalBase,
self.metric_kwds, False, self.random_state)

super().gpu_to_cpu()
self._cpu_model._validate_parameters()

@classmethod
def _get_param_names(cls):
Expand Down Expand Up @@ -943,4 +946,4 @@ class UMAP(UniversalBase,
return ['_raw_data', 'embedding_', '_input_hash', '_small_data',
'_knn_dists', '_knn_indices', '_knn_search_index',
'_disconnection_distance', '_n_neighbors', '_a', '_b',
'_initial_alpha']
'_initial_alpha', '_sparse_data']

0 comments on commit 7731ba2

Please sign in to comment.