From b478b8c7fda54a140bf576879fbbd413fe06a9a3 Mon Sep 17 00:00:00 2001 From: Severin Dicks <37635888+Intron7@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:22:02 +0100 Subject: [PATCH] Update randomstate scrublet (#301) * update randomstate * update PCA * update get randomstate * move to utils * add release note * revert pca --- docs/release-notes/0.10.12.md | 11 +++++++++++ docs/release-notes/index.md | 2 ++ .../preprocessing/_scrublet/core.py | 3 ++- .../preprocessing/_scrublet/pipeline.py | 12 ++++++------ .../preprocessing/_scrublet/sparse_utils.py | 3 +-- src/rapids_singlecell/preprocessing/_utils.py | 9 +++++++++ 6 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 docs/release-notes/0.10.12.md diff --git a/docs/release-notes/0.10.12.md b/docs/release-notes/0.10.12.md new file mode 100644 index 00000000..40737bed --- /dev/null +++ b/docs/release-notes/0.10.12.md @@ -0,0 +1,11 @@ +### 0.10.12 {small}`the-future` + +```{rubric} Features +``` +```{rubric} Performance +``` +```{rubric} Bug fixes +``` +```{rubric} Misc +``` +* Update `get_random_state` for `scrublet` {pr}`301` {smaller}`S Dicks` diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md index faa775ff..1faf4152 100644 --- a/docs/release-notes/index.md +++ b/docs/release-notes/index.md @@ -3,6 +3,8 @@ # Release notes ## Version 0.10.0 +```{include} /release-notes/0.10.12.md +``` ```{include} /release-notes/0.10.11.md ``` ```{include} /release-notes/0.10.10.md diff --git a/src/rapids_singlecell/preprocessing/_scrublet/core.py b/src/rapids_singlecell/preprocessing/_scrublet/core.py index b8bb2f0d..372623f9 100644 --- a/src/rapids_singlecell/preprocessing/_scrublet/core.py +++ b/src/rapids_singlecell/preprocessing/_scrublet/core.py @@ -9,9 +9,10 @@ from anndata import AnnData, concat from cuml.neighbors import NearestNeighbors from cupyx.scipy import sparse -from scanpy._utils import get_random_state from scanpy.preprocessing._utils import sample_comb +from 
rapids_singlecell.preprocessing._utils import get_random_state + from .sparse_utils import subsample_counts if TYPE_CHECKING: diff --git a/src/rapids_singlecell/preprocessing/_scrublet/pipeline.py b/src/rapids_singlecell/preprocessing/_scrublet/pipeline.py index 9b3779b6..7f52ece0 100644 --- a/src/rapids_singlecell/preprocessing/_scrublet/pipeline.py +++ b/src/rapids_singlecell/preprocessing/_scrublet/pipeline.py @@ -3,10 +3,9 @@ from typing import TYPE_CHECKING, Literal import cupy as cp -from cupyx import cusparse from cupyx.scipy import sparse -from rapids_singlecell.preprocessing._utils import _get_mean_var +from rapids_singlecell.preprocessing._utils import _get_mean_var, _sparse_to_dense from .sparse_utils import sparse_multiply, sparse_zscore @@ -59,10 +58,10 @@ def truncated_svd( self._counts_obs_norm = self._counts_obs_norm.astype(cp.float32) self._counts_sim_norm = self._counts_sim_norm.astype(cp.float32) - X_obs = cusparse.sparseToDense(self._counts_obs_norm) + X_obs = _sparse_to_dense(self._counts_obs_norm) svd = TruncatedSVD(n_components=n_prin_comps, random_state=random_state).fit(X_obs) X_obs = svd.transform(X_obs) - X_sim = svd.transform(cusparse.sparseToDense(self._counts_sim_norm)) + X_sim = svd.transform(_sparse_to_dense(self._counts_sim_norm)) self.set_manifold(X_obs, X_sim) @@ -79,8 +78,9 @@ def pca( self._counts_obs_norm = self._counts_obs_norm.astype(cp.float32) self._counts_sim_norm = self._counts_sim_norm.astype(cp.float32) - X_obs = cusparse.sparseToDense(self._counts_obs_norm) + X_obs = _sparse_to_dense(self._counts_obs_norm) + pca = PCA(n_components=n_prin_comps, random_state=random_state).fit(X_obs) X_obs = pca.transform(X_obs) - X_sim = pca.transform(cusparse.sparseToDense(self._counts_sim_norm)) + X_sim = pca.transform(_sparse_to_dense(self._counts_sim_norm)) self.set_manifold(X_obs, X_sim) diff --git a/src/rapids_singlecell/preprocessing/_scrublet/sparse_utils.py b/src/rapids_singlecell/preprocessing/_scrublet/sparse_utils.py index 
dcbc02c0..eec45a15 100644 --- a/src/rapids_singlecell/preprocessing/_scrublet/sparse_utils.py +++ b/src/rapids_singlecell/preprocessing/_scrublet/sparse_utils.py @@ -5,9 +5,8 @@ import cupy as cp import numpy as np from cupyx.scipy import sparse -from scanpy._utils import get_random_state -from rapids_singlecell.preprocessing._utils import _get_mean_var +from rapids_singlecell.preprocessing._utils import _get_mean_var, get_random_state if TYPE_CHECKING: from numpy.typing import NDArray diff --git a/src/rapids_singlecell/preprocessing/_utils.py b/src/rapids_singlecell/preprocessing/_utils.py index 754270fa..1bb533f2 100644 --- a/src/rapids_singlecell/preprocessing/_utils.py +++ b/src/rapids_singlecell/preprocessing/_utils.py @@ -4,11 +4,14 @@ from typing import TYPE_CHECKING, Literal import cupy as cp +import numpy as np from cupyx.scipy.sparse import issparse, isspmatrix_csc, isspmatrix_csr, spmatrix if TYPE_CHECKING: from anndata import AnnData + from rapids_singlecell._utils import AnyRandom + def _sparse_to_dense(X: spmatrix, order: Literal["C", "F"] | None = None) -> cp.ndarray: if order is None: @@ -154,3 +157,9 @@ def _check_use_raw(adata: AnnData, use_raw: None | bool, layer: str | None) -> b if layer is not None: return False return adata.raw is not None + + +def get_random_state(seed: AnyRandom) -> np.random.RandomState: + if isinstance(seed, np.random.RandomState): + return seed + return np.random.RandomState(seed)