From 9308e211ec52429693a24df457c8cdf326cdacce Mon Sep 17 00:00:00 2001 From: Intron7 Date: Tue, 15 Oct 2024 16:37:28 +0200 Subject: [PATCH] slim down tests --- tests/dask/test_dask_pca.py | 135 ++++++++++-------------------- tests/dask/test_normalize_dask.py | 68 ++++++++------- tests/dask/test_qc_dask.py | 56 +++---------- tests/dask/test_scale_dask.py | 62 +++++--------- 4 files changed, 119 insertions(+), 202 deletions(-) diff --git a/tests/dask/test_dask_pca.py b/tests/dask/test_dask_pca.py index 95801d96..08d6d5d9 100644 --- a/tests/dask/test_dask_pca.py +++ b/tests/dask/test_dask_pca.py @@ -2,6 +2,7 @@ import cupy as cp import numpy as np +import pytest from cupyx.scipy import sparse as cusparse from scanpy.datasets import pbmc3k, pbmc3k_processed from scipy import sparse @@ -13,129 +14,85 @@ ) -def test_pca_sparse_dask(client): - sparse_ad = pbmc3k_processed() - default = pbmc3k_processed() - sparse_ad.X = sparse.csr_matrix(sparse_ad.X.astype(np.float64)) - default.X = as_sparse_cupy_dask_array(default.X.astype(np.float64)) - rsc.pp.pca(sparse_ad) - rsc.pp.pca(default) +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +def test_pca_dask(client, data_kind): + adata_1 = pbmc3k_processed() + adata_2 = pbmc3k_processed() - cp.testing.assert_allclose( - np.abs(sparse_ad.obsm["X_pca"]), - cp.abs(default.obsm["X_pca"].compute()), - rtol=1e-7, - atol=1e-6, - ) + if data_kind == "sparse": + adata_1.X = sparse.csr_matrix(adata_1.X.astype(np.float64)) + adata_2.X = as_sparse_cupy_dask_array(adata_2.X.astype(np.float64)) + elif data_kind == "dense": + adata_1.X = cp.array(adata_1.X.astype(np.float64)) + adata_2.X = as_dense_cupy_dask_array(adata_2.X.astype(np.float64)) + else: + raise ValueError(f"Unknown data_kind {data_kind}") - cp.testing.assert_allclose( - np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6 - ) + rsc.pp.pca(adata_1, svd_solver="full") + rsc.pp.pca(adata_2, svd_solver="full") cp.testing.assert_allclose( - np.abs(sparse_ad.uns["pca"]["variance_ratio"]), - np.abs(default.uns["pca"]["variance_ratio"]), + np.abs(adata_1.obsm["X_pca"]), + cp.abs(adata_2.obsm["X_pca"].compute()), rtol=1e-7, atol=1e-6, ) - -def test_pca_dense_dask_full_pipeline(client): - dense = pbmc3k() - default = pbmc3k() - dense.X = cp.array(dense.X.astype(np.float64).toarray()) - default.X = as_dense_cupy_dask_array(default.X.astype(np.float64).toarray()) - - rsc.pp.filter_genes(dense, min_count=500) - rsc.pp.filter_genes(default, min_count=500) - - rsc.pp.normalize_total(dense, target_sum=1e4) - rsc.pp.normalize_total(default, target_sum=1e4) - - rsc.pp.log1p(dense) - rsc.pp.log1p(default) - - rsc.pp.pca(dense, svd_solver="full") - rsc.pp.pca(default, svd_solver="full") - cp.testing.assert_allclose( - np.abs(dense.obsm["X_pca"]), - cp.abs(default.obsm["X_pca"].compute()), + np.abs(adata_1.varm["PCs"]), + np.abs(adata_2.varm["PCs"]), rtol=1e-7, atol=1e-6, ) cp.testing.assert_allclose( - np.abs(dense.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6 - ) - - cp.testing.assert_allclose( - np.abs(dense.uns["pca"]["variance_ratio"]), - np.abs(default.uns["pca"]["variance_ratio"]), + np.abs(adata_1.uns["pca"]["variance_ratio"]), + np.abs(adata_2.uns["pca"]["variance_ratio"]), rtol=1e-7, atol=1e-6, ) -def test_pca_sparse_dask_full_pipeline(client): - sparse_ad = pbmc3k() - default = pbmc3k() - sparse_ad.X = cusparse.csr_matrix(sparse.csr_matrix(sparse_ad.X.astype(np.float64))) - default.X = as_sparse_cupy_dask_array(default.X.astype(np.float64)) +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +def test_pca_dask_full_pipeline(client, data_kind): + adata_1 = pbmc3k() + adata_2 = pbmc3k() - rsc.pp.filter_genes(sparse_ad, min_count=100) - rsc.pp.filter_genes(default, min_count=100) + if data_kind == "sparse": + adata_1.X = cusparse.csr_matrix(sparse.csr_matrix(adata_1.X.astype(np.float64))) + adata_2.X = as_sparse_cupy_dask_array(adata_2.X.astype(np.float64)) + elif data_kind == "dense": + adata_1.X = cp.array(adata_1.X.astype(np.float64).toarray()) + adata_2.X = as_dense_cupy_dask_array(adata_2.X.astype(np.float64).toarray()) + else: + raise ValueError(f"Unknown data_kind {data_kind}") - rsc.pp.normalize_total(sparse_ad, target_sum=1e4) - rsc.pp.normalize_total(default, target_sum=1e4) + rsc.pp.filter_genes(adata_1, min_count=500) + rsc.pp.filter_genes(adata_2, min_count=500) - rsc.pp.log1p(sparse_ad) - rsc.pp.log1p(default) - - rsc.pp.pca(sparse_ad) - rsc.pp.pca(default) - - cp.testing.assert_allclose( - np.abs(sparse_ad.obsm["X_pca"]), - cp.abs(default.obsm["X_pca"].compute()), - rtol=1e-7, - atol=1e-6, - ) - - cp.testing.assert_allclose( - np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6 - ) - - cp.testing.assert_allclose( - np.abs(sparse_ad.uns["pca"]["variance_ratio"]), - np.abs(default.uns["pca"]["variance_ratio"]), - rtol=1e-7, - atol=1e-6, - ) + rsc.pp.normalize_total(adata_1, target_sum=1e4) + rsc.pp.normalize_total(adata_2, target_sum=1e4) + rsc.pp.log1p(adata_1) + rsc.pp.log1p(adata_2) -def test_pca_dense_dask(client): - sparse_ad = pbmc3k_processed() - default = pbmc3k_processed() - sparse_ad.X = cp.array(sparse_ad.X.astype(np.float64)) - default.X = as_dense_cupy_dask_array(default.X.astype(np.float64)) - rsc.pp.pca(sparse_ad, svd_solver="full") - rsc.pp.pca(default, svd_solver="full") + rsc.pp.pca(adata_1, svd_solver="full") + rsc.pp.pca(adata_2, svd_solver="full") cp.testing.assert_allclose( - np.abs(sparse_ad.obsm["X_pca"]), - cp.abs(default.obsm["X_pca"].compute()), + np.abs(adata_1.obsm["X_pca"]), + cp.abs(adata_2.obsm["X_pca"].compute()), rtol=1e-7, atol=1e-6, ) cp.testing.assert_allclose( - np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6 + np.abs(adata_1.varm["PCs"]), np.abs(adata_2.varm["PCs"]), rtol=1e-7, atol=1e-6 ) cp.testing.assert_allclose( - np.abs(sparse_ad.uns["pca"]["variance_ratio"]), - np.abs(default.uns["pca"]["variance_ratio"]), + np.abs(adata_1.uns["pca"]["variance_ratio"]), + np.abs(adata_2.uns["pca"]["variance_ratio"]), rtol=1e-7, atol=1e-6, ) diff --git a/tests/dask/test_normalize_dask.py b/tests/dask/test_normalize_dask.py index dcdca21c..5a76111a 100644 --- a/tests/dask/test_normalize_dask.py +++ b/tests/dask/test_normalize_dask.py @@ -1,6 +1,7 @@ from __future__ import annotations import cupy as cp +import pytest import scanpy as sc from cupyx.scipy import sparse as cusparse from scanpy.datasets import pbmc3k @@ -12,51 +13,60 @@ ) -def test_normalize_sparse(client): +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +def test_normalize_total(client, data_kind): adata = pbmc3k() sc.pp.filter_cells(adata, min_genes=100) sc.pp.filter_genes(adata, min_cells=3) dask_data = adata.copy() - dask_data.X = as_sparse_cupy_dask_array(dask_data.X) - adata.X = cusparse.csr_matrix(adata.X) - rsc.pp.normalize_total(adata) - rsc.pp.normalize_total(dask_data) - cp.testing.assert_allclose(adata.X.toarray(), dask_data.X.compute().toarray()) + if data_kind == "sparse": + dask_data.X = as_sparse_cupy_dask_array(dask_data.X) + adata.X = cusparse.csr_matrix(adata.X) + elif data_kind == "dense": + dask_data.X = as_dense_cupy_dask_array(dask_data.X) + adata.X = cp.array(adata.X.toarray()) + else: + raise ValueError(f"Unknown data_kind {data_kind}") -def test_normalize_dense(client): - adata = pbmc3k() - sc.pp.filter_cells(adata, min_genes=100) - sc.pp.filter_genes(adata, min_cells=3) - dask_data = adata.copy() - dask_data.X = as_dense_cupy_dask_array(dask_data.X) - adata.X = cp.array(adata.X.toarray()) rsc.pp.normalize_total(adata) rsc.pp.normalize_total(dask_data) - cp.testing.assert_allclose(adata.X, dask_data.X.compute()) + if data_kind == "sparse": + adata_X = adata.X.toarray() + dask_X = dask_data.X.compute().toarray() + else: + adata_X = adata.X + dask_X = dask_data.X.compute() -def test_log1p_sparse(client): - adata = pbmc3k() - sc.pp.filter_cells(adata, min_genes=100) - sc.pp.filter_genes(adata, min_cells=3) - sc.pp.normalize_total(adata) - dask_data = adata.copy() - dask_data.X = as_sparse_cupy_dask_array(dask_data.X) - adata.X = cusparse.csr_matrix(adata.X) - rsc.pp.log1p(adata) - rsc.pp.log1p(dask_data) - cp.testing.assert_allclose(adata.X.toarray(), dask_data.X.compute().toarray()) + cp.testing.assert_allclose(adata_X, dask_X) -def test_log1p_dense(client): +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +def test_log1p(client, data_kind): adata = pbmc3k() sc.pp.filter_cells(adata, min_genes=100) sc.pp.filter_genes(adata, min_cells=3) sc.pp.normalize_total(adata) dask_data = adata.copy() - dask_data.X = as_dense_cupy_dask_array(dask_data.X) - adata.X = cp.array(adata.X.toarray()) + + if data_kind == "sparse": + dask_data.X = as_sparse_cupy_dask_array(dask_data.X) + adata.X = cusparse.csr_matrix(adata.X) + elif data_kind == "dense": + dask_data.X = as_dense_cupy_dask_array(dask_data.X) + adata.X = cp.array(adata.X.toarray()) + else: + raise ValueError(f"Unknown data_kind {data_kind}") + rsc.pp.log1p(adata) rsc.pp.log1p(dask_data) - cp.testing.assert_allclose(adata.X, dask_data.X.compute()) + + if data_kind == "sparse": + adata_X = adata.X.toarray() + dask_X = dask_data.X.compute().toarray() + else: + adata_X = adata.X + dask_X = dask_data.X.compute() + + cp.testing.assert_allclose(adata_X, dask_X) diff --git a/tests/dask/test_qc_dask.py b/tests/dask/test_qc_dask.py index f1fd5f9c..2beafc85 100644 --- a/tests/dask/test_qc_dask.py +++ b/tests/dask/test_qc_dask.py @@ -2,6 +2,7 @@ import cupy as cp import numpy as np +import pytest from cupyx.scipy import sparse as cusparse from scanpy.datasets import pbmc3k @@ -12,55 +13,20 @@ ) -def test_qc_metrics_sparse(client): +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +def test_qc_metrics_sparse(client, data_kind): adata = pbmc3k() adata.var["mt"] = adata.var_names.str.startswith("MT-") dask_data = adata.copy() - dask_data.X = as_sparse_cupy_dask_array(dask_data.X) - adata.X = cusparse.csr_matrix(adata.X) - rsc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], log1p=True) - rsc.pp.calculate_qc_metrics(dask_data, qc_vars=["mt"], log1p=True) - np.testing.assert_allclose( - adata.obs["n_genes_by_counts"], dask_data.obs["n_genes_by_counts"] - ) - np.testing.assert_allclose(adata.obs["total_counts"], dask_data.obs["total_counts"]) - np.testing.assert_allclose( - adata.obs["log1p_n_genes_by_counts"], dask_data.obs["log1p_n_genes_by_counts"] - ) - np.testing.assert_allclose( - adata.obs["log1p_total_counts"], dask_data.obs["log1p_total_counts"] - ) - np.testing.assert_allclose( - adata.obs["pct_counts_mt"], dask_data.obs["pct_counts_mt"] - ) - np.testing.assert_allclose( - adata.obs["total_counts_mt"], dask_data.obs["total_counts_mt"] - ) - np.testing.assert_allclose( - adata.obs["log1p_total_counts_mt"], dask_data.obs["log1p_total_counts_mt"] - ) - np.testing.assert_allclose( - adata.var["n_cells_by_counts"], dask_data.var["n_cells_by_counts"] - ) - np.testing.assert_allclose(adata.var["total_counts"], dask_data.var["total_counts"]) - np.testing.assert_allclose(adata.var["mean_counts"], dask_data.var["mean_counts"]) - np.testing.assert_allclose( - adata.var["pct_dropout_by_counts"], dask_data.var["pct_dropout_by_counts"] - ) - np.testing.assert_allclose( - adata.var["log1p_total_counts"], dask_data.var["log1p_total_counts"] - ) - np.testing.assert_allclose( - adata.var["log1p_mean_counts"], dask_data.var["log1p_mean_counts"] - ) + if data_kind == "sparse": + dask_data.X = as_sparse_cupy_dask_array(dask_data.X) + adata.X = cusparse.csr_matrix(adata.X) + elif data_kind == "dense": + dask_data.X = as_dense_cupy_dask_array(dask_data.X) + adata.X = cp.array(adata.X.toarray()) + else: + raise ValueError(f"Unknown data_kind {data_kind}") - -def test_qc_metrics_dense(client): - adata = pbmc3k() - adata.var["mt"] = adata.var_names.str.startswith("MT-") - dask_data = adata.copy() - dask_data.X = as_dense_cupy_dask_array(dask_data.X) - adata.X = cp.array(adata.X.toarray()) rsc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], log1p=True) rsc.pp.calculate_qc_metrics(dask_data, qc_vars=["mt"], log1p=True) np.testing.assert_allclose( diff --git a/tests/dask/test_scale_dask.py b/tests/dask/test_scale_dask.py index 6a9a7351..d8c3eb1f 100644 --- a/tests/dask/test_scale_dask.py +++ b/tests/dask/test_scale_dask.py @@ -2,6 +2,7 @@ import cupy as cp import numpy as np +import pytest import scanpy as sc from cupyx.scipy import sparse as cusparse from scanpy.datasets import pbmc3k @@ -23,45 +24,28 @@ def _get_anndata(): return adata.copy() -def test_zc_sparse(client): +@pytest.mark.parametrize("data_kind", ["sparse", "dense"]) +@pytest.mark.parametrize("zero_center", [True, False]) +def test_scale(client, data_kind, zero_center): adata = _get_anndata() - mask = np.random.randint(0, 2, adata.shape[0], dtype=np.bool_) + mask = np.random.randint(0, 2, adata.shape[0], dtype=bool) dask_data = adata.copy() - dask_data.X = as_sparse_cupy_dask_array(dask_data.X.astype(np.float64)) - adata.X = cusparse.csr_matrix(adata.X.astype(np.float64)) - rsc.pp.scale(adata, mask_obs=mask, max_value=10) - rsc.pp.scale(dask_data, mask_obs=mask, max_value=10) - cp.testing.assert_allclose(adata.X, dask_data.X.compute()) - -def test_nzc_sparse(client): - adata = _get_anndata() - mask = np.random.randint(0, 2, adata.shape[0], dtype=np.bool_) - dask_data = adata.copy() - dask_data.X = as_sparse_cupy_dask_array(dask_data.X) - adata.X = cusparse.csr_matrix(adata.X) - rsc.pp.scale(adata, zero_center=False, mask_obs=mask, max_value=10) - rsc.pp.scale(dask_data, zero_center=False, mask_obs=mask, max_value=10) - cp.testing.assert_allclose(adata.X.toarray(), dask_data.X.compute().toarray()) - - -def test_zc_dense(client): - adata = _get_anndata() - mask = np.random.randint(0, 2, adata.shape[0], dtype=np.bool_) - dask_data = adata.copy() - dask_data.X = as_dense_cupy_dask_array(dask_data.X.astype(np.float64)) - adata.X = cp.array(adata.X.toarray().astype(np.float64)) - rsc.pp.scale(adata, mask_obs=mask, max_value=10) - rsc.pp.scale(dask_data, mask_obs=mask, max_value=10) - cp.testing.assert_allclose(adata.X, dask_data.X.compute()) - - -def test_nzc_dense(client): - adata = _get_anndata() - mask = np.random.randint(0, 2, adata.shape[0], dtype=np.bool_) - dask_data = adata.copy() - dask_data.X = as_dense_cupy_dask_array(dask_data.X.astype(np.float64)) - adata.X = cp.array(adata.X.toarray().astype(np.float64)) - rsc.pp.scale(adata, zero_center=False, mask_obs=mask, max_value=10) - rsc.pp.scale(dask_data, zero_center=False, mask_obs=mask, max_value=10) - cp.testing.assert_allclose(adata.X, dask_data.X.compute()) + if data_kind == "sparse": + dask_data.X = as_sparse_cupy_dask_array(dask_data.X.astype(np.float64)) + adata.X = cusparse.csr_matrix(adata.X.astype(np.float64)) + elif data_kind == "dense": + dask_data.X = as_dense_cupy_dask_array(dask_data.X.astype(np.float64)) + adata.X = cp.array(adata.X.toarray().astype(np.float64)) + else: + raise ValueError(f"Unknown data_kind {data_kind}") + + rsc.pp.scale(adata, zero_center=zero_center, mask_obs=mask, max_value=10) + rsc.pp.scale(dask_data, zero_center=zero_center, mask_obs=mask, max_value=10) + if data_kind == "sparse" and not zero_center: + adata_X = adata.X.toarray() + dask_X = dask_data.X.compute().toarray() + else: + adata_X = adata.X + dask_X = dask_data.X.compute() + cp.testing.assert_allclose(adata_X, dask_X)