Skip to content

Commit

Permalink
slim down tests
Browse files: browse the repository at this point in the history
  • Loading branch information
Intron7 committed Oct 15, 2024
1 parent b9e4931 commit 9308e21
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 202 deletions.
135 changes: 46 additions & 89 deletions tests/dask/test_dask_pca.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@

import cupy as cp
import numpy as np
import pytest
from cupyx.scipy import sparse as cusparse
from scanpy.datasets import pbmc3k, pbmc3k_processed
from scipy import sparse
Expand All @@ -13,129 +14,85 @@
)


def test_pca_sparse_dask(client):
sparse_ad = pbmc3k_processed()
default = pbmc3k_processed()
sparse_ad.X = sparse.csr_matrix(sparse_ad.X.astype(np.float64))
default.X = as_sparse_cupy_dask_array(default.X.astype(np.float64))
rsc.pp.pca(sparse_ad)
rsc.pp.pca(default)
@pytest.mark.parametrize("data_kind", ["sparse", "dense"])
def test_pca_dask(client, data_kind):
adata_1 = pbmc3k_processed()
adata_2 = pbmc3k_processed()

cp.testing.assert_allclose(
np.abs(sparse_ad.obsm["X_pca"]),
cp.abs(default.obsm["X_pca"].compute()),
rtol=1e-7,
atol=1e-6,
)
if data_kind == "sparse":
adata_1.X = sparse.csr_matrix(adata_1.X.astype(np.float64))
adata_2.X = as_sparse_cupy_dask_array(adata_2.X.astype(np.float64))
elif data_kind == "dense":
adata_1.X = cp.array(adata_1.X.astype(np.float64))
adata_2.X = as_dense_cupy_dask_array(adata_2.X.astype(np.float64))
else:
raise ValueError(f"Unknown data_kind {data_kind}")

cp.testing.assert_allclose(
np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6
)
rsc.pp.pca(adata_1, svd_solver="full")
rsc.pp.pca(adata_2, svd_solver="full")

cp.testing.assert_allclose(
np.abs(sparse_ad.uns["pca"]["variance_ratio"]),
np.abs(default.uns["pca"]["variance_ratio"]),
np.abs(adata_1.obsm["X_pca"]),
cp.abs(adata_2.obsm["X_pca"].compute()),
rtol=1e-7,
atol=1e-6,
)


def test_pca_dense_dask_full_pipeline(client):
dense = pbmc3k()
default = pbmc3k()
dense.X = cp.array(dense.X.astype(np.float64).toarray())
default.X = as_dense_cupy_dask_array(default.X.astype(np.float64).toarray())

rsc.pp.filter_genes(dense, min_count=500)
rsc.pp.filter_genes(default, min_count=500)

rsc.pp.normalize_total(dense, target_sum=1e4)
rsc.pp.normalize_total(default, target_sum=1e4)

rsc.pp.log1p(dense)
rsc.pp.log1p(default)

rsc.pp.pca(dense, svd_solver="full")
rsc.pp.pca(default, svd_solver="full")

cp.testing.assert_allclose(
np.abs(dense.obsm["X_pca"]),
cp.abs(default.obsm["X_pca"].compute()),
np.abs(adata_1.varm["PCs"]),
np.abs(adata_2.varm["PCs"]),
rtol=1e-7,
atol=1e-6,
)

cp.testing.assert_allclose(
np.abs(dense.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6
)

cp.testing.assert_allclose(
np.abs(dense.uns["pca"]["variance_ratio"]),
np.abs(default.uns["pca"]["variance_ratio"]),
np.abs(adata_1.uns["pca"]["variance_ratio"]),
np.abs(adata_2.uns["pca"]["variance_ratio"]),
rtol=1e-7,
atol=1e-6,
)


def test_pca_sparse_dask_full_pipeline(client):
sparse_ad = pbmc3k()
default = pbmc3k()
sparse_ad.X = cusparse.csr_matrix(sparse.csr_matrix(sparse_ad.X.astype(np.float64)))
default.X = as_sparse_cupy_dask_array(default.X.astype(np.float64))
@pytest.mark.parametrize("data_kind", ["sparse", "dense"])
def test_pca_dask_full_pipeline(client, data_kind):
adata_1 = pbmc3k()
adata_2 = pbmc3k()

rsc.pp.filter_genes(sparse_ad, min_count=100)
rsc.pp.filter_genes(default, min_count=100)
if data_kind == "sparse":
adata_1.X = cusparse.csr_matrix(sparse.csr_matrix(adata_1.X.astype(np.float64)))
adata_2.X = as_sparse_cupy_dask_array(adata_2.X.astype(np.float64))
elif data_kind == "dense":
adata_1.X = cp.array(adata_1.X.astype(np.float64).toarray())
adata_2.X = as_dense_cupy_dask_array(adata_2.X.astype(np.float64).toarray())
else:
raise ValueError(f"Unknown data_kind {data_kind}")

rsc.pp.normalize_total(sparse_ad, target_sum=1e4)
rsc.pp.normalize_total(default, target_sum=1e4)
rsc.pp.filter_genes(adata_1, min_count=500)
rsc.pp.filter_genes(adata_2, min_count=500)

rsc.pp.log1p(sparse_ad)
rsc.pp.log1p(default)

rsc.pp.pca(sparse_ad)
rsc.pp.pca(default)

cp.testing.assert_allclose(
np.abs(sparse_ad.obsm["X_pca"]),
cp.abs(default.obsm["X_pca"].compute()),
rtol=1e-7,
atol=1e-6,
)

cp.testing.assert_allclose(
np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6
)

cp.testing.assert_allclose(
np.abs(sparse_ad.uns["pca"]["variance_ratio"]),
np.abs(default.uns["pca"]["variance_ratio"]),
rtol=1e-7,
atol=1e-6,
)
rsc.pp.normalize_total(adata_1, target_sum=1e4)
rsc.pp.normalize_total(adata_2, target_sum=1e4)

rsc.pp.log1p(adata_1)
rsc.pp.log1p(adata_2)

def test_pca_dense_dask(client):
sparse_ad = pbmc3k_processed()
default = pbmc3k_processed()
sparse_ad.X = cp.array(sparse_ad.X.astype(np.float64))
default.X = as_dense_cupy_dask_array(default.X.astype(np.float64))
rsc.pp.pca(sparse_ad, svd_solver="full")
rsc.pp.pca(default, svd_solver="full")
rsc.pp.pca(adata_1, svd_solver="full")
rsc.pp.pca(adata_2, svd_solver="full")

cp.testing.assert_allclose(
np.abs(sparse_ad.obsm["X_pca"]),
cp.abs(default.obsm["X_pca"].compute()),
np.abs(adata_1.obsm["X_pca"]),
cp.abs(adata_2.obsm["X_pca"].compute()),
rtol=1e-7,
atol=1e-6,
)

cp.testing.assert_allclose(
np.abs(sparse_ad.varm["PCs"]), np.abs(default.varm["PCs"]), rtol=1e-7, atol=1e-6
np.abs(adata_1.varm["PCs"]), np.abs(adata_2.varm["PCs"]), rtol=1e-7, atol=1e-6
)

cp.testing.assert_allclose(
np.abs(sparse_ad.uns["pca"]["variance_ratio"]),
np.abs(default.uns["pca"]["variance_ratio"]),
np.abs(adata_1.uns["pca"]["variance_ratio"]),
np.abs(adata_2.uns["pca"]["variance_ratio"]),
rtol=1e-7,
atol=1e-6,
)
68 changes: 39 additions & 29 deletions tests/dask/test_normalize_dask.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import cupy as cp
import pytest
import scanpy as sc
from cupyx.scipy import sparse as cusparse
from scanpy.datasets import pbmc3k
Expand All @@ -12,51 +13,60 @@
)


def test_normalize_sparse(client):
@pytest.mark.parametrize("data_kind", ["sparse", "dense"])
def test_normalize_total(client, data_kind):
adata = pbmc3k()
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
dask_data = adata.copy()
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
rsc.pp.normalize_total(adata)
rsc.pp.normalize_total(dask_data)
cp.testing.assert_allclose(adata.X.toarray(), dask_data.X.compute().toarray())

if data_kind == "sparse":
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
elif data_kind == "dense":
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())
else:
raise ValueError(f"Unknown data_kind {data_kind}")

def test_normalize_dense(client):
adata = pbmc3k()
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
dask_data = adata.copy()
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())
rsc.pp.normalize_total(adata)
rsc.pp.normalize_total(dask_data)
cp.testing.assert_allclose(adata.X, dask_data.X.compute())

if data_kind == "sparse":
adata_X = adata.X.toarray()
dask_X = dask_data.X.compute().toarray()
else:
adata_X = adata.X
dask_X = dask_data.X.compute()

def test_log1p_sparse(client):
adata = pbmc3k()
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata)
dask_data = adata.copy()
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
rsc.pp.log1p(adata)
rsc.pp.log1p(dask_data)
cp.testing.assert_allclose(adata.X.toarray(), dask_data.X.compute().toarray())
cp.testing.assert_allclose(adata_X, dask_X)


def test_log1p_dense(client):
@pytest.mark.parametrize("data_kind", ["sparse", "dense"])
def test_log1p(client, data_kind):
adata = pbmc3k()
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata)
dask_data = adata.copy()
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())

if data_kind == "sparse":
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
elif data_kind == "dense":
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())
else:
raise ValueError(f"Unknown data_kind {data_kind}")

rsc.pp.log1p(adata)
rsc.pp.log1p(dask_data)
cp.testing.assert_allclose(adata.X, dask_data.X.compute())

if data_kind == "sparse":
adata_X = adata.X.toarray()
dask_X = dask_data.X.compute().toarray()
else:
adata_X = adata.X
dask_X = dask_data.X.compute()

cp.testing.assert_allclose(adata_X, dask_X)
56 changes: 11 additions & 45 deletions tests/dask/test_qc_dask.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@

import cupy as cp
import numpy as np
import pytest
from cupyx.scipy import sparse as cusparse
from scanpy.datasets import pbmc3k

Expand All @@ -12,55 +13,20 @@
)


def test_qc_metrics_sparse(client):
@pytest.mark.parametrize("data_kind", ["sparse", "dense"])
def test_qc_metrics_sparse(client, data_kind):
adata = pbmc3k()
adata.var["mt"] = adata.var_names.str.startswith("MT-")
dask_data = adata.copy()
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
rsc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], log1p=True)
rsc.pp.calculate_qc_metrics(dask_data, qc_vars=["mt"], log1p=True)
np.testing.assert_allclose(
adata.obs["n_genes_by_counts"], dask_data.obs["n_genes_by_counts"]
)
np.testing.assert_allclose(adata.obs["total_counts"], dask_data.obs["total_counts"])
np.testing.assert_allclose(
adata.obs["log1p_n_genes_by_counts"], dask_data.obs["log1p_n_genes_by_counts"]
)
np.testing.assert_allclose(
adata.obs["log1p_total_counts"], dask_data.obs["log1p_total_counts"]
)
np.testing.assert_allclose(
adata.obs["pct_counts_mt"], dask_data.obs["pct_counts_mt"]
)
np.testing.assert_allclose(
adata.obs["total_counts_mt"], dask_data.obs["total_counts_mt"]
)
np.testing.assert_allclose(
adata.obs["log1p_total_counts_mt"], dask_data.obs["log1p_total_counts_mt"]
)
np.testing.assert_allclose(
adata.var["n_cells_by_counts"], dask_data.var["n_cells_by_counts"]
)
np.testing.assert_allclose(adata.var["total_counts"], dask_data.var["total_counts"])
np.testing.assert_allclose(adata.var["mean_counts"], dask_data.var["mean_counts"])
np.testing.assert_allclose(
adata.var["pct_dropout_by_counts"], dask_data.var["pct_dropout_by_counts"]
)
np.testing.assert_allclose(
adata.var["log1p_total_counts"], dask_data.var["log1p_total_counts"]
)
np.testing.assert_allclose(
adata.var["log1p_mean_counts"], dask_data.var["log1p_mean_counts"]
)
if data_kind == "sparse":
dask_data.X = as_sparse_cupy_dask_array(dask_data.X)
adata.X = cusparse.csr_matrix(adata.X)
elif data_kind == "dense":
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())
else:
raise ValueError(f"Unknown data_kind {data_kind}")


def test_qc_metrics_dense(client):
adata = pbmc3k()
adata.var["mt"] = adata.var_names.str.startswith("MT-")
dask_data = adata.copy()
dask_data.X = as_dense_cupy_dask_array(dask_data.X)
adata.X = cp.array(adata.X.toarray())
rsc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], log1p=True)
rsc.pp.calculate_qc_metrics(dask_data, qc_vars=["mt"], log1p=True)
np.testing.assert_allclose(
Expand Down
Loading

0 comments on commit 9308e21

Please sign in to comment.