diff --git a/docs/release-notes/0.9.4.md b/docs/release-notes/0.9.4.md index d89997b7..e187726b 100644 --- a/docs/release-notes/0.9.4.md +++ b/docs/release-notes/0.9.4.md @@ -1,7 +1,8 @@ -### 0.9.4 {small}`the future` +### 0.9.4 ```{rubric} Features ``` * {func}`~rapids_singlecell.tl.leiden` now provides `random_state` parameter for initializing the optimization {pr}`102` {smaller}`J Pintar & S Dicks` * {func}`~rapids_singlecell.tl.louvain` now provides `threshold` parameter for setting the minimum modularity gain between levels {pr}`103` {smaller}`J Pintar & S Dicks` * added a kernel for sparse sum along the major axis {pr}`112` {smaller}`S Dicks` +* {func}`~rapids_singlecell.pp.calculate_qc_metrics` now uses fewer atomics and less global memory in its kernels {pr}`113` {smaller}`S Dicks` diff --git a/src/rapids_singlecell/preprocessing/_kernels/_qc_kernels.py b/src/rapids_singlecell/preprocessing/_kernels/_qc_kernels.py index 36820521..18bbfdc8 100644 --- a/src/rapids_singlecell/preprocessing/_kernels/_qc_kernels.py +++ b/src/rapids_singlecell/preprocessing/_kernels/_qc_kernels.py @@ -12,14 +12,17 @@ int start_idx = indptr[gene]; int stop_idx = indptr[gene+1]; + {0} sums_genes_i = 0; + int gene_ex_i = 0; for(int cell = start_idx; cell < stop_idx; cell++){ {0} value = data[cell]; int cell_number = index[cell]; - atomicAdd(&sums_genes[gene], value); + sums_genes_i += value; atomicAdd(&sums_cells[cell_number], value); - atomicAdd(&gene_ex[gene], 1); + gene_ex_i += 1; atomicAdd(&cell_ex[cell_number], 1); - + sums_genes[gene] = sums_genes_i; + gene_ex[gene] = gene_ex_i; } } """ @@ -36,14 +39,17 @@ int start_idx = indptr[cell]; int stop_idx = indptr[cell+1]; + {0} sums_cells_i = 0; + int cell_ex_i = 0; for(int gene = start_idx; gene < stop_idx; gene++){ {0} value = data[gene]; int gene_number = index[gene]; atomicAdd(&sums_genes[gene_number], value); - atomicAdd(&sums_cells[cell], value); + sums_cells_i += value; atomicAdd(&gene_ex[gene_number], 1); - 
atomicAdd(&cell_ex[cell], 1); - + cell_ex_i += 1; + sums_cells[cell] = sums_cells_i; + cell_ex[cell] = cell_ex_i; } } """ @@ -58,8 +64,6 @@ if(cell >= n_cells || gene >=n_genes){ return; } - - long long int index = static_cast(cell) * n_genes + gene; {0} value = data[index]; if (value>0.0){ @@ -103,12 +107,14 @@ int start_idx = indptr[cell]; int stop_idx = indptr[cell+1]; + {0} sums_cells_i = 0; for(int gene = start_idx; gene < stop_idx; gene++){ int gene_number = index[gene]; if (mask[gene_number]==true){ - atomicAdd(&sums_cells[cell], data[gene]); + sums_cells_i += data[gene]; } + sums_cells[cell] = sums_cells_i; } } """