Skip to content

Commit

Permalink
Merge pull request #24 from IlyaLab/main
Browse files Browse the repository at this point in the history
update to the docs branch
  • Loading branch information
Gibbsdavidl authored Feb 9, 2024
2 parents 340d2db + 6bd7951 commit 1a85476
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 20 deletions.
15 changes: 10 additions & 5 deletions gssnng/score_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,15 @@ def run_gssnng(

samp_neighbors = None
error_checking(mat, samp_neighbors, recompute_neighbors,
gs_obj, score_method, ranked, method_params)
gs_obj, score_method, ranked, method_params, 0)

if method_params == None:
method_params = dict()

# score each cell with the list of gene sets
all_scores = _proc_data(mat, gs_obj, groupby, smooth_mode, recompute_neighbors,
score_method, method_params, samp_neighbors,
noise_trials, ranked, cores)
noise_trials, ranked, cores, 0)

# warning: the all_scores rows might have a different order!
# make sure to re-sort them according to the mat.obs.index
Expand Down Expand Up @@ -154,15 +154,15 @@ def with_gene_sets(

samp_neighbors = None
error_checking(adata, samp_neighbors, recompute_neighbors,
gs_obj, score_method, ranked, method_params)
gs_obj, score_method, ranked, method_params, 0)

if method_params == None:
method_params = dict()

# score each cell with the list of gene sets
all_scores = _proc_data(adata, gs_obj, groupby, smooth_mode, recompute_neighbors,
score_method, method_params, samp_neighbors,
noise_trials, ranked, cores)
noise_trials, ranked, cores, 0)
## join in new results
adata.obs = adata.obs.join(all_scores, how='left')

Expand Down Expand Up @@ -229,7 +229,8 @@ def _proc_data(
samp_neighbors: int,
noise_trials: int,
ranked: bool,
cores: int
cores: int,
return_data: int
):
"""
In many cases, the neighbors should be defined. If you have mixed clinical endpoints,
Expand All @@ -247,6 +248,7 @@ def _proc_data(
:param noise_trials: number of noisy samples to create, integer
:param ranked: whether the gene expression counts should be rank ordered
:param cores: number of parallel processes to work through groupby groups
:param return_data: should the smoothed data list be returned?
:returns: scores in a dict for each cell in a list.
"""
Expand Down Expand Up @@ -284,6 +286,9 @@ def _proc_data(
data_list = _build_data_list(adata, groupby, cats, recompute_neighbors, samp_neighbors, smooth_mode)
# then we can start scoring cells #

if return_data == 1:
return(data_list)

# building up the argument list for the parallel call of _score_all_cells_all_sets
arglist = []
for smoothed_adata, groupname in data_list:
Expand Down
52 changes: 52 additions & 0 deletions gssnng/smooth_anndatas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import anndata
from gssnng.score_cells import _proc_data
from gssnng.util import error_checking
from typing import Union

def smooth_anndata(
        adata: anndata.AnnData,
        groupby: Union[str, list, dict],
        smooth_mode: str,
        recompute_neighbors: int,
        method_params: dict,
        cores: int
    ) -> list:
    """
    Nearest neighbor smoothing of the expression matrix, without scoring.

    Runs the same pipeline as gene set scoring, but asks `_proc_data` to stop
    after the per-group smoothed data has been built and to return it.

    :param adata: anndata.AnnData containing the cells to be smoothed
    :param groupby: either a column label in adata.obs, and all categories
        taken, or a dict specifies one group.
    :param smooth_mode: `adjacency` or `connectivity`, which representation of
        the neighborhood graph to use. `adjacency` weights all neighbors
        equally, `connectivity` weights close neighbors more
    :param recompute_neighbors: should neighbors be recomputed within each
        group, 0 for no, >0 for yes and specifies N
    :param method_params: specific params for each method; None is treated as
        an empty dict.
    :param cores: number of parallel processes to work through groupby groups
    :returns: the data list produced by `_proc_data` when `return_data` is 1,
        i.e. one entry per group holding the smoothed data (the scoring path
        unpacks each entry as a `(smoothed_adata, groupname)` pair).
    """

    return_data = 1        # ask _proc_data to hand back the smoothed data list
    noise_trials = 0       # not used currently
    samp_neighbors = None  # also not used
    just_smoothing = 1     # error_checking skips the gene-set/scoring checks

    # No gene sets, score method or ranking apply when only smoothing,
    # so those positions are passed as None.
    error_checking(adata, samp_neighbors, recompute_neighbors,
                   None, None, None, method_params, just_smoothing)

    if method_params is None:
        method_params = dict()

    # Build the smoothed data per group; with return_data == 1 `_proc_data`
    # returns before any scoring takes place.
    data_list = _proc_data(adata, None, groupby, smooth_mode, recompute_neighbors,
                           None, method_params, samp_neighbors,
                           noise_trials, None, cores, return_data)

    print("**done**")
    return data_list
29 changes: 29 additions & 0 deletions gssnng/test/test_return_smoothed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
if __name__ == '__main__':
    # Manual smoke test: smooth the pbmc3k example data per louvain cluster,
    # report the wall-clock time and the number of entries returned.

    import time

    import scanpy as sc
    from gssnng.smooth_anndatas import smooth_anndata

    def test_return_smoothed(adata):
        """Smooth `adata` within each louvain group and return the result."""
        return smooth_anndata(
            adata=adata,
            groupby='louvain',
            smooth_mode='adjacency',
            recompute_neighbors=32,
            method_params={},
            cores=4,
        )

    def test_score_all_sets():
        """Load the example dataset, time the smoothing run and print a summary."""
        pbmc = sc.datasets.pbmc3k_processed()

        started_at = time.time()
        print('start time: ' + str(started_at))

        smoothed = test_return_smoothed(pbmc)
        print('******DONE*******')

        finished_at = time.time()
        print('end time: ' + str(finished_at))
        print('TOTAL TIME: ' + str(finished_at - started_at))
        print(len(smoothed))

    test_score_all_sets()
    print('test done')
34 changes: 19 additions & 15 deletions gssnng/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def error_checking(
gs_obj,
score_method,
ranked,
method_params
method_params,
just_smoothing
):
"""
QC on the adata. Need to make sure there's enough neighbors available given the sampling size.
Expand All @@ -23,30 +24,33 @@ def error_checking(
:param samp_neighbors: integer, number of neighbors to sample
"""

if type(method_params) != type(dict()):
raise Exception('ERROR: please use a dictionary to pass method params')

if any([xi in adata.obs.columns for xi in gs_obj.get_gs_names()]):
#raise Exception('ERROR: gene set names in columns of adata.obs, please drop.')
print("Warning! Dropping gene set names from obs!")
genesetlist = [x.name for x in gs_obj.set_list]
for gsi in genesetlist:
print('dropping: ' + gsi)
adata.obs.drop(columns=[gsi], inplace=True)

if 'gssnng_groupby' in adata.obs.columns:
adata.obs.drop(columns='gssnng_groupby', inplace=True)
#raise Exception("Error: please drop 'gssnng_groupby' as a column name.")
print('... and dropping gssnng_groupby column...')

if ranked == False and score_method == 'singscore':
raise Exception('ERROR: singscore requires ranked data, set ranked parameter to True')

if (recompute_neighbors == None) or (recompute_neighbors == 0):
n_neighbors = adata.uns['neighbors']['params']['n_neighbors'] #[0]# in older AnnData versions need this??
else:
n_neighbors = recompute_neighbors

if just_smoothing == 0:
# then do all other checks
if type(method_params) != type(dict()):
raise Exception('ERROR: please use a dictionary to pass method params')

if any([xi in adata.obs.columns for xi in gs_obj.get_gs_names()]):
#raise Exception('ERROR: gene set names in columns of adata.obs, please drop.')
print("Warning! Dropping gene set names from obs!")
genesetlist = [x.name for x in gs_obj.set_list]
for gsi in genesetlist:
print('dropping: ' + gsi)
adata.obs.drop(columns=[gsi], inplace=True)

if ranked == False and score_method == 'singscore':
raise Exception('ERROR: singscore requires ranked data, set ranked parameter to True')


#if n_neighbors < samp_neighbors:
# print('*******')
# print('WARNING: Number of neighbors too low for sampling parameter!')
Expand Down

0 comments on commit 1a85476

Please sign in to comment.