Skip to content

Commit

Permalink
update to docs, examples, and smooth function
Browse files Browse the repository at this point in the history
  • Loading branch information
Gibbsdavidl committed Feb 10, 2024
1 parent cd3b90f commit 35a84e4
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 65 deletions.
2 changes: 1 addition & 1 deletion docs/decoupler_api_doc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Gene Set Scoring on the Nearest Neighbor Graph (gssnng) for Single Cell RNA-seq
:maxdepth: 2
Installation
Scoring Functions
Example script
Usage
Scoring Functions
Parameters
Groupby
Gene sets
Expand Down
2 changes: 1 addition & 1 deletion docs/gmt_files_doc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Gene Set Scoring on the Nearest Neighbor Graph (gssnng) for Single Cell RNA-seq
:maxdepth: 2
Installation
Scoring Functions
Example script
Usage
Scoring Functions
Parameters
Groupby
Gene sets
Expand Down
62 changes: 58 additions & 4 deletions docs/smoothing_adatas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ Gene Set Scoring on the Nearest Neighbor Graph (gssnng) for Single Cell RNA-seq
:maxdepth: 2
Installation
Scoring Functions
Example script
Usage
Parameters
Expand Down Expand Up @@ -73,13 +72,68 @@ See gssnng/notebooks for examples on all methods.

::

from gssnng import smoothing

q = sc.datasets.pbmc3k_processed()

q_list = smoothing.smooth_adata(adata=q, # AnnData object
groupby='louvain', # Will sample neighbors within this group, can take a list
smooth_mode='connectivity', # Smooths matrix using distance weights from NN graph.
recompute_neighbors=11, # Rebuild nearest neighbor graph with groups, 0 turns off function
cores=4) # Smoothed in parallel.



Parameters
==========

These parameters are used with the "smoothing.smooth_adata" function.::

adata: AnnData object from scanpy.read_*
AnnData containing the cells to be scored

groupby: [str, list, dict]
either a column label in adata.obs, in which case all categories are taken, or a dict that specifies one group.
SEE DESCRIPTION BELOW

smooth_mode: "adjacency", "connectivity", or "off"
Dictates how to use the neighborhood graph.
`adjacency` weights all neighbors equally, `connectivity` weights close neighbors more

recompute_neighbors: int
should neighbors be recomputed within each group, 0 for no, >0 for yes and specifies N

cores: int
number of parallel processes to work through groupby groups


Groupby
=======

The specific neighborhood for each cell can be controlled by using the groupby parameter. In the example
above, by setting groupby='louvain', only cells within a louvain cluster will be considered as being part of the
neighborhood and will be available for sampling.

Groupby specifies a column name that's found in the AnnData.obs table, and it can also take a list of column names.
In that case, cells will be grouped as the intersection of categories. For example, using groupby=['louvain','phenotype']
will take cells that are first in a given louvain cluster and then also in a given phenotype group. By also setting
the recompute_neighbors, the nearest neighbor graph is recomputed within this subset of cells. Controlling the
neighborhood leads to more controlled smoothing of the count matrix and is more suitable for downstream comparisons.


References
==========

rank biased overlap: https://arxiv.org/pdf/1408.3587.pdf

singscore: https://pubmed.ncbi.nlm.nih.gov/30400809/

anndata: https://anndata.readthedocs.io/en/latest/

MSigDB: https://www.gsea-msigdb.org/gsea/msigdb/

ssGSEA: https://gsea-msigdb.github.io/ssGSEA-gpmodule/v10/index.html

decoupler: https://academic.oup.com/bioinformaticsadvances/article/2/1/vbac016/6544613

omnipath: https://omnipathdb.org/
51 changes: 0 additions & 51 deletions gssnng/nnsmooth.py

This file was deleted.

57 changes: 57 additions & 0 deletions gssnng/smoothing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#from gssnng.score_cells import _proc_data
import gssnng
from gssnng.util import error_checking
from typing import Union
import numpy as np
from scipy import sparse
import logging
import anndata


NN_DISTANCE_KEY = 'distances' # scanpy names in .obsp
Expand All @@ -10,6 +15,58 @@
# multiplying should leave a "one-vector" still sum to one


# returns a list of adatas, each with a nearest neighbor smoothed expression matrix
def smooth_adata(
        adata: anndata.AnnData,
        groupby: Union[str, list, dict],
        smooth_mode: str,
        recompute_neighbors: int,
        cores: int
) -> list:

    """
    returns a list of adatas, each with a nearest neighbor smoothed expression matrix

    No gene sets are scored here: the gene-set related arguments of the
    underlying helpers are passed as None and only the smoothed data is returned.

    :param adata
        anndata.AnnData containing the cells to be smoothed
    :param groupby
        either a column label in adata.obs, in which case all categories are taken,
        or a dict that specifies one group.
    :param smooth_mode
        `adjacency` or `connectivity`, which representation of the neighborhood graph to use.
        `adjacency` weights all neighbors equally, `connectivity` weights close neighbors more
    :param recompute_neighbors
        should neighbors be recomputed within each group, 0 for no, >0 for yes and specifies N
    :param cores
        number of parallel processes to work through groupby groups

    :returns: a list with one entry per groupby group holding the smoothed data
        (NOTE(review): the example script indexes each entry as
        (adata, group label) -- confirm against score_cells._proc_data)
    """

    return_data = 1          # ask _proc_data to hand back the smoothed data
    noise_trials = 0         ### not used currently
    samp_neighbors = None    ### also not used
    just_smoothing = 1       # tells error_checking that no gene set scoring is requested

    # no params for now
    method_params = dict()

    error_checking(adata, samp_neighbors, recompute_neighbors,
                   None, None, None, method_params, just_smoothing)

    # smooth each group of cells; the gene set / scoring arguments are None
    # because only smoothing is performed
    data_list = gssnng.score_cells._proc_data(adata, None, groupby, smooth_mode, recompute_neighbors,
                                              None, method_params, samp_neighbors,
                                              noise_trials, None, cores, return_data)

    print("**done**")
    return data_list




def get_smoothing_matrix(adata, mode, add_diag):
"""
using the nearest neighbor graph in adata.obsp, calculate the smoothing
Expand Down
18 changes: 10 additions & 8 deletions gssnng/test/example_smoothing_counts.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
# Example: smooth the counts of the scanpy pbmc3k dataset within each louvain
# cluster using gssnng.smoothing.smooth_adata, and report the wall-clock time.
from gssnng import smoothing
import scanpy as sc
import time

if __name__ == '__main__':

    print("reading data")
    q = sc.datasets.pbmc3k_processed()

    t0 = time.time()
    print('start time: ' + str(t0))
    print('starting the smOOthing')

    print("scoring cells")
    q_list = smoothing.smooth_adata(
        adata=q,
        groupby='louvain',
        smooth_mode='connectivity',
        recompute_neighbors=11,  # >0 rebuilds the neighbor graph within each group
        cores=8
    )

    t1 = time.time()

    print("Adata List with SMooTHed counts.")
    print("Each is a tuple with groupby category and adata as elements.")
    print(len(q_list))
    for qi in q_list:
        # qi[0] is used as the AnnData (has .X), qi[1] as the group label string
        print(qi[1] + " X size: " + str(qi[0].X.shape))

    print('end time: ' + str(t1))
    print('TOTAL TIME: ' + str(t1-t0))
Expand Down

0 comments on commit 35a84e4

Please sign in to comment.