Skip to content

Commit

Permalink
STew Updates - execution time improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
reyvnth committed Jan 17, 2024
1 parent 5581ef5 commit 61b4acf
Show file tree
Hide file tree
Showing 15 changed files with 325 additions and 72 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: STew
Title: STew
Version: 2.0
Version: 2.1
Author: Nanxi Guo, Juan Vargas, Douglas Fritz, Revanth Krishna, Fan Zhang (University of Colorado Anschutz)
Maintainer: Revanth Krishna <[email protected]>
Correspondance: Fan Zhang <[email protected]>
Expand All @@ -15,7 +15,7 @@ BugReports: https://github.com/fanzhanglab/STew/issues
Depends:
R (>= 4.0),
ggplot2 (>= 3.4),
tidyverse
tidyverse,
Remotes:
JEFworks-Lab/MERINGUE,
satijalab/Seurat
Expand Down Expand Up @@ -48,7 +48,7 @@ Imports:
parameters
Suggests: knitr,
rmarkdown,
ggpointdensity,
ggpointdensity
LazyData: true
LazyDataCompression: xz
VignetteBuilder: knitr
5 changes: 4 additions & 1 deletion R/BinarySearch.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
BinarySearch <- function(argu,sumabs){
if(l2n(argu)==0 || sum(abs(argu/l2n(argu)))<=sumabs) return(0)
if (length(sumabs) == 0) {
sumabs <- 0
}
if(l2n(argu)==0 || sum(abs(argu/l2n(argu)))<= sumabs) return(0)
lam1 <- 0
lam2 <- max(abs(argu))-1e-5
iter <- 1
Expand Down
11 changes: 6 additions & 5 deletions R/BuildSNNSeurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,28 @@
#' @param k.param Maximum number of nearest neighbors to compute. Default - 30.
#' @param prune.SNN Sets the cutoff for acceptable Jaccard index when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the stringency of pruning (0 --- no pruning, 1 --- prune everything). Default - 1/15.
#' @param nn.eps Error bound: default of 0.0 implies exact nearest neighbor search
#' @param obj STew S3 Object
#' @param obj stCCA S3 Object
#' @param cols columns selected
#'
#' @export
#'
#'
#' @import Seurat
#' future
#' @import future
#' future.apply
#' RANN
#' Seurat
BuildSNNSeurat <- function(obj, k.param = 30, prune.SNN = 1/15, nn.eps = 0, cols = NULL) {

environment(BuildSNNSeurat) <- asNamespace("Seurat")

future::plan(multisession, workers = 4)
future::plan(multisession, workers = 8)


data.use <- obj$joint_emb[, cols]
my.knn <- nn2(data = data.use, k = k.param, searchtype = "standard", eps = nn.eps)
nn.ranked <- my.knn$nn.idx

snn_res <- Seurat:::ComputeSNN(nn_ranked = nn.ranked, prune = prune.SNN)

rownames(snn_res) <- row.names(data.use)
colnames(snn_res) <- row.names(data.use)

Expand Down
26 changes: 13 additions & 13 deletions R/CCAAlgorithm.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
CCAAlgorithm <- function(x,z,v,penaltyx,penaltyz,K,niter,trace,upos,uneg,vpos,vneg){

if(K>1) v.init <- v[apply(z^2,2,sum)!=0,]
if(K==1) v.init <- v[apply(z^2,2,sum)!=0]


if(K>1) v.init <- v[colSums(z^2) != 0]
if(K==1) v.init <- v[colSums(z^2) != 0]
v.init <- matrix(v.init,ncol=K)
u=v=d=NULL
xres <- x; zres <- z
xres <- x[,apply(x^2,2,sum)!=0]
zres <- z[,apply(z^2,2,sum)!=0]
xres <- x[, colSums(x^2) != 0]
zres <- z[, colSums(x^2) != 0]
for(k in 1:K){
if(vpos && sum(abs(v.init[v.init[,k]>0,k]))<sum(abs(v.init[v.init[,k]<0,k]))) v.init[,k] <- -v.init[,k]
if(vneg && sum(abs(v.init[v.init[,k]<0,k]))<sum(abs(v.init[v.init[,k]>0,k]))) v.init[,k] <- -v.init[,k]
Expand All @@ -19,16 +18,17 @@ CCAAlgorithm <- function(x,z,v,penaltyx,penaltyz,K,niter,trace,upos,uneg,vpos,vn
u <- cbind(u, out$u)
v <- cbind(v, out$v)
}



ubig <- u
vbig <- v

ubig <- matrix(0,nrow=ncol(x),ncol=K)
ubig[apply(x^2,2,sum)!=0,] <- u


ubig[colSums(x^2) != 0, ] <- u

vbig <- matrix(0,nrow=ncol(z),ncol=K)
vbig[apply(z^2,2,sum)!=0,] <- v

vbig[colSums(z^2) != 0, ] <- v


return(list(u=ubig,v=vbig,d=d))
}
36 changes: 32 additions & 4 deletions R/CCA_permute_both.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,36 @@ CCA.permute.both=
set.seed(seed)
call <- match.call()
if(standardize){
x <- scale(x,TRUE,TRUE)
z <- scale(z,TRUE,TRUE)
# x <- scale(x,TRUE,TRUE)
plan(multisession)
x@x <- x@x / rep.int(colSums(x), diff(x@p))
# Scaling for dense matrix 'z' in parallel



nsplit = 10

# Splitting the matrix
split_indices = cut(1:nrow(z), nsplit, labels = FALSE)

# Convert each part to a sparse matrix
sparseMxList = lapply(1:nsplit, function(i) {
sub_matrix = z[split_indices == i, ]
Matrix(as.matrix(sub_matrix), sparse = TRUE)
})

# Combine the list of sparse matrices into one
sparse.M = Reduce(function(x, y) rbind(x, y), sparseMxList)

# Set row and column names
rownames(sparse.M) <- rownames(z)
colnames(sparse.M) <- rownames(z)

z <- sparse.M

z@x <- z@x / rep.int(colSums(z), diff(z@p))


}
v <- CheckVs(v,x,z,1)
ccperms=nnonzerous.perms=nnonzerovs.perms=matrix(NA, length(penaltyxs), nperms)
Expand All @@ -31,7 +59,7 @@ CCA.permute.both=
nnonzerous[j] <- sum(out$u != 0)
nnonzerovs[j] <- sum(out$v != 0)
if (mean(out$u == 0) != 1 && mean(out$v == 0) != 1) {
ccs[j] <- cor(x %*% out$u, z %*% out$v)
ccs[j] <- cor(as.matrix(x %*% out$u), as.matrix(z %*% out$v))
} else {
ccs[j] <- 0
}
Expand All @@ -40,7 +68,7 @@ CCA.permute.both=
nnonzerous_perm <- sum(out$u != 0)
nnonzerovs_perm <- sum(out$v != 0)
if (mean(out$u == 0) != 1 && mean(out$v == 0) != 1) {
ccperm <- cor(x[sampx, ] %*% out$u, z[sampz, ] %*% out$v)
ccperm <- cor(as.matrix(x[sampx, ] %*% out$u), as.matrix(z[sampz, ] %*% out$v))
} else {
ccperm <- 0
}
Expand Down
5 changes: 3 additions & 2 deletions R/CheckVs.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate.
##print(list(v=v, x = x, z = z, K = K))
library(RSpectra)
if(!is.null(v) && !is.matrix(v)) v <- matrix(v,nrow=ncol(z))
if(!is.null(v) && ncol(v)<K) v <- NULL
if(!is.null(v) && ncol(v)>K) v <- matrix(v[,1:K],ncol=K)
Expand All @@ -13,9 +14,9 @@ CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate.
if(attempt==10) stop("Problem computing SVD.")
} else if (is.null(v) && (ncol(z)<=nrow(z) || ncol(x)<=nrow(x))){
attempt <- 1
v <- try(matrix(svd(t(x)%*%z)$v[,1:K],ncol=K), silent=TRUE)
v <- try(matrix(svds(t(x)%*%z, k = K)$v[,1:K],ncol=K), silent=TRUE)
while(("try-error" %in% class(v)) && attempt<10){
v <- try(matrix(svd(t(x)%*%z)$v[,1:K],ncol=K), silent=TRUE)
v <- try(matrix(svds(t(x)%*%z, k = K)$v[,1:K],ncol=K), silent=TRUE)
attempt <- attempt+1
}
if(attempt==10) stop("Problem computing SVD.")
Expand Down
7 changes: 3 additions & 4 deletions R/FindClusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,17 @@
#' @export
#'
#' @import future.apply
#' Seurat
#' future
#' Seurat
FindClusters <- function(resolution_list, obj) {

environment(BuildSNNSeurat) <- asNamespace("Seurat")
future::plan(multisession, workers = 4)
future::plan(multisession, workers = 8)
snn <- obj$snn
ids_cos <- Reduce(cbind, future_lapply(resolution_list, function(res_use) {
Seurat:::RunModularityClustering(SNN = snn, modularity = 1,
resolution = res_use, algorithm = 3, n.start = 10,
n.iter = 10, random.seed = 0, print.output = FALSE,
temp.file.location = NULL, edge.file.name = NULL)
edge.file.name = NULL)
}, future.seed = TRUE))
colnames(ids_cos) <- sprintf("res_%.2f", resolution_list)
ids_cos <- as.data.frame(ids_cos)
Expand Down
34 changes: 29 additions & 5 deletions R/cca_main.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,36 @@ cca_main <- function(x, z, obj = NULL, penaltyx=NULL, penaltyz=NULL, K=1, niter=
if(sum(is.na(x))+sum(is.na(z)) > 0) stop("Cannot have NAs in x or z")
if(nrow(x)!=nrow(z)) stop("x and z must have same number of rows")
if(standardize){
sdx <- apply(x,2,sd)
sdz <- apply(z,2,sd)
sdx <- sqrt(colMeans(STew$exp_adj_matrix ^ 2) - colMeans(STew$exp_adj_matrix) ^ 2)
sdz <- sqrt(colMeans(STew$adj_matrix ^ 2) - colMeans(STew$adj_matrix) ^ 2)
if(min(sdx)==0) stop("Cannot standardize because some of the columns of x have std. dev. 0")
if(min(sdz)==0) stop("Cannot standardize because some of the columns of z have std. dev. 0")
x <- scale(x,TRUE,sdx)
z <- scale(z,TRUE,sdz)
x@x <- x@x / rep.int(colSums(x), diff(x@p))
# Scaling for dense matrix 'z' in parallel



nsplit = 10

# Splitting the matrix
split_indices = cut(1:nrow(z), nsplit, labels = FALSE)

# Convert each part to a sparse matrix
sparseMxList = lapply(1:nsplit, function(i) {
sub_matrix = z[split_indices == i, ]
Matrix(as.matrix(sub_matrix), sparse = TRUE)
})

# Combine the list of sparse matrices into one
sparse.M = Reduce(function(x, y) rbind(x, y), sparseMxList)

# Set row and column names
rownames(sparse.M) <- rownames(z)
colnames(sparse.M) <- rownames(z)

z <- sparse.M

z@x <- z@x / rep.int(colSums(z), diff(z@p))
}
if(!is.null(outcome)){
pheno.out <- CCAPhenotypeZeroSome(x,z,y,qt=.8, cens=cens, outcome=outcome)
Expand Down Expand Up @@ -74,7 +98,7 @@ cca_main <- function(x, z, obj = NULL, penaltyx=NULL, penaltyz=NULL, K=1, niter=
out$v.init <- v
out$cors <- numeric(K)
for(k in 1:K){
if(sum(out$u[,k]!=0)>0 && sum(out$v[,k]!=0)>0) out$cors[k] <- cor(x%*%out$u[,k],z%*%out$v[,k])
if(sum(out$u[,k]!=0)>0 && sum(out$v[,k]!=0)>0) out$cors[k] <- cor(as.matrix(x %*% out$u[, k]), as.matrix(z %*% out$v[, k]))
}
class(out) <- "cca_main"
if(!is.null(obj)) {
Expand Down
Loading

0 comments on commit 61b4acf

Please sign in to comment.