STew Updates - execution time improvements

fanzhanglab · Jan 17, 2024 · 61b4acf · 61b4acf
1 parent 5581ef5
commit 61b4acf
Show file tree

Hide file tree

Showing 15 changed files with 325 additions and 72 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: STew
 Title: STew
-Version: 2.0
+Version: 2.1
 Author: Nanxi Guo, Juan Vargas, Douglas Fritz, Revanth Krishna, Fan Zhang (University of Colorado Anschutz)
 Maintainer: Revanth Krishna <[email protected]>
 Correspondance: Fan Zhang <[email protected]>
@@ -15,7 +15,7 @@ BugReports: https://github.com/fanzhanglab/STew/issues
 Depends: 
     R (>= 4.0),
     ggplot2 (>= 3.4),
-    tidyverse
+    tidyverse,
 Remotes:
     JEFworks-Lab/MERINGUE,
     satijalab/Seurat
@@ -48,7 +48,7 @@ Imports:
     parameters
 Suggests: knitr,
     rmarkdown,
-    ggpointdensity,
+    ggpointdensity
 LazyData: true
 LazyDataCompression: xz
 VignetteBuilder: knitr
diff --git a/R/BinarySearch.R b/R/BinarySearch.R
@@ -1,5 +1,8 @@
 BinarySearch <- function(argu,sumabs){
-  if(l2n(argu)==0 || sum(abs(argu/l2n(argu)))<=sumabs) return(0)
+  if (length(sumabs) == 0) {
+    sumabs <- 0
+  }
+  if(l2n(argu)==0 || sum(abs(argu/l2n(argu)))<= sumabs) return(0)
   lam1 <- 0
   lam2 <- max(abs(argu))-1e-5
   iter <- 1

diff --git a/R/BuildSNNSeurat.R b/R/BuildSNNSeurat.R
@@ -5,27 +5,28 @@
 #' @param k.param  Maximum number of nearest neighbors to compute. Default - 30.
 #' @param prune.SNN Sets the cutoff for acceptable Jaccard index when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the stringency of pruning (0 --- no pruning, 1 --- prune everything). Default - 1/15.
 #' @param nn.eps Error bound: default of 0.0 implies exact nearest neighbor search
-#' @param obj STew S3 Object
+#' @param obj stCCA S3 Object
 #' @param cols columns selected
 #'
 #' @export
 #'
 #'
-#' @import Seurat
-#' future
+#' @import future
 #' future.apply
 #' RANN
+#' Seurat
 BuildSNNSeurat <- function(obj, k.param = 30, prune.SNN = 1/15, nn.eps = 0, cols = NULL) {
 
-  environment(BuildSNNSeurat) <- asNamespace("Seurat")
 
-  future::plan(multisession, workers = 4)
+  future::plan(multisession, workers = 8)
+
 
   data.use <- obj$joint_emb[, cols]
   my.knn <- nn2(data = data.use, k = k.param, searchtype = "standard", eps = nn.eps)
   nn.ranked <- my.knn$nn.idx
 
   snn_res <- Seurat:::ComputeSNN(nn_ranked = nn.ranked, prune = prune.SNN)
+
   rownames(snn_res) <- row.names(data.use)
   colnames(snn_res) <- row.names(data.use)
 

diff --git a/R/CCAAlgorithm.R b/R/CCAAlgorithm.R
@@ -1,13 +1,12 @@
 CCAAlgorithm <- function(x,z,v,penaltyx,penaltyz,K,niter,trace,upos,uneg,vpos,vneg){
-
-    if(K>1) v.init <- v[apply(z^2,2,sum)!=0,]
-    if(K==1) v.init <- v[apply(z^2,2,sum)!=0]
-
+
+    if(K>1) v.init <- v[colSums(z^2) != 0]
+    if(K==1) v.init <- v[colSums(z^2) != 0]
     v.init <- matrix(v.init,ncol=K)
     u=v=d=NULL
     xres <- x; zres <- z
-    xres <- x[,apply(x^2,2,sum)!=0]
-    zres <- z[,apply(z^2,2,sum)!=0]
+    xres <- x[, colSums(x^2) != 0]
+    zres <- z[, colSums(x^2) != 0]
     for(k in 1:K){
         if(vpos && sum(abs(v.init[v.init[,k]>0,k]))<sum(abs(v.init[v.init[,k]<0,k]))) v.init[,k] <- -v.init[,k]
         if(vneg && sum(abs(v.init[v.init[,k]<0,k]))<sum(abs(v.init[v.init[,k]>0,k]))) v.init[,k] <- -v.init[,k]
@@ -19,16 +18,17 @@ CCAAlgorithm <- function(x,z,v,penaltyx,penaltyz,K,niter,trace,upos,uneg,vpos,vn
         u <- cbind(u, out$u)
         v <- cbind(v, out$v)
     }
-
+
+
     ubig <- u
     vbig <- v
-    
+
     ubig <- matrix(0,nrow=ncol(x),ncol=K)
-    ubig[apply(x^2,2,sum)!=0,] <- u
-
-
+    ubig[colSums(x^2) != 0, ] <- u
+
     vbig <- matrix(0,nrow=ncol(z),ncol=K)
-    vbig[apply(z^2,2,sum)!=0,] <- v
-
+    vbig[colSums(z^2) != 0, ] <- v
+
+
     return(list(u=ubig,v=vbig,d=d))
 }
diff --git a/R/CCA_permute_both.R b/R/CCA_permute_both.R
@@ -6,8 +6,36 @@ CCA.permute.both=
         set.seed(seed)
         call <- match.call()
         if(standardize){
-            x <- scale(x,TRUE,TRUE)
-            z <- scale(z,TRUE,TRUE)
+            # x <- scale(x,TRUE,TRUE)
+          plan(multisession)
+          x@x <- x@x / rep.int(colSums(x), diff(x@p))
+          # Scaling for dense matrix 'z' in parallel
+
+
+
+          nsplit = 10
+
+          # Splitting the matrix
+          split_indices = cut(1:nrow(z), nsplit, labels = FALSE)
+
+          # Convert each part to a sparse matrix
+          sparseMxList = lapply(1:nsplit, function(i) {
+            sub_matrix = z[split_indices == i, ]
+            Matrix(as.matrix(sub_matrix), sparse = TRUE)
+          })
+
+          # Combine the list of sparse matrices into one
+          sparse.M = Reduce(function(x, y) rbind(x, y), sparseMxList)
+
+          # Set row and column names
+          rownames(sparse.M) <- rownames(z)
+          colnames(sparse.M) <- rownames(z)
+
+          z <- sparse.M
+
+          z@x <- z@x / rep.int(colSums(z), diff(z@p))
+
+
         }
         v <- CheckVs(v,x,z,1)
         ccperms=nnonzerous.perms=nnonzerovs.perms=matrix(NA, length(penaltyxs), nperms)
@@ -31,7 +59,7 @@ CCA.permute.both=
                 nnonzerous[j] <- sum(out$u != 0)
                 nnonzerovs[j] <- sum(out$v != 0)
                 if (mean(out$u == 0) != 1 && mean(out$v == 0) != 1) {
-                    ccs[j] <- cor(x %*% out$u, z %*% out$v)
+                    ccs[j] <- cor(as.matrix(x %*% out$u), as.matrix(z %*% out$v))
                 } else {
                     ccs[j] <- 0
                 }
@@ -40,7 +68,7 @@ CCA.permute.both=
             nnonzerous_perm <- sum(out$u != 0)
             nnonzerovs_perm <- sum(out$v != 0)
             if (mean(out$u == 0) != 1 && mean(out$v == 0) != 1) {
-                ccperm <- cor(x[sampx, ] %*% out$u, z[sampz, ] %*% out$v)
+                ccperm <- cor(as.matrix(x[sampx, ] %*% out$u), as.matrix(z[sampz, ] %*% out$v))
             } else {
                 ccperm <- 0
             }

diff --git a/R/CheckVs.R b/R/CheckVs.R
@@ -1,5 +1,6 @@
 CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate.
   ##print(list(v=v, x = x, z = z, K = K))
+  library(RSpectra)
   if(!is.null(v) && !is.matrix(v)) v <- matrix(v,nrow=ncol(z))
   if(!is.null(v) && ncol(v)<K) v <- NULL
   if(!is.null(v) && ncol(v)>K) v <- matrix(v[,1:K],ncol=K)
@@ -13,9 +14,9 @@ CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate.
     if(attempt==10) stop("Problem computing SVD.")
   } else if (is.null(v) && (ncol(z)<=nrow(z) || ncol(x)<=nrow(x))){
     attempt <- 1
-    v <- try(matrix(svd(t(x)%*%z)$v[,1:K],ncol=K), silent=TRUE)
+    v <- try(matrix(svds(t(x)%*%z, k = K)$v[,1:K],ncol=K), silent=TRUE)
     while(("try-error" %in% class(v)) && attempt<10){
-      v <- try(matrix(svd(t(x)%*%z)$v[,1:K],ncol=K), silent=TRUE)
+      v <- try(matrix(svds(t(x)%*%z, k = K)$v[,1:K],ncol=K), silent=TRUE)
       attempt <- attempt+1
     }
     if(attempt==10) stop("Problem computing SVD.")

diff --git a/R/FindClusters.R b/R/FindClusters.R
@@ -8,18 +8,17 @@
 #' @export
 #'
 #' @import future.apply
-#' Seurat
 #' future
+#' Seurat
 FindClusters <- function(resolution_list, obj) {
 
-  environment(BuildSNNSeurat) <- asNamespace("Seurat")
-  future::plan(multisession, workers = 4)
+  future::plan(multisession, workers = 8)
   snn <- obj$snn
   ids_cos <- Reduce(cbind, future_lapply(resolution_list, function(res_use) {
     Seurat:::RunModularityClustering(SNN = snn, modularity = 1,
                                      resolution = res_use, algorithm = 3, n.start = 10,
                                      n.iter = 10, random.seed = 0, print.output = FALSE,
-                                     temp.file.location = NULL, edge.file.name = NULL)
+                                 edge.file.name = NULL)
   }, future.seed = TRUE))
   colnames(ids_cos) <- sprintf("res_%.2f", resolution_list)
   ids_cos <- as.data.frame(ids_cos)

diff --git a/R/cca_main.R b/R/cca_main.R
@@ -31,12 +31,36 @@ cca_main <- function(x, z, obj = NULL, penaltyx=NULL, penaltyz=NULL, K=1, niter=
     if(sum(is.na(x))+sum(is.na(z)) > 0) stop("Cannot have NAs in x or z")
     if(nrow(x)!=nrow(z)) stop("x and z must have same number of rows")
     if(standardize){
-        sdx <- apply(x,2,sd)
-        sdz <- apply(z,2,sd)
+        sdx <- sqrt(colMeans(STew$exp_adj_matrix ^ 2) - colMeans(STew$exp_adj_matrix) ^ 2)
+        sdz <- sqrt(colMeans(STew$adj_matrix ^ 2) - colMeans(STew$adj_matrix) ^ 2)
         if(min(sdx)==0) stop("Cannot standardize because some of the columns of x have std. dev. 0")
         if(min(sdz)==0) stop("Cannot standardize because some of the columns of z have std. dev. 0")
-        x <- scale(x,TRUE,sdx)
-        z <- scale(z,TRUE,sdz)
+        x@x <- x@x / rep.int(colSums(x), diff(x@p))
+        # Scaling for dense matrix 'z' in parallel
+
+
+
+        nsplit = 10
+
+        # Splitting the matrix
+        split_indices = cut(1:nrow(z), nsplit, labels = FALSE)
+
+        # Convert each part to a sparse matrix
+        sparseMxList = lapply(1:nsplit, function(i) {
+          sub_matrix = z[split_indices == i, ]
+          Matrix(as.matrix(sub_matrix), sparse = TRUE)
+        })
+
+        # Combine the list of sparse matrices into one
+        sparse.M = Reduce(function(x, y) rbind(x, y), sparseMxList)
+
+        # Set row and column names
+        rownames(sparse.M) <- rownames(z)
+        colnames(sparse.M) <- rownames(z)
+
+        z <- sparse.M
+
+        z@x <- z@x / rep.int(colSums(z), diff(z@p))
     }
     if(!is.null(outcome)){
         pheno.out <- CCAPhenotypeZeroSome(x,z,y,qt=.8, cens=cens, outcome=outcome)
@@ -74,7 +98,7 @@ cca_main <- function(x, z, obj = NULL, penaltyx=NULL, penaltyz=NULL, K=1, niter=
     out$v.init <- v
     out$cors <- numeric(K)
     for(k in 1:K){
-        if(sum(out$u[,k]!=0)>0 && sum(out$v[,k]!=0)>0) out$cors[k] <- cor(x%*%out$u[,k],z%*%out$v[,k])
+        if(sum(out$u[,k]!=0)>0 && sum(out$v[,k]!=0)>0) out$cors[k] <- cor(as.matrix(x %*% out$u[, k]), as.matrix(z %*% out$v[, k]))
     }
     class(out) <- "cca_main"
     if(!is.null(obj)) {