Skip to content

Commit

Permalink
Add code for pseudobulk computation from Seurat objects.
Browse files Browse the repository at this point in the history
  • Loading branch information
ddiez committed Dec 8, 2023
1 parent 4c3a2f3 commit 7f7f212
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 1 deletion.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ S3method(plot_violin,Seurat)
S3method(plot_violin,SingleCellExperiment)
S3method(plot_violin,data.frame)
S3method(process,Seurat)
S3method(pseudobulk,Seurat)
S3method(reduce_dim,SingleCellExperiment)
S3method(reduce_dim,matrix)
S3method(run_enrichment,data.frame)
Expand Down Expand Up @@ -138,6 +139,7 @@ export(plot_velocity)
export(plot_violin)
export(plot_volcano)
export(process)
export(pseudobulk)
export(read_cpdb_out)
export(reduce_dim)
export(run_enrichment)
Expand All @@ -156,6 +158,7 @@ importFrom(ComplexHeatmap,Heatmap)
importFrom(ComplexHeatmap,columnAnnotation)
importFrom(GenomeInfoDb,standardChromosomes)
importFrom(Matrix,t)
importFrom(Seurat,DietSeurat)
importFrom(Seurat,FindClusters)
importFrom(Seurat,FindNeighbors)
importFrom(Seurat,FindVariableFeatures)
Expand Down
57 changes: 57 additions & 0 deletions R/pseudobulk.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' pseudobulk
#'
#' Generate pseudobulk counts of single cell data per cell population and sample name. Return a list of DGEList objects.
#'
#' @param x Seurat object.
#' @param split.by Split single cell data using this column. Typically cell populations or clusters.
#' @param group.by Aggregate counts from cells in each group. Typically sample names.
#' @param samples A data.frame with sample information to include in slot "samples" in the DGEList object.
#' @param genes A data.frame with gene information to include in slot "genes" in the DGEList object.
#' @param assay Seurat assay to use. Default: NULL (default assay).
#' @param layers Seurat layers to use. Default: counts.
#'
#' @export
pseudobulk <- function(x, split.by, group.by, samples=NULL, genes=NULL, ...) {
UseMethod("pseudobulk")
}

#' @rdname pseudobulk
#' @export
pseudobulk.Seurat <- function(x, split.by, group.by, samples=NULL, genes=NULL, assay=NULL, layers="counts", ...) {
assay <- DefaultAssay(x)
x <- Seurat::DietSeurat(x, layers=layers, assay=assay)

groups <- x[[]][[split.by]]
if (!is.factor(groups)) groups <- factor(groups)
group_levels <- levels(groups)
xl <- lapply(group_levels, function(group) {
x[, groups == group]
})
names(xl) <- group_levels

if (is.null(samples)) {
samples <- x[[]][[group.by]]
if (!is.factor(samples))
samples <- levels(factor(samples))
samples <- data.frame(row.names=samples)
}

xl <- lapply(xl, function(x) {
tmp <- Seurat::AggregateExpression(x, pb.method="aggregate", group.by=group.by, assay=assay)[[assay]]
if (!is.null(samples))
tmp <- tmp[, rownames(samples)]
tmp
})

dge <- lapply(names(xl), function(n) {
s <- samples
s$cluster <- n
tmp <- edgeR::DGEList(xl[[n]], samples=s)
tmp$genes <- genes
colnames(tmp) <- unname(colnames(tmp))
tmp
})
names(dge) <- names(xl)

dge
}
2 changes: 1 addition & 1 deletion R/scmisc-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
#' @importFrom grid gpar grid.circle unit.c
#' @importFrom stats prcomp kmeans dist hclust cutree as.dist cor p.adjust setNames
#' @importFrom utils tail head
#' @importFrom Seurat FindClusters FindNeighbors FindVariableFeatures RunPCA RunUMAP ScaleData
#' @importFrom Seurat FindClusters FindNeighbors FindVariableFeatures RunPCA RunUMAP ScaleData DietSeurat
## usethis namespace: end
NULL
38 changes: 38 additions & 0 deletions man/pseudobulk.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions tests/testthat/test-pseudobulk.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
context("test-pseudobulk")

m <- matrix(c(1:24), nrow=3, dimnames=list(paste0("gene-", 1:3), paste0("cell-", 1:8)))
samplename <- rep(c("D1", "D2"), 4)
celltype <- rep(c("A", "B"), each=4)
meta <- data.frame(samplename=samplename, celltype=celltype)

x <- CreateSeuratObject(as.sparse(m), meta=meta)

samples <- data.frame(sex=c("F", "M"), row.names=c("D1", "D2"))

y <- pseudobulk(x, split.by="celltype", group.by="samplename", assay="RNA", samples=samples)

test_that("sort_features works", {
expect_equal(length(y), 2)
expect_equal(dim(y[[1]]), c(3, 2))
expect_equal(dim(y[[2]]), c(3, 2))
expect_equal(dim(y[[1]]$samples), c(2, 5))
expect_equal(dim(y[[2]]$samples), c(2, 5))
expect_null(y[[1]]$genes)
expect_null(y[[2]]$genes)
})

genes <- data.frame(row.names=rownames(m))
y <- pseudobulk(x, split.by="celltype", group.by="samplename", assay="RNA", genes=genes)

test_that("sort_features works", {
expect_equal(length(y), 2)
expect_equal(dim(y[[1]]), c(3, 2))
expect_equal(dim(y[[2]]), c(3, 2))
expect_equal(dim(y[[1]]$samples), c(2, 4))
expect_equal(dim(y[[2]]$samples), c(2, 4))
expect_equal(dim(y[[1]]$genes), c(3, 0))
expect_equal(dim(y[[2]]$genes), c(3, 0))
})

0 comments on commit 7f7f212

Please sign in to comment.