Skip to content

Commit

Permalink
Merge pull request #373 from cole-trapnell-lab/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
hpliner authored Jun 11, 2020
2 parents 1a02274 + 161c121 commit 4137e48
Show file tree
Hide file tree
Showing 53 changed files with 1,441 additions and 550 deletions.
83 changes: 48 additions & 35 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,51 +1,64 @@
cache: packages
sudo: required
warnings_are_errors: true
os: linux
dist: xenial
language: r
before_install:
- sudo apt-get install -y libudunits2-dev
- sudo apt-get install -y gdal-bin
- sudo apt-get install -y libgdal1-dev
r:
- bioc-release
r_packages:
- covr
#r_packages:
# - covr
r_github_packages:
- VPetukhov/ggrastr
- cole-trapnell-lab/leidenbase
r_binary_packages:
- assertthat
- dplyr
- devtools
- ggplot2
- ggrepel
- igraph
- irlba
- lmtest
- MASS
- Matrix
- pbapply
- pbmcapply
- pheatmap
- plotly
- plyr
- proxy
- pryr
- pscl
- purrr
- RANN
- Rcpp
- reshape2
- RhpcBLASctl
- roxygen2
- shiny
- slam
- spdep
- speedglm
- stringr
- tibble
- tidyr
- viridis
#r_binary_packages:
r_packages:
- covr
- assertthat
- dplyr
- devtools
- ggplot2
- ggrepel
- igraph
- irlba
- lmtest
- MASS
- Matrix
- pbapply
- pbmcapply
- pheatmap
- plotly
- plyr
- proxy
- pryr
- pscl
- purrr
- RANN
- Rcpp
- reshape2
- RhpcBLASctl
- roxygen2
- shiny
- slam
- spdep
- speedglm
- stringr
- tibble
- tidyr
- viridis

before_script:
- export PKG_NAME=$(Rscript -e 'cat(paste0(devtools::as.package(".")$package))')
- export PKG_TARBALL=$(Rscript -e 'pkg <- devtools::as.package("."); cat(paste0(pkg$package,"_",pkg$version,".tar.gz"))')
- R CMD build --no-build-vignettes .
- R CMD INSTALL ${PKG_TARBALL}
- rm ${PKG_TARBALL}
- echo "Session info:"
- Rscript -e "library(${PKG_NAME});devtools::session_info('${PKG_NAME}')"

script:
- |
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: monocle3
Title: Clustering, differential expression, and trajectory analysis for single-
cell RNA-Seq
Version: 0.2.1
Version: 0.2.2
Authors@R:
person(given = "Hannah",
family = "Pliner",
Expand All @@ -27,7 +27,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
LinkingTo:
Rcpp
Depends:
Expand Down Expand Up @@ -74,7 +74,7 @@ Imports:
speedglm (>= 0.3-2),
stringr (>= 1.4.0),
SummarizedExperiment (>= 1.11.5),
uwot (>= 0.1.3),
uwot (>= 0.1.8),
tibble (>= 2.1.1),
tidyr (>= 0.8.3),
viridis (>= 0.5.1)
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(graph_test)
export(learn_graph)
export(load_a549)
export(load_cellranger_data)
export(load_mm_data)
export(load_mtx_data)
export(model_predictions)
export(new_cell_data_set)
Expand Down
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
# monocle3 0.2.2

### Changes
* Added load_mm_data() to load data from a Matrix Market sparse matrix file together with gene and cell data files.
* Added rann.k parameter to learn_graph().
* Added speedglm.maxiter parameter to top_markers().

### Bug fixes
* Fixed combine_cds() issues.
* Fixed learn_graph(use_partition=FALSE) issue.
* Fixed batchelor::fastMNN(pc.input) deprecation issue.
* Fixed choose_graph_segments() issue.
* Fixed missing gaussian family in fit_models().
* Fixed violin plot by adding a pseudocount.
* Fixed fit_models() to run detect_genes() if needed.
* Fixed compare_models() issues.
* Fixed check for undefined values in fit_models() formula.
* Fixed plot_cells() plotting order issue.
* Fixed find_gene_modules() run-to-run variation issue.
* Fixed rlist package namespace collision.
* Fixed aggregate_gene_expression(gene_group_df) to allow short gene names.

# monocle3 0.2.0

Expand Down
9 changes: 3 additions & 6 deletions R/alignment.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,12 @@ align_cds <- function(cds,
"single-cell RNA-sequencing data are corrected by matching",
"mutual nearest neighbors.' Nat. Biotechnol., 36(5),",
"421-427. doi: 10.1038/nbt.4091"))
corrected_PCA = batchelor::fastMNN(as.matrix(preproc_res),
batch=colData(cds)[,alignment_group],
k=alignment_k,
cos.norm=FALSE,
pc.input = TRUE)
corrected_PCA = batchelor::reducedMNN(as.matrix(preproc_res),
batch=colData(cds)[,alignment_group],
k=alignment_k)
preproc_res = corrected_PCA$corrected
cds <- add_citation(cds, "MNN_correct")
}

reducedDims(cds)[["Aligned"]] <- as.matrix(preproc_res)

cds
Expand Down
6 changes: 3 additions & 3 deletions R/cluster_cells.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cluster_cells <- function(cds,
assertthat::assert_that(!is.null(reducedDims(cds)[[reduction_method]]),
msg = paste("No dimensionality reduction for",
reduction_method, "calculated.",
"Please run reduce_dimensions with",
"Please run reduce_dimension with",
"reduction_method =", reduction_method,
"before running cluster_cells"))

Expand All @@ -119,11 +119,11 @@ cluster_cells <- function(cds,
partition_qval, verbose)
partitions <- igraph::components(
cluster_graph_res$cluster_g)$membership[cluster_result$optim_res$membership]
names(partitions) <- row.names(reduced_dim_res)
partitions <- as.factor(partitions)
} else {
partitions <- rep(1, nrow(colData(cds)))
}
names(partitions) <- row.names(reduced_dim_res)
clusters <- factor(igraph::membership(cluster_result$optim_res))
cds@clusters[[reduction_method]] <- list(cluster_result = cluster_result,
partitions = partitions,
Expand All @@ -144,11 +144,11 @@ cluster_cells <- function(cds,
partition_qval, verbose)
partitions <- igraph::components(
cluster_graph_res$cluster_g)$membership[cluster_result$optim_res$membership]
names(partitions) <- row.names(reduced_dim_res)
partitions <- as.factor(partitions)
} else {
partitions <- rep(1, nrow(colData(cds)))
}
names(partitions) <- row.names(reduced_dim_res)
clusters <- factor(igraph::membership(cluster_result$optim_res))
cds@clusters[[reduction_method]] <- list(cluster_result = cluster_result,
partitions = partitions,
Expand Down
43 changes: 30 additions & 13 deletions R/cluster_genes.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
#' @param umap.fast_sgd Whether to allow UMAP to perform fast stochastic gradient descent. Defaults to TRUE. Setting FALSE will result in slower, but deterministic behavior (if cores=1).
#' @param umap.nn_method The method used for nearest neighbor network construction during UMAP.
#' @param k Number of nearest neighbors used to build the k-nearest-neighbor graph for Leiden clustering. The value of k is related to the resolution of the clustering result: a larger k gives a lower resolution and vice versa. Defaults to 20.
#' @param louvain_iter Integer number of iterations used for Louvain clustering. The clustering result gives the largest modularity score will be used as the final clustering result. Default to be 1. Note that if louvain_iter is large than 1, the `seed` argument will be ignored.
#' @param leiden_iter Integer number of iterations used for Leiden clustering. The clustering result with the largest modularity score is used as the final clustering result. Default to be 1.
#' @param partition_qval Significance threshold used in Louvain community graph partitioning.
#' @param weight A logical argument determining whether to use the
#' Jaccard coefficient of two nearest neighbors (based on the overlap of
#' their kNN) as the weight used for Leiden clustering. Defaults to FALSE.
#' @param resolution Resolution parameter passed to Louvain. Can be a list. If
#' so, this method will evaluate modularity at each resolution and use the
#' one with the highest value.
#' @param random_seed the seed used by the random number generator in louvain-igraph package. This argument will be ignored if louvain_iter is larger than 1.
#' @param random_seed The seed used by the random number generator in Leiden. If non-zero, it is also passed to set.seed() before running UMAP.
#' @param cores number of cores computer should use to execute function
#' @param verbose Whether or not verbose output is printed.
#' @param ... Additional arguments passed to UMAP and Louvain analysis.
Expand All @@ -35,15 +35,15 @@ find_gene_modules <- function(cds,
umap.fast_sgd = FALSE,
umap.nn_method = "annoy",
k = 20,
louvain_iter = 1,
leiden_iter = 1,
partition_qval = 0.05,
weight = FALSE,
resolution = NULL,
random_seed = 0L,
cores=1,
verbose = F,
...) {
method = 'louvain'
method = 'leiden'
assertthat::assert_that(
tryCatch(expr = ifelse(match.arg(reduction_method) == "",TRUE, TRUE),
error = function(e) FALSE),
Expand All @@ -55,23 +55,27 @@ find_gene_modules <- function(cds,
assertthat::assert_that(is.character(reduction_method))
assertthat::assert_that(assertthat::is.count(k))
assertthat::assert_that(is.logical(weight))
assertthat::assert_that(assertthat::is.count(louvain_iter))
assertthat::assert_that(assertthat::is.count(leiden_iter))
## TO DO what is resolution?
assertthat::assert_that(is.numeric(partition_qval))
assertthat::assert_that(is.logical(verbose))
assertthat::assert_that(!is.null(reducedDims(cds)[[reduction_method]]),
msg = paste("No dimensionality reduction for",
reduction_method, "calculated.",
"Please run reduce_dimensions with",
"Please run reduce_dimension with",
"reduction_method =", reduction_method,
"before running cluster_cells"))

preprocess_mat <- cds@preprocess_aux$gene_loadings
if (is.null(cds@preprocess_aux$beta) == FALSE){
preprocess_mat = preprocess_mat %*% cds@preprocess_aux$beta
preprocess_mat = preprocess_mat %*% (-cds@preprocess_aux$beta)
}
preprocess_mat = preprocess_mat[intersect(rownames(cds), row.names(preprocess_mat)),]

# uwot::umap uses a random number generator
if( random_seed != 0L )
set.seed( random_seed )

umap_res = uwot::umap(as.matrix(preprocess_mat),
n_components = max_components,
metric = umap.metric,
Expand All @@ -88,14 +92,14 @@ find_gene_modules <- function(cds,
reduced_dim_res <- umap_res

if(verbose)
message("Running louvain clustering algorithm ...")
message("Running leiden clustering algorithm ...")

cluster_result <- leiden_clustering(data = reduced_dim_res,
pd = rowData(cds)[
row.names(reduced_dim_res),,drop=FALSE],
k = k,
weight = weight,
louvain_iter = louvain_iter,
num_iter = leiden_iter,
resolution_parameter = resolution,
random_seed = random_seed,
verbose = verbose, ...)
Expand Down Expand Up @@ -145,7 +149,8 @@ my.aggregate.Matrix = function (x, groupings = NULL, form = NULL, fun = "sum", .
#'
#' @param cds The cell_data_set on which this function operates
#' @param gene_group_df A dataframe in which the first column contains gene ids
#' and the second contains groups. If NULL, genes are not grouped.
#' or short gene names and the second contains groups. If NULL, genes are not
#' grouped.
#' @param cell_group_df A dataframe in which the first column contains cell ids
#' and the second contains groups. If NULL, cells are not grouped.
#' @param norm_method How to transform gene expression values before
Expand Down Expand Up @@ -185,14 +190,26 @@ aggregate_gene_expression <- function(cds,
fData(cds)$gene_short_name |
gene_group_df[,1] %in%
row.names(fData(cds)),,drop=FALSE]

# Convert gene short names to rownames if necessary. The more
# straightforward single call to recode took much longer.
# Thanks to Christopher Johnstone who posted this on github.
short_name_mask <- gene_group_df[[1]] %in% fData(cds)$gene_short_name
if (any(short_name_mask)) {
geneids <- as.character(gene_group_df[[1]])
geneids[short_name_mask] <- row.names(fData(cds))[match(
geneids[short_name_mask], fData(cds)$gene_short_name)]
gene_group_df[[1]] <- geneids
}

# gene_group_df = gene_group_df[row.names(fData(cds)),]

# FIXME: this should allow genes to be part of multiple groups. group_by
# over the second column with a call to colSum should do it.
agg_mat = as.matrix(my.aggregate.Matrix(agg_mat[gene_group_df[,1],],
as.factor(gene_group_df[,2]),
fun="sum"))
if (scale_agg_values){
if (scale_agg_values){
agg_mat <- t(scale(t(agg_mat)))
agg_mat[agg_mat < min_agg_value] <- min_agg_value
agg_mat[agg_mat > max_agg_value] <- max_agg_value
Expand All @@ -206,13 +223,13 @@ aggregate_gene_expression <- function(cds,
drop=FALSE]
agg_mat = agg_mat[,cell_group_df[,1]]
agg_mat = my.aggregate.Matrix(Matrix::t(agg_mat),
as.factor(cell_group_df[,2]),
as.factor(cell_group_df[,2]),
fun="mean")
agg_mat = Matrix::t(agg_mat)
}

if (exclude.na){
agg_mat = agg_mat[row.names(agg_mat) != "NA", colnames(agg_mat) != "NA"]
agg_mat <- agg_mat[rownames(agg_mat) != "NA", colnames(agg_mat) != "NA",drop=FALSE]
}
return(agg_mat)
}
Loading

0 comments on commit 4137e48

Please sign in to comment.