use base pipe (#112)

* drop magrittr dependency * add downsampled data for vignettes part of #101 and #114
rnabioco · Nov 16, 2022 · 52417db · 52417db
2 parents 17d1a7b + 8e9932e
commit 52417db
Show file tree

Hide file tree

Showing 50 changed files with 169,324 additions and 715 deletions.
diff --git a/R/calc-diversity.R b/R/calc-diversity.R
@@ -12,7 +12,8 @@
 #' output columns.
 #' @param downsample Downsample clusters to the same size when calculating
 #' diversity metrics
-#' @param n_boots Number of bootstrap replicates
+#' @param n_boots Number of bootstrap replicates for calculating standard error,
+#' if n_boots is 0 this will be skipped.
 #' @param chain Chain to use for calculating diversity. Set to NULL to include
 #' all chains.
 #' @param chain_col meta.data column containing chains for each cell
@@ -80,7 +81,7 @@
 #' @export
 calc_diversity <- function(input, data_col, cluster_col = NULL,
                            method = abdiv::simpson, downsample = FALSE,
-                           n_boots = 1, chain = NULL, chain_col = "chains",
+                           n_boots = 0, chain = NULL, chain_col = "chains",
                            prefix = paste0(data_col, "_"), return_df = FALSE,
                            sep = ";") {
 
@@ -96,7 +97,9 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
     method <- purrr::set_names(list(method), nm)
   }
 
-  if (n_boots < 1) stop("n_boots must be an integer >=1.")
+  if (length(n_boots) != 1 || n_boots < 0) {
+    stop("n_boots must be a single value >= 0.")
+  }
 
   # Format input data
   names(method) <- paste0(prefix, names(method))
@@ -173,19 +176,11 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
   if (downsample)            sam <- dplyr::slice_sample(sam, n = sam_sz)
 
   # Calculate diversity
-  .calc_div <- function(x, met, n_bts = n_boots) {
-    fn <- function(x, i) met(table(x[i]))
-
-    bt <- boot::boot(x, fn, R = n_bts)
-
-    broom::tidy(bt)
-  }
-
   div <- purrr::imap_dfr(method, ~ {
     dplyr::summarize(
       sam,
       met       = .y,
-      diversity = list(.calc_div(!!sym(data_col), met = .x)),
+      diversity = list(.calc_div(!!sym(data_col), met = .x, n_bts = n_boots)),
       stderr    = purrr::map_dbl(.data$diversity, pull, "std.error"),
       diversity = purrr::map_dbl(.data$diversity, pull, "statistic")
     )
@@ -219,6 +214,33 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
   res
 }
 
+# Calculate diversity
+# n_boots should be 0 to just calculate stat w/ no reps
+# n_boots must be >1 to calculate standard error
+# t0 is the calculated stat from the original data
+.calc_div <- function(x, met, n_bts) {
+  fn <- function(x, i) {
+    res <- met(table(x[i]))
+
+    if (is.na(res)) {
+      stop(
+        "The diversity metric returned NA. Some metrics will return NA if a ",
+        "cluster is composed entirely of a single clonotype, ",
+        "check your clusters and rerun."
+      )
+    }
+
+    res
+  }
+
+  bt  <- boot::boot(x, fn, R = n_bts)
+  res <- broom::tidy(bt)
+
+  if (n_bts == 0) res <- tibble::tibble(statistic = bt$t0, std.error = NA)
+
+  res
+}
+
 
 #' Plot repertoire diversity
 #'
@@ -236,7 +258,8 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
 #' list(simpson = abdiv::simpson, shannon = abdiv::shannon)
 #' @param downsample Downsample clusters to the same size when calculating
 #' diversity metrics
-#' @param n_boots Number of bootstrap replicates to use for calculating standard error
+#' @param n_boots Number of bootstrap replicates for calculating standard error,
+#' if n_boots is 0 this will be skipped.
 #' @param chain Chain to use for calculating diversity. Set to NULL to include
 #' all chains.
 #' @param chain_col meta.data column containing chains for each cell
@@ -315,7 +338,7 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
 #' @export
 plot_diversity <- function(input, data_col, cluster_col = NULL,
                            group_col = NULL, method = abdiv::simpson,
-                           downsample = FALSE, n_boots = 1, chain = NULL,
+                           downsample = FALSE, n_boots = 0, chain = NULL,
                            chain_col = "chains", plot_colors = NULL,
                            plot_lvls = names(plot_colors), panel_nrow = NULL,
                            panel_scales = "free", sep = ";", ...) {
@@ -324,17 +347,17 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
     stop("Must include names if providing a list of methods.")
   }
 
-  .chk_group_cols(cluster_col, group_col)
+  .chk_group_cols(cluster_col, group_col, input)
 
   if (length(method) == 1 && is.null(names(method))) {
     nm <- as.character(substitute(method))
     nm <- dplyr::last(nm)
 
-    method        <- list(method)
+    method <- list(method)
     names(method) <- nm
   }
 
-  if (!is.null(group_col)) n_boots <- 1
+  if (!is.null(group_col)) n_boots <- 0
 
   # Diversity columns
   div_cols <- paste0(names(method), "_", "diversity")
@@ -344,8 +367,6 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
 
   if (n_boots > 1) all_div_cols <- c(all_div_cols, err_cols)
 
-  plt_cols <- c(cluster_col, group_col, all_div_cols)
-
   # Calculate diversity
   # remove any existing diversity columns from plt_dat
   plt_dat <- .get_meta(input)
@@ -367,7 +388,11 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
 
   # Format data for plotting
   plt_dat <- dplyr::filter(plt_dat, !is.na(!!sym(data_col)))
-  plt_dat <- dplyr::distinct(plt_dat, !!!syms(plt_cols))
+
+  keep_cols <- .get_matching_clmns(plt_dat, c(data_col, cluster_col))
+  keep_cols <- c(cluster_col, data_col, keep_cols)
+  plt_dat   <- dplyr::distinct(plt_dat, !!!syms(keep_cols))
+
   plt_dat <- tidyr::pivot_longer(plt_dat, all_of(all_div_cols))
 
   re <- "^(.+)_(diversity|stderr)$"
@@ -385,6 +410,7 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
   lvls_col <- lvls_col %||% cluster_col
 
   plt_dat <- .set_lvls(plt_dat, lvls_col, plot_lvls)
+  plt_dat <- .set_lvls(plt_dat, "met", names(method))
 
   include_x_labs <- !is.null(cluster_col)
 
@@ -480,8 +506,8 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
 #' @param cluster_col meta.data column containing cluster IDs to use for
 #' grouping cells when calculating clonotype abundance
 #' @param method Method to use for calculating diversity
-#' @param n_boots Number of bootstrap replicates to use for calculating
-#' standard error
+#' @param n_boots Number of bootstrap replicates for calculating standard error,
+#' if n_boots is 0 this will be skipped.
 #' @param chain Chain to use for calculating diversity. Set to NULL to include
 #' all chains.
 #' @param chain_col meta.data column containing chains for each cell

diff --git a/R/data.R b/R/data.R
@@ -21,3 +21,9 @@
 #' A tiny SingleCellExperiment object with V(D)J data
 #' @format A SingleCellExperiment object with 200 features, 204 cells, and 1 assay
 "vdj_sce"
+
+#' A small Seurat object containing downsampled splenocyte data
+#' 
+#' A small Seurat object containing downsampled splenocyte data
+#' @format A Seurat object with 10701 features, 500 cells, and 1 assay
+"splen_so"
diff --git a/R/filter-vdj.R b/R/filter-vdj.R
@@ -119,13 +119,7 @@ filter_vdj <- function(input, filt, data_cols = NULL,
     keep_rows <- unlist(keep_rows)
     vdj_cols  <- vdj_cols[vdj_cols != CELL_COL]
 
-    vdj <- meta %>%
-      mutate(across(all_of(vdj_cols), ~ {
-        x <- .x
-        typ <- typeof(x)
-        x[!keep_rows] <- as(NA, typ)
-        x
-      }))
+    vdj <- mutate(meta, across(all_of(vdj_cols), .add_na, !keep_rows))
 
     res <- .add_meta(input, vdj)
 
@@ -158,20 +152,25 @@ filter_vdj <- function(input, filt, data_cols = NULL,
   other_cols <- vdj_cols[!vdj_cols %in% sep_cols]
   na_rows    <- purrr::map_lgl(keep_rows, ~ !any(.x))
 
-  vdj <- vdj %>%
-    mutate(across(all_of(other_cols), ~ {
-      x <- .x
-      typ <- typeof(x)
-      x[na_rows] <- as(NA, typ)
-      x
-    }))
+  vdj <- mutate(vdj, across(all_of(other_cols), .add_na, na_rows))
 
   # Format results
   res <- .add_meta(input, vdj)
 
   res
 }
 
+#' Insert NAs based on logical index
+#' @noRd
+.add_na <- function(x, lgl_idx) {
+  typ <- typeof(x)
+
+  x[lgl_idx] <- as(NA, typ)
+
+  x
+}
+
+
 
 
 # vdj <- dplyr::mutate(

diff --git a/R/import-vdj.R b/R/import-vdj.R
@@ -939,12 +939,10 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "",
 
   # If no vdj_coords, return mutation totals
   if (identical(vdj_coords, NA)) {
-    res <- all_muts %>%
-      tidyr::pivot_wider(
-        names_from  = "type",
-        values_from = "n",
-        values_fill = 0
-      )
+    res <- tidyr::pivot_wider(
+      all_muts,
+      names_from  = "type", values_from = "n", values_fill = 0
+    )
 
     res <- dplyr::mutate(
       res,

diff --git a/R/mutate-vdj.R b/R/mutate-vdj.R
@@ -389,7 +389,7 @@ summarize_vdj <- function(input, data_cols, fn = NULL, ..., chain = NULL, chain_
         empty_val = NA
       )
 
-      # Add prefix to data_cols so temporary columns are usedvdj %>% hea
+      # Add prefix to data_cols so temporary columns are used
       data_cols <- paste0(prfx, data_cols)
 
       # Set col_names so prefix is removed from columns

diff --git a/R/utils-plots.R b/R/utils-plots.R
@@ -460,7 +460,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
 #' @param y Variable to plot on y-axis
 #' @param .fill Variable to use for fill color
 #' @param clrs Vector of colors for plotting
-#' @param trans Method to use for tranforming data
+#' @param trans Method to use for transforming data
 #' @param y_ttl Title for y-axis
 #' @param ang Angle of x-axis text
 #' @param hjst Horizontal justification for x-axis text
@@ -472,7 +472,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
                          trans = "identity", y_ttl = y, ang = 45, hjst = 1,
                          ...) {
 
-  # Reverse bar order
+  # Reverse bar order... WHY???
   lvls  <- rev(levels(pull(df_in, x)))
   df_in <- .set_lvls(df_in, x, lvls)
 
@@ -820,13 +820,50 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
 
 #' Check cluster_col and group_col arguments
 #' @noRd
-.chk_group_cols <- function(cluster_col, group_col) {
+.chk_group_cols <- function(cluster_col, group_col, input = NULL) {
   if (!is.null(group_col) && is.null(cluster_col)) {
     stop("cluster_col must be provided when group_col is specified.")
   }
 
   if (!is.null(group_col) && identical(group_col, cluster_col)) {
     stop("group_col and cluster_col must specify different columns.")
   }
+
+  if (!is.null(cluster_col) && !is.null(group_col) && !is.null(input)) {
+    dat <- .get_meta(input)
+
+    chk <- .chk_matching_vals(dat[[cluster_col]], dat[[group_col]])
+
+    if (!chk) {
+      stop(
+        "There must be a single group label for each cluster, ",
+        "i.e. each cluster can only belong to one group. ",
+        "Check the values in group_col and cluster_col."
+      )
+    }
+  }
+}
+
+.chk_matching_vals <- function(x, y) {
+  if (length(x) != length(y)) stop("x and y must be the same length.")
+
+  res <- paste0(x, y)
+
+  dplyr::n_distinct(x) == dplyr::n_distinct(res)
+}
+
+.get_matching_clmns <- function(df, clmn) {
+  dat <- as.list(df)
+
+  clmns <- names(dat)
+  clmns <- clmns[!clmns %in% clmn]
+  clmns <- dat[clmns]
+
+  clmn <- dat[clmn]
+  clmn <- purrr::reduce(clmn, paste0)
+
+  mtch <- purrr::map_lgl(clmns, ~ .chk_matching_vals(clmn, .x))
+
+  names(clmns[mtch])
 }
 
diff --git a/R/utils-tidy-eval.R b/R/utils-tidy-eval.R
@@ -23,8 +23,8 @@
 #'
 #'   ```
 #'   my_function <- function(data, var, ...) {
-#'     data %>%
-#'       group_by(...) %>%
+#'     data |>
+#'       group_by(...) |>
 #'       summarise(mean = mean({{ var }}))
 #'   }
 #'   ```
@@ -43,8 +43,8 @@
 #'     dots <- enquos(...)
 #'
 #'     # Inject
-#'     data %>%
-#'       group_by(!!!dots) %>%
+#'     data |>
+#'       group_by(!!!dots) |>
 #'       summarise(mean = mean(!!var))
 #'   }
 #'   ```
@@ -60,7 +60,7 @@
 #'
 #'   ```
 #'   my_var <- "disp"
-#'   mtcars %>% summarise(mean = mean(.data[[my_var]]))
+#'   mtcars |> summarise(mean = mean(.data[[my_var]]))
 #'   ```
 #'
 #' * Another tidy eval operator is `:=`. It makes it possible to use
@@ -72,7 +72,7 @@
 #'   my_function <- function(data, var, suffix = "foo") {
 #'     # Use `{{` to tunnel function arguments and the usual glue
 #'     # operator `{` to interpolate plain strings.
-#'     data %>%
+#'     data |>
 #'       summarise("{{ var }}_mean_{suffix}" := mean({{ var }}))
 #'   }
 #'   ```
@@ -87,7 +87,7 @@
 #'   my_function <- function(data, var, suffix = "foo") {
 #'     var <- enquo(var)
 #'     prefix <- as_label(var)
-#'     data %>%
+#'     data |>
 #'       summarise("{prefix}_mean_{suffix}" := mean(!!var))
 #'   }
 #'   ```