Skip to content

Commit

Permalink
use base pipe (#112)
Browse files Browse the repository at this point in the history
* drop magrittr dependency
* add downsampled data for vignettes

part of #101 and #114
  • Loading branch information
sheridar authored Nov 16, 2022
2 parents 17d1a7b + 8e9932e commit 52417db
Show file tree
Hide file tree
Showing 50 changed files with 169,324 additions and 715 deletions.
70 changes: 48 additions & 22 deletions R/calc-diversity.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
#' output columns.
#' @param downsample Downsample clusters to the same size when calculating
#' diversity metrics
#' @param n_boots Number of bootstrap replicates
#' @param n_boots Number of bootstrap replicates for calculating standard error,
#' if n_boots is 0 this will be skipped.
#' @param chain Chain to use for calculating diversity. Set to NULL to include
#' all chains.
#' @param chain_col meta.data column containing chains for each cell
Expand Down Expand Up @@ -80,7 +81,7 @@
#' @export
calc_diversity <- function(input, data_col, cluster_col = NULL,
method = abdiv::simpson, downsample = FALSE,
n_boots = 1, chain = NULL, chain_col = "chains",
n_boots = 0, chain = NULL, chain_col = "chains",
prefix = paste0(data_col, "_"), return_df = FALSE,
sep = ";") {

Expand All @@ -96,7 +97,9 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
method <- purrr::set_names(list(method), nm)
}

if (n_boots < 1) stop("n_boots must be an integer >=1.")
if (length(n_boots) != 1 || n_boots < 0) {
stop("n_boots must be a single value >= 0.")
}

# Format input data
names(method) <- paste0(prefix, names(method))
Expand Down Expand Up @@ -173,19 +176,11 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
if (downsample) sam <- dplyr::slice_sample(sam, n = sam_sz)

# Calculate diversity
.calc_div <- function(x, met, n_bts = n_boots) {
fn <- function(x, i) met(table(x[i]))

bt <- boot::boot(x, fn, R = n_bts)

broom::tidy(bt)
}

div <- purrr::imap_dfr(method, ~ {
dplyr::summarize(
sam,
met = .y,
diversity = list(.calc_div(!!sym(data_col), met = .x)),
diversity = list(.calc_div(!!sym(data_col), met = .x, n_bts = n_boots)),
stderr = purrr::map_dbl(.data$diversity, pull, "std.error"),
diversity = purrr::map_dbl(.data$diversity, pull, "statistic")
)
Expand Down Expand Up @@ -219,6 +214,33 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
res
}

# Calculate diversity
# n_boots should be 0 to just calculate stat w/ no reps
# n_boots must be >1 to calculate standard error
# t0 is the calculated stat from the original data
.calc_div <- function(x, met, n_bts) {
fn <- function(x, i) {
res <- met(table(x[i]))

if (is.na(res)) {
stop(
"The diversity metric returned NA. Some metrics will return NA if a ",
"cluster is composed entirely of a single clonotype, ",
"check your clusters and rerun."
)
}

res
}

bt <- boot::boot(x, fn, R = n_bts)
res <- broom::tidy(bt)

if (n_bts == 0) res <- tibble::tibble(statistic = bt$t0, std.error = NA)

res
}


#' Plot repertoire diversity
#'
Expand All @@ -236,7 +258,8 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
#' list(simpson = abdiv::simpson, shannon = abdiv::shannon)
#' @param downsample Downsample clusters to the same size when calculating
#' diversity metrics
#' @param n_boots Number of bootstrap replicates to use for calculating standard error
#' @param n_boots Number of bootstrap replicates for calculating standard error,
#' if n_boots is 0 this will be skipped.
#' @param chain Chain to use for calculating diversity. Set to NULL to include
#' all chains.
#' @param chain_col meta.data column containing chains for each cell
Expand Down Expand Up @@ -315,7 +338,7 @@ calc_diversity <- function(input, data_col, cluster_col = NULL,
#' @export
plot_diversity <- function(input, data_col, cluster_col = NULL,
group_col = NULL, method = abdiv::simpson,
downsample = FALSE, n_boots = 1, chain = NULL,
downsample = FALSE, n_boots = 0, chain = NULL,
chain_col = "chains", plot_colors = NULL,
plot_lvls = names(plot_colors), panel_nrow = NULL,
panel_scales = "free", sep = ";", ...) {
Expand All @@ -324,17 +347,17 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
stop("Must include names if providing a list of methods.")
}

.chk_group_cols(cluster_col, group_col)
.chk_group_cols(cluster_col, group_col, input)

if (length(method) == 1 && is.null(names(method))) {
nm <- as.character(substitute(method))
nm <- dplyr::last(nm)

method <- list(method)
method <- list(method)
names(method) <- nm
}

if (!is.null(group_col)) n_boots <- 1
if (!is.null(group_col)) n_boots <- 0

# Diversity columns
div_cols <- paste0(names(method), "_", "diversity")
Expand All @@ -344,8 +367,6 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,

if (n_boots > 1) all_div_cols <- c(all_div_cols, err_cols)

plt_cols <- c(cluster_col, group_col, all_div_cols)

# Calculate diversity
# remove any existing diversity columns from plt_dat
plt_dat <- .get_meta(input)
Expand All @@ -367,7 +388,11 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,

# Format data for plotting
plt_dat <- dplyr::filter(plt_dat, !is.na(!!sym(data_col)))
plt_dat <- dplyr::distinct(plt_dat, !!!syms(plt_cols))

keep_cols <- .get_matching_clmns(plt_dat, c(data_col, cluster_col))
keep_cols <- c(cluster_col, data_col, keep_cols)
plt_dat <- dplyr::distinct(plt_dat, !!!syms(keep_cols))

plt_dat <- tidyr::pivot_longer(plt_dat, all_of(all_div_cols))

re <- "^(.+)_(diversity|stderr)$"
Expand All @@ -385,6 +410,7 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
lvls_col <- lvls_col %||% cluster_col

plt_dat <- .set_lvls(plt_dat, lvls_col, plot_lvls)
plt_dat <- .set_lvls(plt_dat, "met", names(method))

include_x_labs <- !is.null(cluster_col)

Expand Down Expand Up @@ -480,8 +506,8 @@ plot_diversity <- function(input, data_col, cluster_col = NULL,
#' @param cluster_col meta.data column containing cluster IDs to use for
#' grouping cells when calculating clonotype abundance
#' @param method Method to use for calculating diversity
#' @param n_boots Number of bootstrap replicates to use for calculating
#' standard error
#' @param n_boots Number of bootstrap replicates for calculating standard error,
#' if n_boots is 0 this will be skipped.
#' @param chain Chain to use for calculating diversity. Set to NULL to include
#' all chains.
#' @param chain_col meta.data column containing chains for each cell
Expand Down
6 changes: 6 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@
#' A tiny SingleCellExperiment object with V(D)J data
#' @format A SingleCellExperiment object with 200 features, 204 cells, and 1 assay
"vdj_sce"

#' A small Seurat object containing downsampled splenocyte data
#'
#' A small Seurat object containing downsampled splenocyte data
#' @format A Seurat object with 10701 features, 500 cells, and 1 assay
"splen_so"
27 changes: 13 additions & 14 deletions R/filter-vdj.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,7 @@ filter_vdj <- function(input, filt, data_cols = NULL,
keep_rows <- unlist(keep_rows)
vdj_cols <- vdj_cols[vdj_cols != CELL_COL]

vdj <- meta %>%
mutate(across(all_of(vdj_cols), ~ {
x <- .x
typ <- typeof(x)
x[!keep_rows] <- as(NA, typ)
x
}))
vdj <- mutate(meta, across(all_of(vdj_cols), .add_na, !keep_rows))

res <- .add_meta(input, vdj)

Expand Down Expand Up @@ -158,20 +152,25 @@ filter_vdj <- function(input, filt, data_cols = NULL,
other_cols <- vdj_cols[!vdj_cols %in% sep_cols]
na_rows <- purrr::map_lgl(keep_rows, ~ !any(.x))

vdj <- vdj %>%
mutate(across(all_of(other_cols), ~ {
x <- .x
typ <- typeof(x)
x[na_rows] <- as(NA, typ)
x
}))
vdj <- mutate(vdj, across(all_of(other_cols), .add_na, na_rows))

# Format results
res <- .add_meta(input, vdj)

res
}

#' Insert NAs based on logical index
#' @noRd
.add_na <- function(x, lgl_idx) {
typ <- typeof(x)

x[lgl_idx] <- as(NA, typ)

x
}




# vdj <- dplyr::mutate(
Expand Down
10 changes: 4 additions & 6 deletions R/import-vdj.R
Original file line number Diff line number Diff line change
Expand Up @@ -939,12 +939,10 @@ import_vdj <- function(input = NULL, vdj_dir = NULL, prefix = "",

# If no vdj_coords, return mutation totals
if (identical(vdj_coords, NA)) {
res <- all_muts %>%
tidyr::pivot_wider(
names_from = "type",
values_from = "n",
values_fill = 0
)
res <- tidyr::pivot_wider(
all_muts,
names_from = "type", values_from = "n", values_fill = 0
)

res <- dplyr::mutate(
res,
Expand Down
2 changes: 1 addition & 1 deletion R/mutate-vdj.R
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ summarize_vdj <- function(input, data_cols, fn = NULL, ..., chain = NULL, chain_
empty_val = NA
)

# Add prefix to data_cols so temporary columns are usedvdj %>% hea
# Add prefix to data_cols so temporary columns are used
data_cols <- paste0(prfx, data_cols)

# Set col_names so prefix is removed from columns
Expand Down
43 changes: 40 additions & 3 deletions R/utils-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
#' @param y Variable to plot on y-axis
#' @param .fill Variable to use for fill color
#' @param clrs Vector of colors for plotting
#' @param trans Method to use for tranforming data
#' @param trans Method to use for transforming data
#' @param y_ttl Title for y-axis
#' @param ang Angle of x-axis text
#' @param hjst Horizontal justification for x-axis text
Expand All @@ -472,7 +472,7 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {
trans = "identity", y_ttl = y, ang = 45, hjst = 1,
...) {

# Reverse bar order
# Reverse bar order... WHY???
lvls <- rev(levels(pull(df_in, x)))
df_in <- .set_lvls(df_in, x, lvls)

Expand Down Expand Up @@ -820,13 +820,50 @@ trim_lab <- function(x, max_len = 25, ellipsis = "...") {

#' Check cluster_col and group_col arguments
#' @noRd
.chk_group_cols <- function(cluster_col, group_col) {
.chk_group_cols <- function(cluster_col, group_col, input = NULL) {
if (!is.null(group_col) && is.null(cluster_col)) {
stop("cluster_col must be provided when group_col is specified.")
}

if (!is.null(group_col) && identical(group_col, cluster_col)) {
stop("group_col and cluster_col must specify different columns.")
}

if (!is.null(cluster_col) && !is.null(group_col) && !is.null(input)) {
dat <- .get_meta(input)

chk <- .chk_matching_vals(dat[[cluster_col]], dat[[group_col]])

if (!chk) {
stop(
"There must be a single group label for each cluster, ",
"i.e. each cluster can only belong to one group. ",
"Check the values in group_col and cluster_col."
)
}
}
}

.chk_matching_vals <- function(x, y) {
if (length(x) != length(y)) stop("x and y must be the same length.")

res <- paste0(x, y)

dplyr::n_distinct(x) == dplyr::n_distinct(res)
}

.get_matching_clmns <- function(df, clmn) {
dat <- as.list(df)

clmns <- names(dat)
clmns <- clmns[!clmns %in% clmn]
clmns <- dat[clmns]

clmn <- dat[clmn]
clmn <- purrr::reduce(clmn, paste0)

mtch <- purrr::map_lgl(clmns, ~ .chk_matching_vals(clmn, .x))

names(clmns[mtch])
}

14 changes: 7 additions & 7 deletions R/utils-tidy-eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
#'
#' ```
#' my_function <- function(data, var, ...) {
#' data %>%
#' group_by(...) %>%
#' data |>
#' group_by(...) |>
#' summarise(mean = mean({{ var }}))
#' }
#' ```
Expand All @@ -43,8 +43,8 @@
#' dots <- enquos(...)
#'
#' # Inject
#' data %>%
#' group_by(!!!dots) %>%
#' data |>
#' group_by(!!!dots) |>
#' summarise(mean = mean(!!var))
#' }
#' ```
Expand All @@ -60,7 +60,7 @@
#'
#' ```
#' my_var <- "disp"
#' mtcars %>% summarise(mean = mean(.data[[my_var]]))
#' mtcars |> summarise(mean = mean(.data[[my_var]]))
#' ```
#'
#' * Another tidy eval operator is `:=`. It makes it possible to use
Expand All @@ -72,7 +72,7 @@
#' my_function <- function(data, var, suffix = "foo") {
#' # Use `{{` to tunnel function arguments and the usual glue
#' # operator `{` to interpolate plain strings.
#' data %>%
#' data |>
#' summarise("{{ var }}_mean_{suffix}" := mean({{ var }}))
#' }
#' ```
Expand All @@ -87,7 +87,7 @@
#' my_function <- function(data, var, suffix = "foo") {
#' var <- enquo(var)
#' prefix <- as_label(var)
#' data %>%
#' data |>
#' summarise("{prefix}_mean_{suffix}" := mean(!!var))
#' }
#' ```
Expand Down
Loading

0 comments on commit 52417db

Please sign in to comment.