Skip to content

Commit

Permalink
Merge branch 'main' into internal-colname-messages
Browse files Browse the repository at this point in the history
karissawhiting committed Sep 18, 2023
2 parents 992cfb4 + 8e59e1d commit 2701e93
Showing 4 changed files with 58 additions and 11 deletions.
2 changes: 1 addition & 1 deletion R/subset-by-frequency.R
Original file line number Diff line number Diff line change
@@ -51,7 +51,7 @@ subset_by_frequency <- function(gene_binary, t = .1, other_vars = NULL) {

# Remove all NA columns ----------------------------------------------
all_na_alt <- apply(alt_only, 2, function(x) {
sum(is.na(x)) == nrow(alt_only)
sum(is.na(x)) == nrow(alt_only)
})

all_non_na_alt <- names(all_na_alt[!all_na_alt])
28 changes: 21 additions & 7 deletions R/summarize-by-gene.R
Original file line number Diff line number Diff line change
@@ -5,6 +5,8 @@
#' columns for mutation/cna/fusion.
#'
#' @param gene_binary a 0/1 matrix of gene alterations
#' @param other_vars One or more column names (quoted or unquoted) in data to be retained
#' in resulting data frame. Default is NULL.
#'
#' @return a binary matrix with a row for each sample and one column per gene
#' @export
@@ -19,7 +21,7 @@
#' ) %>%
#' summarize_by_gene()
#'
summarize_by_gene <- function(gene_binary) {
summarize_by_gene <- function(gene_binary, other_vars = NULL) {


# Checks ------------------------------------------------------------------
@@ -35,15 +37,25 @@ summarize_by_gene <- function(gene_binary) {
cli::cli_abort("Your {.field gene_binary} must have unique samples in {.code sample_id} column")
}

# Capture Other Columns to Retain -----------------------------------

other_vars <-
.select_to_varnames({{ other_vars }},
data = gene_binary,
arg_name = "other_vars"
)


# Create Sample Index -----------------------------------------------------

sample_index <- gene_binary %>%
select("sample_id") %>%
mutate(sample_index = paste0("samp", 1:nrow(gene_binary)))

alt_only <- as.matrix(select(gene_binary, -"sample_id"))
rownames(alt_only) <- sample_index$sample_index

# data frame of only alterations
alt_only <- as.data.frame(select(gene_binary, -"sample_id", -any_of(other_vars)))
row.names(alt_only) <- sample_index$sample_index

# check numeric class ---------
is_numeric <- apply(alt_only, 2, is.numeric)
@@ -89,13 +101,15 @@ summarize_by_gene <- function(gene_binary) {
all_bin <- as.data.frame(t(all_bin)) %>%
tibble::rownames_to_column("sample_index")

# join back to sample ID
# join back to sample ID and other vars
simp_gene_binary <- all_bin %>%
left_join(sample_index, ., by = "sample_index") %>%
select(-c("sample_index")) %>%
as.data.frame()
select(-c("sample_index"))

simp_gene_binary <- simp_gene_binary %>%
left_join(select(gene_binary, any_of(c("sample_id", other_vars))), by = "sample_id")

simp_gene_binary
return(simp_gene_binary)

}

5 changes: 4 additions & 1 deletion man/summarize_by_gene.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 32 additions & 2 deletions tests/testthat/test-summarize-by-gene.R
Original file line number Diff line number Diff line change
@@ -63,7 +63,6 @@ test_that("test that genes are properly summarized", {
mutate(name = str_remove(name, ".Amp|.Del|.fus"))%>%
tidyr::pivot_wider(names_from = name, values_from = value, values_fn = function (x) sum(x))%>%
mutate(across(!sample_id, ~ifelse(. > 0, 1, 0)))%>%
as.data.frame()%>%
relocate(colnames(sum_impact))

expect_equal(sum_impact, bin_impact_test)
@@ -95,7 +94,6 @@ test_that("test what happens to columns with all NA", {
mutate(name = str_remove(name, ".Amp|.Del|.fus"))%>%
tidyr::pivot_wider(names_from = name, values_from = value, values_fn = function (x) sum(x))%>%
mutate(across(!sample_id, ~ifelse(. > 0, 1, 0)))%>%
as.data.frame()%>%
relocate(colnames(sum_impact))%>%
mutate_if(~ all(is.na(.)), ~as.numeric(NA_integer_))

@@ -107,6 +105,37 @@ test_that("test what happens to columns with all NA", {
})


# test_that("all columns must be numeric to continue", {
#
# })

test_that("other vars are retained", {
samples <- Reduce(intersect, list(gnomeR::mutations$sampleId,
gnomeR::cna$sampleId,
gnomeR::sv$sampleId))


bin_impact <- create_gene_binary(samples = samples,
mutation = gnomeR::mutations,
cna = gnomeR::cna,
fusion = gnomeR::sv,
specify_panel = "impact") %>%
select(c(sample_id, starts_with("AR"), starts_with("PLCG2"), starts_with("PPM1D")))

set.seed(20230828)

bin_impact$random_color = sample(c("blue", "red", "yellow"),
size = 50, replace = TRUE)

expect_true("random_color" %in% names(bin_impact))
sum_impact <- summarize_by_gene(bin_impact,
other_vars = "random_color")

expect_true("random_color" %in% names(sum_impact))
expect_true("blue" %in% sum_impact$random_color)
})


test_that("no warning message thrown when only 1 alt type", {

samples <- gnomeR::mutations$sampleId
@@ -125,3 +154,4 @@ test_that("no warning message thrown when only 1 alt type", {




0 comments on commit 2701e93

Please sign in to comment.