From 25dd19be76358c644be5b61c58d007173487bdac Mon Sep 17 00:00:00 2001
From: Hannah Fuchs <fuchshannah50@gmail.com>
Date: Mon, 28 Aug 2023 14:50:35 -0400
Subject: [PATCH 1/5] update to allow other vars in summarize

---
 R/summarize-by-gene.R                   | 27 +++++++++++++++++++---
 tests/testthat/test-summarize-by-gene.R | 30 +++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/R/summarize-by-gene.R b/R/summarize-by-gene.R
index 0b875cfe..e069f8a3 100644
--- a/R/summarize-by-gene.R
+++ b/R/summarize-by-gene.R
@@ -5,6 +5,8 @@
 #' columns for mutation/cna/fusion.
 #'
 #' @param gene_binary a 0/1 matrix of gene alterations
+#' @param other_vars One or more column names (quoted or unquoted) in data to be retained
+#' in resulting data frame. Default is NULL.
 #'
 #' @return a binary matrix with a row for each sample and one column per gene
 #' @export
@@ -19,7 +21,7 @@
 #' ) %>%
 #'   summarize_by_gene()
 #'
-summarize_by_gene <- function(gene_binary) {
+summarize_by_gene <- function(gene_binary, other_vars = NULL) {
 
 
   # Checks ------------------------------------------------------------------
@@ -35,6 +37,14 @@ summarize_by_gene <- function(gene_binary) {
     cli::cli_abort("Your {.field gene_binary} must have unique samples in {.code sample_id} column")
   }
 
+  # Capture Other Columns to Retain -----------------------------------
+
+  other_vars <-
+    .select_to_varnames({{ other_vars }},
+                        data = gene_binary,
+                        arg_name = "other_vars"
+    )
+
 
   # Create Sample Index -----------------------------------------------------
 
@@ -42,8 +52,16 @@ summarize_by_gene <- function(gene_binary) {
     select("sample_id") %>%
     mutate(sample_index = paste0("samp", 1:nrow(gene_binary)))
 
+<<<<<<< Updated upstream
   alt_only <- as.matrix(select(gene_binary, -"sample_id"))
   rownames(alt_only) <- sample_index$sample_index
+=======
+  # data frame of only alterations
+  alt_only <- select(gene_binary, -"sample_id", -any_of(other_vars)) %>%
+    as.matrix()
+
+  row.names(alt_only) <- sample_index$sample_index
+>>>>>>> Stashed changes
 
   # check numeric class ---------
   is_numeric <- apply(alt_only, 2, is.numeric)
@@ -85,11 +103,14 @@ summarize_by_gene <- function(gene_binary) {
   all_bin <- as.data.frame(t(all_bin)) %>%
     tibble::rownames_to_column("sample_index")
 
-  # join back to sample ID
+  # join back to sample ID and other vars
   simp_gene_binary <- all_bin %>%
     left_join(sample_index, ., by = "sample_index") %>%
     select(-c("sample_index")) %>%
-    as.data.frame()
+    as.data.frame()%>%
+    left_join(gene_binary %>%
+                select(any_of(c("sample_id", other_vars))))%>%
+    suppressMessages()
 
   simp_gene_binary
 
diff --git a/tests/testthat/test-summarize-by-gene.R b/tests/testthat/test-summarize-by-gene.R
index 0d410056..b359b42e 100644
--- a/tests/testthat/test-summarize-by-gene.R
+++ b/tests/testthat/test-summarize-by-gene.R
@@ -106,5 +106,35 @@ test_that("test what happens to columns with all NA", {
 
   })
 
+<<<<<<< Updated upstream
+=======
+# test_that("all columns must be numeric to continue", {
+#
+# })
+
+test_that("other vars are retained", {
+  samples <- Reduce(intersect, list(gnomeR::mutations$sampleId, gnomeR::cna$sampleId,
+                                    gnomeR::sv$sampleId))
+
+
+  bin_impact <- create_gene_binary(samples=samples,
+                                    mutation = gnomeR::mutations,
+                                    cna = gnomeR::cna,
+                                    fusion = gnomeR::sv,
+                                    specify_panel = "impact") %>%
+    select(c(sample_id, starts_with("AR"), starts_with("PLCG2"), starts_with("PPM1D")))
+
+  set.seed(20230828)
+
+  bin_impact$random_color = sample(c("blue", "red",
+                                     "yellow"), size = 50, replace = TRUE)
+
+  expect_true("random_color" %in% names(bin_impact))
+  sum_impact <- summarize_by_gene(bin_impact,
+                                  other_vars = "random_color")
+
+  expect_true("random_color" %in% names(sum_impact))
+  expect_true("blue" %in% sum_impact$random_color)
+>>>>>>> Stashed changes
 
 

From 4d75bf4fc6c0712a60bc1644a07b7760a326d6b1 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Fri, 15 Sep 2023 14:49:49 -0400
Subject: [PATCH 2/5] Remove github notes and small updates

---
 R/subset-by-frequency.R                 |  2 +-
 R/summarize-by-gene.R                   | 21 +++++++--------------
 man/summarize_by_gene.Rd                |  5 ++++-
 tests/testthat/test-summarize-by-gene.R | 15 ++++++++-------
 4 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/R/subset-by-frequency.R b/R/subset-by-frequency.R
index b1eb84e9..cd6ec6ae 100644
--- a/R/subset-by-frequency.R
+++ b/R/subset-by-frequency.R
@@ -51,7 +51,7 @@ subset_by_frequency <- function(gene_binary, t = .1, other_vars = NULL) {
 
   # Remove all NA columns ----------------------------------------------
   all_na_alt <- apply(alt_only,  2, function(x) {
-    sum(is.na(x)) == nrow(alt_only)
+    # sum(is.na(x)) == nrow(alt_only)
   })
 
   all_non_na_alt <- names(all_na_alt[!all_na_alt])
diff --git a/R/summarize-by-gene.R b/R/summarize-by-gene.R
index e069f8a3..37d4c31b 100644
--- a/R/summarize-by-gene.R
+++ b/R/summarize-by-gene.R
@@ -52,16 +52,10 @@ summarize_by_gene <- function(gene_binary, other_vars = NULL) {
     select("sample_id") %>%
     mutate(sample_index = paste0("samp", 1:nrow(gene_binary)))
 
-<<<<<<< Updated upstream
-  alt_only <- as.matrix(select(gene_binary, -"sample_id"))
-  rownames(alt_only) <- sample_index$sample_index
-=======
-  # data frame of only alterations
-  alt_only <- select(gene_binary, -"sample_id", -any_of(other_vars)) %>%
-    as.matrix()
 
+  # data frame of only alterations
+  alt_only <- as.data.frame(select(gene_binary, -"sample_id", -any_of(other_vars)))
   row.names(alt_only) <- sample_index$sample_index
->>>>>>> Stashed changes
 
   # check numeric class ---------
   is_numeric <- apply(alt_only, 2, is.numeric)
@@ -106,13 +100,12 @@ summarize_by_gene <- function(gene_binary, other_vars = NULL) {
   # join back to sample ID and other vars
   simp_gene_binary <- all_bin %>%
     left_join(sample_index, ., by = "sample_index") %>%
-    select(-c("sample_index")) %>%
-    as.data.frame()%>%
-    left_join(gene_binary %>%
-                select(any_of(c("sample_id", other_vars))))%>%
-    suppressMessages()
+    select(-c("sample_index"))
+
+  simp_gene_binary <- simp_gene_binary %>%
+    left_join(select(gene_binary, any_of(c("sample_id", other_vars))), by = "sample_id")
 
-  simp_gene_binary
+  return(simp_gene_binary)
 
 }
 
diff --git a/man/summarize_by_gene.Rd b/man/summarize_by_gene.Rd
index d2bbb5cc..ff9537b0 100644
--- a/man/summarize_by_gene.Rd
+++ b/man/summarize_by_gene.Rd
@@ -4,10 +4,13 @@
 \alias{summarize_by_gene}
 \title{Simplify binary matrix to one column per gene that counts any alteration type as 1}
 \usage{
-summarize_by_gene(gene_binary)
+summarize_by_gene(gene_binary, other_vars = NULL)
 }
 \arguments{
 \item{gene_binary}{a 0/1 matrix of gene alterations}
+
+\item{other_vars}{One or more column names (quoted or unquoted) in data to be retained
+in resulting data frame. Default is NULL.}
 }
 \value{
 a binary matrix with a row for each sample and one column per gene
diff --git a/tests/testthat/test-summarize-by-gene.R b/tests/testthat/test-summarize-by-gene.R
index b359b42e..70d69055 100644
--- a/tests/testthat/test-summarize-by-gene.R
+++ b/tests/testthat/test-summarize-by-gene.R
@@ -106,18 +106,18 @@ test_that("test what happens to columns with all NA", {
 
   })
 
-<<<<<<< Updated upstream
-=======
+
 # test_that("all columns must be numeric to continue", {
 #
 # })
 
 test_that("other vars are retained", {
-  samples <- Reduce(intersect, list(gnomeR::mutations$sampleId, gnomeR::cna$sampleId,
+  samples <- Reduce(intersect, list(gnomeR::mutations$sampleId,
+                                    gnomeR::cna$sampleId,
                                     gnomeR::sv$sampleId))
 
 
-  bin_impact <- create_gene_binary(samples=samples,
+  bin_impact <- create_gene_binary(samples = samples,
                                     mutation = gnomeR::mutations,
                                     cna = gnomeR::cna,
                                     fusion = gnomeR::sv,
@@ -126,8 +126,8 @@ test_that("other vars are retained", {
 
   set.seed(20230828)
 
-  bin_impact$random_color = sample(c("blue", "red",
-                                     "yellow"), size = 50, replace = TRUE)
+  bin_impact$random_color = sample(c("blue", "red", "yellow"),
+                                   size = 50, replace = TRUE)
 
   expect_true("random_color" %in% names(bin_impact))
   sum_impact <- summarize_by_gene(bin_impact,
@@ -135,6 +135,7 @@ test_that("other vars are retained", {
 
   expect_true("random_color" %in% names(sum_impact))
   expect_true("blue" %in% sum_impact$random_color)
->>>>>>> Stashed changes
+})
+
 
 

From 8aef07ceae3af85451714fdd87c9efcba1682dda Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Fri, 15 Sep 2023 15:08:01 -0400
Subject: [PATCH 3/5] not sure why I commented this out

---
 R/subset-by-frequency.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/subset-by-frequency.R b/R/subset-by-frequency.R
index cd6ec6ae..3d77955f 100644
--- a/R/subset-by-frequency.R
+++ b/R/subset-by-frequency.R
@@ -51,7 +51,7 @@ subset_by_frequency <- function(gene_binary, t = .1, other_vars = NULL) {
 
   # Remove all NA columns ----------------------------------------------
   all_na_alt <- apply(alt_only,  2, function(x) {
-    # sum(is.na(x)) == nrow(alt_only)
+     sum(is.na(x)) == nrow(alt_only)
   })
 
   all_non_na_alt <- names(all_na_alt[!all_na_alt])

From e2ec06b05cb5ce586741f0493094f7b3c19659c0 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Fri, 15 Sep 2023 15:12:12 -0400
Subject: [PATCH 4/5] update tests

---
 tests/testthat/test-summarize-by-gene.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/testthat/test-summarize-by-gene.R b/tests/testthat/test-summarize-by-gene.R
index 96c5bcec..11cae682 100644
--- a/tests/testthat/test-summarize-by-gene.R
+++ b/tests/testthat/test-summarize-by-gene.R
@@ -63,7 +63,6 @@ test_that("test that genes are properly summarized", {
     mutate(name = str_remove(name, ".Amp|.Del|.fus"))%>%
     tidyr::pivot_wider(names_from = name, values_from = value, values_fn = function (x) sum(x))%>%
     mutate(across(!sample_id, ~ifelse(. > 0, 1, 0)))%>%
-    as.data.frame()%>%
     relocate(colnames(sum_impact))
 
   expect_equal(sum_impact, bin_impact_test)

From 69936c85950c953cbef2ad7f4eecff655cfd0399 Mon Sep 17 00:00:00 2001
From: karissawhiting <whitingk@mskcc.org>
Date: Fri, 15 Sep 2023 15:16:43 -0400
Subject: [PATCH 5/5] update test

---
 tests/testthat/test-summarize-by-gene.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/testthat/test-summarize-by-gene.R b/tests/testthat/test-summarize-by-gene.R
index 11cae682..4e3cb0b8 100644
--- a/tests/testthat/test-summarize-by-gene.R
+++ b/tests/testthat/test-summarize-by-gene.R
@@ -94,7 +94,6 @@ test_that("test what happens to columns with all NA", {
     mutate(name = str_remove(name, ".Amp|.Del|.fus"))%>%
     tidyr::pivot_wider(names_from = name, values_from = value, values_fn = function (x) sum(x))%>%
     mutate(across(!sample_id, ~ifelse(. > 0, 1, 0)))%>%
-    as.data.frame()%>%
     relocate(colnames(sum_impact))%>%
     mutate_if(~ all(is.na(.)), ~as.numeric(NA_integer_))