update with main branch

Merge branch 'main' of https://github.com/karissawhiting/cbioportalR into get-study-id-for-samples # Conflicts: # vignettes/overview-of-workflow.Rmd
karissawhiting · Jun 1, 2023 · c1964d4 · c1964d4
2 parents df49cd5 + 617fd75
commit c1964d4
Show file tree

Hide file tree

Showing 22 changed files with 403 additions and 147 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cbioportalR
 Title: Browse and Query Clinical and Genomic Data from cBioPortal
-Version: 1.0.1.9000
+Version: 1.0.1.9002
 Authors@R: 
     c(person(given = "Karissa",
            family = "Whiting",

diff --git a/NAMESPACE b/NAMESPACE
@@ -42,6 +42,8 @@ export(get_mutations_by_sample)
 export(get_mutations_by_study)
 export(get_panel_by_sample)
 export(get_samples_by_patient)
+export(get_segments_by_sample)
+export(get_segments_by_study)
 export(get_structural_variants_by_sample)
 export(get_structural_variants_by_study)
 export(get_study_info)

diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,7 @@
 * Added `available_sample_lists()` function which returns all available sample list IDs for a given study ID
 * Added `sample_list_id` argument to `available_samples()` which returns all samples IDs in specific sample list within a study (#53).
 * Fixed {cli} errors to make package compatible with {cli} v3.4.1.
+* Added CNA segmentation retrieval endpoint accessible via `get_segments_by_sample()` and `get_segments_by_study()`). You can use `get_genomics_by_*(return_segments = TRUE)` as well to access this data.
 
 # cbioportalR 1.0.1
 

diff --git a/R/by_database.R b/R/by_database.R
@@ -18,7 +18,7 @@ available_studies <- function(base_url =  NULL) {
   url_path <- paste0("studies/")
   res <- cbp_api(url_path, base_url = base_url)$content %>%
     dplyr::bind_rows(.) %>%
-    select(.data$studyId, everything())
+    select("studyId", everything())
 
   return(res)
 }

diff --git a/R/by_genes_or_panel.R b/R/by_genes_or_panel.R
@@ -101,8 +101,8 @@ get_alias <- function(hugo_symbol = NULL,
     mutate(alias = purrr::map(.data$url_path,
                    ~cbp_api(.x, base_url = base_url)$content)) %>%
     mutate(alias = purrr::simplify_all(.data$alias)) %>%
-    tidyr::unnest(.data$alias) %>%
-    select(-.data$url_path)
+    tidyr::unnest("alias") %>%
+    select(-"url_path")
 
   return(res)
 }
@@ -133,8 +133,8 @@ get_gene_panel <- function(panel_id = NULL, base_url = NULL) {
 
     tib %>%
       mutate(data = purrr::map(.data$genes, ~as_tibble(.x))) %>%
-      select(.data$genePanelId, .data$data, .data$description) %>%
-      tidyr::unnest(cols = .data$data)
+      select("genePanelId", "data", "description") %>%
+      tidyr::unnest(cols = "data")
 
   })
 

diff --git a/R/by_patients.R b/R/by_patients.R
@@ -40,8 +40,8 @@ get_samples_by_patient <- function(patient_id = NULL,
     res <- cbp_api(url_path = x, base_url = base_url)
     res$content
     df <- as.data.frame(res$content) %>%
-      select(.data$patientId, .data$sampleId,
-             .data$sampleType, .data$studyId)
+      select("patientId", "sampleId",
+             "sampleType", "studyId")
     df
   })
 
@@ -109,10 +109,8 @@ get_clinical_by_patient <- function(study_id = NULL,
 
 
     # get study ID
-    resolved_study_id <- study_id %>%
-      purrr::when(!is.null(.) ~ .,
-                  ~ suppressMessages(.guess_study_id(study_id, resolved_url)))
-
+    resolved_study_id <- study_id %||%
+      suppressMessages(.guess_study_id(study_id, resolved_url))
 
 
     # create lookup dataframe -
@@ -127,7 +125,7 @@ get_clinical_by_patient <- function(study_id = NULL,
   # Prep data frame for Query ------------------------------------------------
   patient_study_pairs_nest <- patient_study_pairs %>%
     group_by(.data$study_id) %>%
-    tidyr::nest(sample_id_nest = .data$patient_id)
+    tidyr::nest(sample_id_nest = "patient_id")
 
   # Query --------------------------------------------------------------------
   df <- purrr::map2_dfr(patient_study_pairs_nest$study_id, patient_study_pairs_nest$sample_id_nest,
@@ -164,7 +162,7 @@ get_clinical_by_patient <- function(study_id = NULL,
 
   resolved_clinical_attributes <- clinical_attribute %||%
     (available_clinical_attributes(study_id, base_url = base_url) %>%
-       pull(.data$clinicalAttributeId) %>%
+       dplyr::pull(.data$clinicalAttributeId) %>%
        unique())
 
   if(is.null(clinical_attribute)) {

diff --git a/R/by_samples.R b/R/by_samples.R
@@ -51,9 +51,8 @@ get_clinical_by_sample <- function(study_id = NULL,
 
 
     # get study ID
-    resolved_study_id <- study_id %>%
-      purrr::when(!is.null(.) ~ .,
-                  ~ suppressMessages(.guess_study_id(study_id, resolved_url)))
+    resolved_study_id <- study_id %||%
+      suppressMessages(.guess_study_id(study_id, resolved_url))
 
 
 
@@ -69,7 +68,7 @@ get_clinical_by_sample <- function(study_id = NULL,
   # Prep data frame for Query ------------------------------------------------
   sample_study_pairs_nest <- sample_study_pairs %>%
     group_by(.data$study_id) %>%
-    tidyr::nest(sample_id_nest = .data$sample_id)
+    tidyr::nest(sample_id_nest = "sample_id")
 
   # Query --------------------------------------------------------------------
   df <- purrr::map2_dfr(sample_study_pairs_nest$study_id, sample_study_pairs_nest$sample_id_nest,
@@ -102,7 +101,7 @@ get_clinical_by_sample <- function(study_id = NULL,
 
   resolved_clinical_attributes <- clinical_attribute %||%
     (available_clinical_attributes(study_id, base_url = base_url) %>%
-    pull(.data$clinicalAttributeId) %>%
+    dplyr::pull(.data$clinicalAttributeId) %>%
     unique())
 
   if(is.null(clinical_attribute)) {
@@ -159,13 +158,13 @@ get_panel_by_sample <- function(study_id = NULL,
                          base_url = base_url)
 
 
-  res %>%
-    purrr::when(nrow(.) < 1 ~ cli::cli_abort("No gene panel data found. Did you specify the correct {.code study_id} for your {.code sample_id}?
-                                        Is {.val GENE_PANEL} an available clinical attribute in your queried studies?"),
-
-              TRUE ~ transmute(.,  .data$sampleId, .data$studyId, genePanel = .data$value))
-
+  if(nrow(res) < 1) {
+    cli::cli_abort(c("No gene panel data found. Did you specify the correct {.code study_id} for your {.code sample_id}? ,",
+                   "Is {.val GENE_PANEL} an available clinical attribute in your queried studies?"))
+  }
 
+  res <- transmute(res,  .data$sampleId, .data$studyId, genePanel = .data$value)
+  return(res)
 
 }
 
diff --git a/R/by_studies.R b/R/by_studies.R
@@ -98,11 +98,17 @@ get_clinical_by_study <- function(study_id = NULL,
   df_samp <- purrr::map_df(res$content, ~ tibble::as_tibble(.x))
 
   # filter selected clinical attributes if not NULL
+  df_samp <-
+    if(nrow(df_samp) > 0 & is_not_null(clinical_attribute)) {
+    filter(df_samp, .data$clinicalAttributeId %in% clinical_attribute)
+
+  } else {
+    cli_alert_warning("Sample Level Clinical Data: No {.var clinical_attribute} passed. Defaulting to returning all clinical attributes in {.val {study_id}} study")
+    df_samp
+
+  }
+
   df_samp <- df_samp %>%
-    purrr::when(
-      nrow(df_samp) > 0 & !is.null(clinical_attribute) ~ filter(., clinicalAttributeId %in% clinical_attribute),
-      ~{cli_alert_warning("Sample Level Clinical Data: No {.var clinical_attribute} passed. Defaulting to returning all clinical attributes in {.val {study_id}} study")
-      .}) %>%
     mutate(dataLevel = "SAMPLE")
 
 
@@ -117,11 +123,15 @@ get_clinical_by_study <- function(study_id = NULL,
   df_pat <- purrr::map_df(res$content, ~ tibble::as_tibble(.x))
 
   # filter selected clinical attributes if not NULL
+  df_pat <-
+    if(nrow(df_pat) > 0 & !is.null(clinical_attribute)) {
+      filter(df_pat, .data$clinicalAttributeId %in% clinical_attribute)
+    } else {
+      cli_alert_warning("Patient Level Clinical Data: No {.var clinical_attribute} passed. Defaulting to returning all clinical attributes in {.val {study_id}} study")
+      df_pat
+    }
+
   df_pat <- df_pat %>%
-    purrr::when(
-      nrow(df_pat) > 0 & !is.null(clinical_attribute) ~ filter(., clinicalAttributeId %in% clinical_attribute),
-      ~{cli_alert_warning("Patient Level Clinical Data: No {.var clinical_attribute} passed. Defaulting to returning all clinical attributes in {.val {study_id}} study")
-        .})%>%
     mutate(dataLevel = "PATIENT",
            sampleId = NA_character_)
 
@@ -213,8 +223,8 @@ available_samples <- function(study_id = NULL, sample_list_id = NULL,
       res$content
       df <- bind_rows(res$content) %>%
         select(
-          .data$patientId, .data$sampleId,
-          .data$sampleType, .data$studyId
+          "patientId", "sampleId",
+          "sampleType", "studyId"
         )
       df
     })
@@ -254,7 +264,7 @@ available_patients <- function(study_id = NULL,
     res <- cbp_api(url_path = x, base_url = base_url)
     res$content
     df <- bind_rows(res$content) %>%
-      select(.data$patientId, .data$studyId)
+      select("patientId", "studyId")
     df
   })