diff --git a/DESCRIPTION b/DESCRIPTION index 26d9f85..62ef696 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cbioportalR Title: Browse and Query Clinical and Genomic Data from cBioPortal -Version: 1.0.1.9002 +Version: 1.1.0 Authors@R: c(person(given = "Karissa", family = "Whiting", diff --git a/NAMESPACE b/NAMESPACE index cd88902..c60d32f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,20 +1,10 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(.check_for_patient_id) -export(.check_for_study_id) -export(.check_input_pair_df) -export(.get_cbioportal_url) export(.get_clinical_by_list_item) export(.get_clinical_pat_by_list_item) export(.get_data_by_sample) export(.get_data_by_study) -export(.get_panel_entrez) -export(.guess_study_id) -export(.lookup_hugo) -export(.lookup_profile_name) -export(.lookup_study_name) -export(.resolve_url) export(available_clinical_attributes) export(available_gene_panels) export(available_patients) diff --git a/NEWS.md b/NEWS.md index 303bf15..b5a42c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,8 @@ -# cbioportalR (development version) +# cbioportalR 1.1.0 * Added `available_sample_lists()` function which returns all available sample list IDs for a given study ID * Added `sample_list_id` argument to `available_samples()` which returns all sample IDs in a specific sample list within a study (#53). -* Fixed {cli} errors to make package compatible with {cli} v3.4.1. +* Fixed {cli} errors to make package compatible with {cli} ≥ v3.4.1. * Added CNA segmentation retrieval endpoint accessible via `get_segments_by_sample()` and `get_segments_by_study()`. You can use `get_genomics_by_*(return_segments = TRUE)` as well to access this data. 
# cbioportalR 1.0.1 diff --git a/R/authenticate.R b/R/authenticate.R index 04de8e0..f63e06d 100644 --- a/R/authenticate.R +++ b/R/authenticate.R @@ -101,7 +101,6 @@ test_cbioportal_db <- function() { #' Pulls the set URL from the internal package environment #' #' @return a string indicating the saved URL in the `cbioportal_env` environment or an error if no URL found. -#' @export #' @author Karissa Whiting, Daniel D. Sjoberg #' @keywords internal #' @noRd @@ -126,9 +125,7 @@ test_cbioportal_db <- function() { #' Process and make a best guess of URL string passed to authentication functions #' #' @param raw_url The URL passed to a function by a user -#' #' @return A string with a final URL to be used in the given query. -#' @export #' @author Karissa Whiting, Daniel D. Sjoberg #' @keywords internal #' @noRd diff --git a/R/utils.R b/R/utils.R index 2693b3c..2cde6e2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -5,7 +5,6 @@ #' @return stop if no arg #' @keywords internal #' @noRd -#' @export #' .check_for_study_id <- function(study_id) { @@ -27,7 +26,6 @@ #' @return stop if no sample_id arg #' @keywords internal #' @noRd -#' @export #' .check_for_patient_id <- function(patient_id) { if (is.null(patient_id)) { @@ -43,7 +41,6 @@ #' @return A valid `sample_study_pairs` or `patient_study_pairs` data frame. If `input_df` is NULL, it will return NULL. #' @keywords internal #' @noRd -#' @export #' .check_input_pair_df <- function(input_df) { @@ -113,7 +110,6 @@ #' @return a guess at which study_id a user may want to use #' @keywords internal #' @noRd -#' @export #' .guess_study_id <- function(study_id, resolved_url) { @@ -125,7 +121,7 @@ study_id_guess %||% cli::cli_abort("Unable to guess a {.code study_id} for your database. Please provide a {.code study_id}.") - cli::cli_alert_info("No {.code study_id} provided. Using {.val {study_id_guess}} as default study") + study_id %||% cli::cli_alert_info("No {.code study_id} provided. 
Using {.val {study_id_guess}} as default study") return(study_id_guess) } @@ -144,7 +140,6 @@ #' @return find molecular profile name for a specified data type #' @keywords internal #' @noRd -#' @export #' .lookup_profile_name <- function(data_type, study_id, base_url) { @@ -178,7 +173,6 @@ #' @return find study ID for a specified molecular profile #' @keywords internal #' @noRd -#' @export #' .lookup_study_name <- function(molecular_profile_id, study_id, base_url) { @@ -217,7 +211,6 @@ #' @return a dataframe that matches input data frame but with hugoGeneSymbol column #' @keywords internal #' @noRd -#' @export #' .lookup_hugo <- function(df, base_url) { @@ -262,7 +255,6 @@ #' @return a vector of Entrez Gene IDs #' @keywords internal #' @noRd -#' @export #' .get_panel_entrez <- function(panel_id, base_url) { @@ -286,7 +278,6 @@ #' Check if NULL #' #' @param x any R object or expression -#' #' @noRd #' @keywords internal #' diff --git a/README.Rmd b/README.Rmd index 083da8b..97cbe69 100644 --- a/README.Rmd +++ b/README.Rmd @@ -32,7 +32,7 @@ library(tidyverse) This package was created to work with both the public [cBioPortal website](https://www.cbioportal.org/), as well as private institutional cBioPortal instances (e.g. MSKCC, GENIE) with appropriate credentials and [authentication]. -This package is compatible with cBioPortal v5.0, but is subject to change as [cBioPortal updates are released](https://github.com/cBioPortal/cbioportal/releases). For more information on cBioPortal, see the following publications: +This package is compatible with cBioPortal v5, but is subject to change as [cBioPortal updates are released](https://github.com/cBioPortal/cbioportal/releases). To see if your cBioPortal instance is compatible, look for its version in the footer of the homepage or check `portalVersion` in the output of `YOUR_CBIOPORTAL_INSTANCE/api/info`. For more information on cBioPortal, see the following publications: - [Gao et al. Sci. Signal. 
2013](https://pubmed.ncbi.nlm.nih.gov/23550210/) - [ Cerami et al. Cancer Discov. 2012](https://aacrjournals.org/cancerdiscovery/article/2/5/401/3246/The-cBio-Cancer-Genomics-Portal-An-Open-Platform) diff --git a/README.md b/README.md index bfa9c26..b20eef4 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,13 @@ website](https://www.cbioportal.org/), as well as private institutional cBioPortal instances (e.g. MSKCC, GENIE) with appropriate credentials and [authentication](#authentication). -This package is compatible with cBioPortal v5.0, but is subject to -change as [cBioPortal updates are -released](https://github.com/cBioPortal/cbioportal/releases). For more -information on cBioPortal, see the following publications: +This package is compatible with cBioPortal v5, but is subject to change +as [cBioPortal updates are +released](https://github.com/cBioPortal/cbioportal/releases). To see if +your cBioPortal instance is compatible, look for its version in the +footer of the homepage or check `portalVersion` in the output of +`YOUR_CBIOPORTAL_INSTANCE/api/info`. For more information on cBioPortal, +see the following publications: - [Gao et al. Sci. Signal. 
2013](https://pubmed.ncbi.nlm.nih.gov/23550210/) @@ -145,21 +148,20 @@ To see available studies in your database you can use: available_studies() %>% head(n = 10) #> # A tibble: 10 × 13 -#> studyId name descr…¹ publi…² groups status impor…³ allSa…⁴ readP…⁵ cance…⁶ -#> -#> 1 acc_tcga Adre… "TCGA … TRUE "PUBL… 0 2022-0… 92 TRUE acc -#> 2 bcc_unig… Basa… "Whole… TRUE "PUBL… 0 2022-0… 293 TRUE bcc -#> 3 ampca_bc… Ampu… "Exome… TRUE "PUBL… 0 2022-0… 160 TRUE ampca -#> 4 blca_dfa… Blad… "Whole… TRUE "PUBL… 0 2022-0… 50 TRUE blca -#> 5 blca_msk… Blad… "Compr… TRUE "PUBL… 0 2022-0… 97 TRUE blca -#> 6 blca_bgi Blad… "Whole… TRUE "PUBL… 0 2022-0… 99 TRUE blca -#> 7 blca_msk… Blad… "Genom… TRUE "PUBL… 0 2022-0… 109 TRUE blca -#> 8 all_stju… Hypo… "Whole… TRUE "" 0 2022-0… 44 TRUE myeloid -#> 9 acyc_fmi… Aden… "Targe… TRUE "ACYC… 0 2022-0… 28 TRUE acyc -#> 10 acyc_san… Aden… "Whole… TRUE "ACYC… 0 2022-0… 24 TRUE acyc -#> # … with 3 more variables: referenceGenome , pmid , citation , -#> # and abbreviated variable names ¹​description, ²​publicStudy, ³​importDate, -#> # ⁴​allSampleCount, ⁵​readPermission, ⁶​cancerTypeId +#> studyId name description publicStudy groups status importDate allSampleCount +#> +#> 1 acc_tc… Adre… "TCGA Adre… TRUE PUBLIC 0 2023-06-1… 92 +#> 2 laml_t… Acut… "TCGA Acut… TRUE PUBLIC 0 2023-06-1… 200 +#> 3 blca_t… Blad… "TCGA Blad… TRUE PUBLIC 0 2023-06-1… 413 +#> 4 brca_t… Brea… "TCGA Brea… TRUE PUBLIC 0 2023-06-1… 1108 +#> 5 kirc_t… Kidn… "TCGA Kidn… TRUE PUBLIC 0 2023-06-1… 538 +#> 6 cesc_t… Cerv… "TCGA Cerv… TRUE PUBLIC 0 2023-06-1… 310 +#> 7 chol_t… Chol… "TCGA Chol… TRUE PUBLIC 0 2023-06-1… 51 +#> 8 kich_t… Kidn… "TCGA Kidn… TRUE PUBLIC 0 2023-06-1… 113 +#> 9 coadre… Colo… "TCGA Colo… TRUE PUBLIC 0 2023-06-1… 640 +#> 10 dlbc_t… Lymp… "TCGA Lymp… TRUE PUBLIC 0 2023-06-1… 48 +#> # ℹ 5 more variables: readPermission , cancerTypeId , +#> # referenceGenome , pmid , citation ``` To view study metadata on a particular study you can use: @@ -173,7 +175,7 @@ 
get_study_info("acc_tcga") %>% #> publicStudy "TRUE" #> groups "PUBLIC" #> status "0" -#> importDate "2022-03-04 17:47:56" +#> importDate "2023-06-19 09:42:47" #> allSampleCount "92" #> sequencedSampleCount "90" #> cnaSampleCount "90" @@ -186,6 +188,7 @@ get_study_info("acc_tcga") %>% #> massSpectrometrySampleCount "0" #> completeSampleCount "75" #> readPermission "TRUE" +#> treatmentCount "0" #> studyId "acc_tcga" #> cancerTypeId "acc" #> cancerType.name "Adrenocortical Carcinoma" @@ -213,36 +216,36 @@ indicated by the function message. ``` r df$mutation %>% head() -#> # A tibble: 6 × 33 -#> hugoG…¹ entre…² uniqu…³ uniqu…⁴ molec…⁵ sampl…⁶ patie…⁷ studyId center mutat…⁸ -#> -#> 1 KRT8 3856 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 2 LCE1B 353132 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> 3 SLC9C2 284525 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 4 DNAH14 127602 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 5 OPN4 94233 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> 6 DNAJC4 3338 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> # … with 23 more variables: validationStatus , tumorAltCount , -#> # tumorRefCount , normalAltCount , normalRefCount , -#> # startPosition , endPosition , referenceAllele , -#> # proteinChange , mutationType , functionalImpactScore , -#> # fisValue , linkXvar , linkPdb , linkMsa , -#> # ncbiBuild , variantType , keyword , chr , -#> # variantAllele , refseqMrnaId , proteinPosStart , … +#> # A tibble: 6 × 28 +#> hugoGeneSymbol entrezGeneId uniqueSampleKey uniquePatientKey +#> +#> 1 KRT8 3856 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 2 LCE1B 353132 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 3 SLC9C2 284525 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 4 DNAH14 127602 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 5 OPN4 94233 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh 
VENHQS1PUi1BNUox… +#> 6 DNAJC4 3338 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> # ℹ 24 more variables: molecularProfileId , sampleId , +#> # patientId , studyId , center , mutationStatus , +#> # validationStatus , tumorAltCount , tumorRefCount , +#> # normalAltCount , normalRefCount , startPosition , +#> # endPosition , referenceAllele , proteinChange , +#> # mutationType , ncbiBuild , variantType , keyword , +#> # chr , variantAllele , refseqMrnaId , … df$cna %>% head() #> # A tibble: 6 × 9 -#> hugoGeneSymbol entre…¹ uniqu…² uniqu…³ molec…⁴ sampl…⁵ patie…⁶ studyId alter…⁷ -#> -#> 1 MEOX1 4222 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> 2 NUFIP2 57532 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> 3 OSBPL7 114881 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> 4 TP53I13 90313 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> 5 TAOK1 57551 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> 6 SPOP 8405 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… 2 -#> # … with abbreviated variable names ¹​entrezGeneId, ²​uniqueSampleKey, -#> # ³​uniquePatientKey, ⁴​molecularProfileId, ⁵​sampleId, ⁶​patientId, ⁷​alteration +#> hugoGeneSymbol entrezGeneId uniqueSampleKey uniquePatientKey +#> +#> 1 RERE 473 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 2 ENO1 2023 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 3 CA6 765 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 4 RN7SL451P 106480377 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 5 SLC2A7 155184 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 6 SLC2A5 6518 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> # ℹ 5 more variables: molecularProfileId , sampleId , +#> # patientId , studyId , alteration ``` You can also pull data by specific sample IDs but the API requires a bit @@ -288,22 +291,22 @@ mutations <- get_mutations_by_sample(sample_id = samples, mutations %>% head() -#> # A tibble: 6 × 33 -#> hugoG…¹ entre…² uniqu…³ uniqu…⁴ molec…⁵ sampl…⁶ 
patie…⁷ studyId center mutat…⁸ -#> -#> 1 KRT8 3856 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 2 LCE1B 353132 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> 3 SLC9C2 284525 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 4 DNAH14 127602 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… broad… Somatic -#> 5 OPN4 94233 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> 6 DNAJC4 3338 VENHQS… VENHQS… acc_tc… TCGA-O… TCGA-O… acc_tc… hgsc.… Somatic -#> # … with 23 more variables: validationStatus , tumorAltCount , -#> # tumorRefCount , normalAltCount , normalRefCount , -#> # startPosition , endPosition , referenceAllele , -#> # proteinChange , mutationType , functionalImpactScore , -#> # fisValue , linkXvar , linkPdb , linkMsa , -#> # ncbiBuild , variantType , keyword , chr , -#> # variantAllele , refseqMrnaId , proteinPosStart , … +#> # A tibble: 6 × 28 +#> hugoGeneSymbol entrezGeneId uniqueSampleKey uniquePatientKey +#> +#> 1 KRT8 3856 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 2 LCE1B 353132 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 3 SLC9C2 284525 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 4 DNAH14 127602 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 5 OPN4 94233 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> 6 DNAJC4 3338 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… +#> # ℹ 24 more variables: molecularProfileId , sampleId , +#> # patientId , studyId , center , mutationStatus , +#> # validationStatus , tumorAltCount , tumorRefCount , +#> # normalAltCount , normalRefCount , startPosition , +#> # endPosition , referenceAllele , proteinChange , +#> # mutationType , ncbiBuild , variantType , keyword , +#> # chr , variantAllele , refseqMrnaId , … ``` Lastly, you can also pull clinical data or sample metadata (e.g. 
tumor @@ -314,37 +317,36 @@ available, you can use: available_clinical_attributes(study_id = "acc_tcga") %>% head() #> # A tibble: 6 × 7 -#> displayName descr…¹ datat…² patie…³ prior…⁴ clini…⁵ studyId -#> -#> 1 Diagnosis Age Age at… NUMBER TRUE 1 AGE acc_tc… -#> 2 Neoplasm Disease Stage Americ… The ex… STRING TRUE 1 AJCC_P… acc_tc… -#> 3 American Joint Committee on C… The ve… STRING TRUE 1 AJCC_S… acc_tc… -#> 4 Atypical Mitotic Figures Atypic… STRING TRUE 1 ATYPIC… acc_tc… -#> 5 Cancer Type Cancer… STRING FALSE 1 CANCER… acc_tc… -#> 6 Cancer Type Detailed Cancer… STRING FALSE 1 CANCER… acc_tc… -#> # … with abbreviated variable names ¹​description, ²​datatype, ³​patientAttribute, -#> # ⁴​priority, ⁵​clinicalAttributeId +#> displayName description datatype patientAttribute priority clinicalAttributeId +#> +#> 1 Diagnosis … Age at whi… NUMBER TRUE 1 AGE +#> 2 Neoplasm D… The extent… STRING TRUE 1 AJCC_PATHOLOGIC_TU… +#> 3 American J… The versio… STRING TRUE 1 AJCC_STAGING_EDITI… +#> 4 Atypical M… Atypical M… STRING TRUE 1 ATYPICAL_MITOTIC_F… +#> 5 Cancer Type Cancer type STRING FALSE 1 CANCER_TYPE +#> 6 Cancer Typ… Cancer typ… STRING FALSE 1 CANCER_TYPE_DETAIL… +#> # ℹ 1 more variable: studyId ``` ``` r get_clinical_by_study("acc_tcga") #> ! Sample Level Clinical Data: No `clinical_attribute` passed. Defaulting to returning all clinical attributes in "acc_tcga" study #> ! Patient Level Clinical Data: No `clinical_attribute` passed. 
Defaulting to returning all clinical attributes in "acc_tcga" study -#> # A tibble: 6,292 × 6 -#> uniquePatientKey patientId studyId clinicalAt…¹ value dataL…² -#> -#> 1 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga AGE 58 PATIENT -#> 2 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga AJCC_PATHOL… Stag… PATIENT -#> 3 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga ATYPICAL_MI… Atyp… PATIENT -#> 4 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga CAPSULAR_IN… Inva… PATIENT -#> 5 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga CLIN_M_STAGE M0 PATIENT -#> 6 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga CT_SCAN_PRE… [Unk… PATIENT -#> 7 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga CYTOPLASM_P… Cyto… PATIENT -#> 8 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga DAYS_TO_INI… 0 PATIENT -#> 9 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga DFS_MONTHS 24.77 PATIENT -#> 10 VENHQS1PUi1BNUoxOmFjY190Y2dh TCGA-OR-A5J1 acc_tcga DFS_STATUS 1:Re… PATIENT -#> # … with 6,282 more rows, and abbreviated variable names ¹​clinicalAttributeId, -#> # ²​dataLevel +#> # A tibble: 6,292 × 7 +#> uniquePatientKey patientId studyId clinicalAttributeId value dataLevel +#> +#> 1 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… AGE 58 PATIENT +#> 2 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… AJCC_PATHOLOGIC_TU… Stag… PATIENT +#> 3 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… ATYPICAL_MITOTIC_F… Atyp… PATIENT +#> 4 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… CAPSULAR_INVASION Inva… PATIENT +#> 5 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… CLIN_M_STAGE M0 PATIENT +#> 6 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… CT_SCAN_PREOP_RESU… [Unk… PATIENT +#> 7 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… CYTOPLASM_PRESENCE… Cyto… PATIENT +#> 8 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… DAYS_TO_INITIAL_PA… 0 PATIENT +#> 9 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… DFS_MONTHS 24.77 PATIENT +#> 10 VENHQS1PUi1BNUoxOmFjY1… TCGA-OR-… acc_tc… DFS_STATUS 1:Re… PATIENT +#> # ℹ 6,282 more rows +#> # ℹ 1 more 
variable: sampleId ``` ``` r @@ -353,20 +355,19 @@ get_clinical_by_sample(sample_id = samples, study_id = "acc_tcga") %>% #> ! No `clinical_attribute` passed. Defaulting to returning #> all clinical attributes in "acc_tcga" study #> # A tibble: 10 × 7 -#> uniqueSampleKey uniqu…¹ sampl…² patie…³ studyId clini…⁴ value -#> -#> 1 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… CANCER… Adre… -#> 2 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… CANCER… Adre… -#> 3 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… DAYS_T… 4691 -#> 4 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… FRACTI… 0.05… -#> 5 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… IS_FFPE NO -#> 6 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… MUTATI… 39 -#> 7 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… OCT_EM… TRUE -#> 8 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… ONCOTR… ACC -#> 9 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… OTHER_… E403… -#> 10 VENHQS1PUi1BNUoxLTAxOmFjY190Y2… VENHQS… TCGA-O… TCGA-O… acc_tc… PATHOL… TCGA… -#> # … with abbreviated variable names ¹​uniquePatientKey, ²​sampleId, ³​patientId, -#> # ⁴​clinicalAttributeId +#> uniqueSampleKey uniquePatientKey sampleId patientId studyId +#> +#> 1 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 2 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 3 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 4 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 5 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 6 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 7 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 8 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… 
acc_tc… +#> 9 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> 10 VENHQS1PUi1BNUoxLTAxOmFjY190Y2dh VENHQS1PUi1BNUox… TCGA-OR… TCGA-OR-… acc_tc… +#> # ℹ 2 more variables: clinicalAttributeId , value ``` ``` r diff --git a/codemeta.json b/codemeta.json index 95cd4a4..1a12195 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,7 +8,7 @@ "codeRepository": "https://github.com/karissawhiting/cbioportalR", "issueTracker": "https://github.com/karissawhiting/cbioportalR/issues", "license": "https://spdx.org/licenses/MIT", - "version": "1.0.1.9002", + "version": "1.1.0", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -258,8 +258,9 @@ }, "SystemRequirements": null }, - "fileSize": "2286.409KB", + "fileSize": "2281.862KB", "releaseNotes": "https://github.com/karissawhiting/cbioportalR/blob/master/NEWS.md", "readme": "https://github.com/karissawhiting/cbioportalR/blob/main/README.md", - "contIntegration": ["https://github.com/karissawhiting/cbioportalR/actions", "https://app.codecov.io/gh/karissawhiting/cbioportalR?branch=master"] + "contIntegration": ["https://github.com/karissawhiting/cbioportalR/actions", "https://app.codecov.io/gh/karissawhiting/cbioportalR?branch=master"], + "keywords": "r-package" } diff --git a/cran-comments.md b/cran-comments.md index eb9ca8a..cf8ad84 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,5 +1,6 @@ ## Test environments + - Ubuntu 16.04.6 LTS (on github actions), devel, release, oldrel - macOS (on github actions), release - win-builder devel diff --git a/vignettes/overview-of-workflow.Rmd b/vignettes/overview-of-workflow.Rmd index a605f64..779b533 100644 --- a/vignettes/overview-of-workflow.Rmd +++ b/vignettes/overview-of-workflow.Rmd @@ -53,6 +53,7 @@ Now that we are successfully connected, we may want to view all studies availabl ```r all_studies <- available_studies() all_studies +<<<<<<< HEAD #> # A tibble: 375 × 13 #> studyId name description publicStudy groups status 
importDate allSampleCount readPermission cancerTypeId #> @@ -68,6 +69,28 @@ all_studies #> 10 all_phase… Pedi… "Whole gen… TRUE "NCI-… 0 2022-10-2… 1978 TRUE bll #> # ℹ 365 more rows #> # ℹ 3 more variables: referenceGenome , pmid , citation +======= +#> # A tibble: 391 × 13 +#> studyId name description publicStudy +#> +#> 1 acc_tcga Adre… "TCGA Adre… TRUE +#> 2 laml_tcga Acut… "TCGA Acut… TRUE +#> 3 blca_tcga Blad… "TCGA Blad… TRUE +#> 4 brca_tcga Brea… "TCGA Brea… TRUE +#> 5 kirc_tcga Kidn… "TCGA Kidn… TRUE +#> 6 cesc_tcga Cerv… "TCGA Cerv… TRUE +#> 7 chol_tcga Chol… "TCGA Chol… TRUE +#> 8 kich_tcga Kidn… "TCGA Kidn… TRUE +#> 9 coadread… Colo… "TCGA Colo… TRUE +#> 10 dlbc_tcga Lymp… "TCGA Lymp… TRUE +#> # ℹ 381 more rows +#> # ℹ 9 more variables: groups , +#> # status , importDate , +#> # allSampleCount , +#> # readPermission , +#> # cancerTypeId , +#> # referenceGenome , pmid , … +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` By inspecting this data frame, we see the unique `study_id` for the NMIBC data set is `"blca_nmibc_2017"` and the unique `study_id` for the prostate cancer data set is `"prad_msk_2019"`. 
To get more information on our studies we can do the following: @@ -79,11 +102,25 @@ By inspecting this data frame, we see the unique `study_id` for the NMIBC data s all_studies %>% filter(studyId %in% c("blca_nmibc_2017", "prad_msk_2019")) #> # A tibble: 2 × 13 +<<<<<<< HEAD #> studyId name description publicStudy groups status importDate allSampleCount readPermission cancerTypeId #> #> 1 blca_nmibc… Nonm… IMPACT seq… TRUE PUBLIC 0 2022-10-2… 105 TRUE blca #> 2 prad_msk_2… Pros… MSK-IMPACT… TRUE PUBLIC 0 2022-10-2… 18 TRUE prostate #> # ℹ 3 more variables: referenceGenome , pmid , citation +======= +#> studyId name description publicStudy +#> +#> 1 prad_msk_… Pros… MSK-IMPACT… TRUE +#> 2 blca_nmib… Nonm… IMPACT seq… TRUE +#> # ℹ 9 more variables: groups , +#> # status , importDate , +#> # allSampleCount , +#> # readPermission , +#> # cancerTypeId , +#> # referenceGenome , pmid , +#> # citation +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` More in-depth information about the study can be found with `get_study_info()` @@ -100,7 +137,7 @@ get_study_info("blca_nmibc_2017") %>% #> citation "Pietzak et al. Eur Urol 2017" #> groups "PUBLIC" #> status "0" -#> importDate "2022-10-26 15:22:21" +#> importDate "2023-07-19 17:59:06" #> allSampleCount "105" #> sequencedSampleCount "105" #> cnaSampleCount "105" @@ -136,7 +173,7 @@ get_study_info("prad_msk_2019") %>% #> citation "Granlund et al. Cell Metab 2020" #> groups "PUBLIC" #> status "0" -#> importDate "2022-10-27 16:47:38" +#> importDate "2023-06-20 12:45:47" #> allSampleCount "18" #> sequencedSampleCount "18" #> cnaSampleCount "18" @@ -168,6 +205,7 @@ We can check available genomic data with `available_profiles()`. 
```r available_profiles(study_id = "blca_nmibc_2017") #> # A tibble: 3 × 8 +<<<<<<< HEAD #> molecularAlterationType datatype name description showProfileInAnalysi…¹ patientLevel molecularProfileId #> #> 1 COPY_NUMBER_ALTERATION DISCRETE Putative… Copy Numbe… TRUE FALSE blca_nmibc_2017_c… @@ -175,12 +213,25 @@ available_profiles(study_id = "blca_nmibc_2017") #> 3 STRUCTURAL_VARIANT SV Structur… Structural… TRUE FALSE blca_nmibc_2017_s… #> # ℹ abbreviated name: ¹​showProfileInAnalysisTab #> # ℹ 1 more variable: studyId +======= +#> molecularAlterationType datatype name +#> +#> 1 COPY_NUMBER_ALTERATION DISCRETE Putati… +#> 2 MUTATION_EXTENDED MAF Mutati… +#> 3 STRUCTURAL_VARIANT SV Struct… +#> # ℹ 5 more variables: description , +#> # showProfileInAnalysisTab , +#> # patientLevel , +#> # molecularProfileId , +#> # studyId +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` ```r available_profiles(study_id = "prad_msk_2019") #> # A tibble: 3 × 8 +<<<<<<< HEAD #> molecularAlterationType datatype name description showProfileInAnalysi…¹ patientLevel molecularProfileId #> #> 1 COPY_NUMBER_ALTERATION DISCRETE Putative… Putative c… TRUE FALSE prad_msk_2019_cna @@ -188,6 +239,18 @@ available_profiles(study_id = "prad_msk_2019") #> 3 STRUCTURAL_VARIANT SV Structur… Structural… TRUE FALSE prad_msk_2019_str… #> # ℹ abbreviated name: ¹​showProfileInAnalysisTab #> # ℹ 1 more variable: studyId +======= +#> molecularAlterationType datatype name +#> +#> 1 COPY_NUMBER_ALTERATION DISCRETE Putati… +#> 2 MUTATION_EXTENDED MAF Mutati… +#> 3 STRUCTURAL_VARIANT SV Struct… +#> # ℹ 5 more variables: description , +#> # showProfileInAnalysisTab , +#> # patientLevel , +#> # molecularProfileId , +#> # studyId +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` Luckily, in this example our studies have mutation, copy number alteration and fusion (structural variant) data available. Each of these data types has a unique molecular profile ID. 
The molecular profile ID usually takes the form of `_mutations`, `_structural_variants`, `_cna`. @@ -196,7 +259,8 @@ Luckily, in this example our studies have mutation, copy number alteration and f ```r available_profiles(study_id = "blca_nmibc_2017") %>% pull(molecularProfileId) -#> [1] "blca_nmibc_2017_cna" "blca_nmibc_2017_mutations" +#> [1] "blca_nmibc_2017_cna" +#> [2] "blca_nmibc_2017_mutations" #> [3] "blca_nmibc_2017_structural_variants" ``` @@ -304,19 +368,30 @@ Now we pass this to `get_genetics_by_sample()` ```r all_genomic <- get_genetics_by_sample(sample_study_pairs = df_pairs) #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_mutations and prad_msk_2019_mutations +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_mutations and +#> prad_msk_2019_mutations #> Genes: "All available genes" #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_cna and prad_msk_2019_cna +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: blca_nmibc_2017_cna +#> and prad_msk_2019_cna #> Genes: "All available genes" #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_structural_variants and prad_msk_2019_structural_variants +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_structural_variants and +#> prad_msk_2019_structural_variants #> Genes: "All available genes" mut_sample <- all_genomic$mutation @@ 
-328,9 +403,13 @@ Like with querying by study ID, you can also pull data individually by genomic d ```r mut_only <- get_mutations_by_sample(sample_study_pairs = df_pairs) #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_mutations and prad_msk_2019_mutations +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_mutations and +#> prad_msk_2019_mutations #> Genes: "All available genes" identical(mut_only, mut_sample) @@ -372,15 +451,23 @@ When pulling by sample IDs, we can also limit our results to a specific set of g ```r by_hugo <- get_mutations_by_sample(sample_study_pairs = df_pairs, genes = "TP53") #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_mutations and prad_msk_2019_mutations +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_mutations and +#> prad_msk_2019_mutations #> Genes: "TP53" by_gene_id <- get_mutations_by_sample(sample_study_pairs = df_pairs, genes = 7157) #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: -#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_mutations and prad_msk_2019_mutations +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_mutations and +#> prad_msk_2019_mutations #> Genes: 7157 identical(by_hugo, by_gene_id) @@ -394,11 +481,16 @@ get_mutations_by_sample( panel = "IMPACT468") %>% head() #> Joining with `by = join_by(study_id)` -#> The following parameters were used in query: 
-#> Study ID: "blca_nmibc_2017" and "prad_msk_2019" -#> Molecular Profile ID: blca_nmibc_2017_mutations and prad_msk_2019_mutations +#> The following parameters were used in +#> query: +#> Study ID: "blca_nmibc_2017" and +#> "prad_msk_2019" +#> Molecular Profile ID: +#> blca_nmibc_2017_mutations and +#> prad_msk_2019_mutations #> Genes: "IMPACT468" #> # A tibble: 6 × 28 +<<<<<<< HEAD #> hugoGeneSymbol entrezGeneId uniqueSampleKey uniquePatientKey molecularProfileId sampleId patientId studyId #> #> 1 TERT 7015 UC0wMDAxNDUzLVQwM… UC0wMDAxNDUzOmJ… blca_nmibc_2017_m… P-00014… P-0001453 blca_n… @@ -412,6 +504,23 @@ get_mutations_by_sample( #> # referenceAllele , proteinChange , mutationType , ncbiBuild , variantType , #> # chr , variantAllele , refseqMrnaId , proteinPosStart , proteinPosEnd , #> # keyword +======= +#> hugoGeneSymbol entrezGeneId +#> +#> 1 SMAD4 4089 +#> 2 TERT 7015 +#> 3 ERBB4 2066 +#> 4 CUL3 8452 +#> 5 PBRM1 55193 +#> 6 APC 324 +#> # ℹ 26 more variables: +#> # uniqueSampleKey , +#> # uniquePatientKey , +#> # molecularProfileId , +#> # sampleId , patientId , +#> # studyId , center , +#> # mutationStatus , … +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` ## Pulling Clinical Data & Sample Metadata @@ -427,6 +536,7 @@ attr_prad <- available_clinical_attributes("prad_msk_2019") attr_prad #> # A tibble: 13 × 7 +<<<<<<< HEAD #> displayName description datatype patientAttribute priority clinicalAttributeId studyId #> #> 1 Cancer Type Cancer Type STRING FALSE 1 CANCER_TYPE prad_m… @@ -442,6 +552,28 @@ attr_prad #> 11 Somatic Status Somatic Status STRING FALSE 1 SOMATIC_STATUS prad_m… #> 12 Specimen Preservation Type The method use… STRING FALSE 1 SPECIMEN_PRESERVAT… prad_m… #> 13 TMB (nonsynonymous) TMB (nonsynony… NUMBER FALSE 1 TMB_NONSYNONYMOUS prad_m… +======= +#> displayName description datatype +#> +#> 1 Cancer Type Cancer Type STRING +#> 2 Cancer Type Detai… Cancer Typ… STRING +#> 3 Fraction Genome A… Fraction G… NUMBER +#> 4 Gene Panel Gene 
Panel. STRING +#> 5 Mutation Count Mutation C… NUMBER +#> 6 Oncotree Code Oncotree C… STRING +#> 7 Sample Class The sample… STRING +#> 8 Number of Samples… Number of … STRING +#> 9 Sample Type The type o… STRING +#> 10 Sex Sex STRING +#> 11 Somatic Status Somatic St… STRING +#> 12 Specimen Preserva… The method… STRING +#> 13 TMB (nonsynonymou… TMB (nonsy… NUMBER +#> # ℹ 4 more variables: +#> # patientAttribute , +#> # priority , +#> # clinicalAttributeId , +#> # studyId +>>>>>>> 74ef3b3deea09a4e124a8bb14badeb476cf7816a ``` There are a select set available for both studies: @@ -469,14 +601,17 @@ all_clinical %>% select(-contains("unique")) %>% head() #> # A tibble: 6 × 5 -#> sampleId patientId studyId clinicalAttributeId value -#> -#> 1 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 CANCER_TYPE Bladder Cancer -#> 2 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 CANCER_TYPE_DETAILED Bladder Urothelial Carcinoma -#> 3 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 FRACTION_GENOME_ALTERED 0.4448 -#> 4 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 MUTATION_COUNT 11 -#> 5 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 ONCOTREE_CODE BLCA -#> 6 P-0001453-T01-IM3 P-0001453 blca_nmibc_2017 SOMATIC_STATUS Matched +#> sampleId patientId studyId +#> +#> 1 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> 2 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> 3 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> 4 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> 5 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> 6 P-0001453-T01-IM3 P-0001453 blca_nmibc_… +#> # ℹ 2 more variables: +#> # clinicalAttributeId , +#> # value ``` The below pulls data at the patient level: @@ -499,14 +634,15 @@ all_clinical %>% select(-contains("unique")) %>% head() #> # A tibble: 6 × 4 -#> patientId studyId clinicalAttributeId value -#> -#> 1 P-0001453 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 2 P-0001453 blca_nmibc_2017 SEX Male -#> 3 P-0002166 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 4 P-0002166 blca_nmibc_2017 SEX Male -#> 5 P-0003238 blca_nmibc_2017 
SAMPLE_COUNT 1 -#> 6 P-0003238 blca_nmibc_2017 SEX Male +#> patientId studyId clinicalAttributeId +#> +#> 1 P-0001453 blca_nmib… SAMPLE_COUNT +#> 2 P-0001453 blca_nmib… SEX +#> 3 P-0002166 blca_nmib… SAMPLE_COUNT +#> 4 P-0002166 blca_nmib… SEX +#> 5 P-0003238 blca_nmib… SAMPLE_COUNT +#> 6 P-0003238 blca_nmib… SEX +#> # ℹ 1 more variable: value ``` Like with the genomic data pull functions, you can also pull clinical data by a data frame of sample ID - study ID pairs, or a data frame of patient ID - study ID pairs. Below, we will pull by patient ID - study ID pairs. @@ -532,17 +668,18 @@ all_patient_clinical <- get_clinical_by_patient(patient_study_pairs = df_pairs, all_patient_clinical %>% select(-contains("unique")) #> # A tibble: 34 × 4 -#> patientId studyId clinicalAttributeId value -#> -#> 1 P-0001453 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 2 P-0001453 blca_nmibc_2017 SEX Male -#> 3 P-0002166 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 4 P-0002166 blca_nmibc_2017 SEX Male -#> 5 P-0003238 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 6 P-0003238 blca_nmibc_2017 SEX Male -#> 7 P-0003257 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 8 P-0003257 blca_nmibc_2017 SEX Female -#> 9 P-0003261 blca_nmibc_2017 SAMPLE_COUNT 1 -#> 10 P-0003261 blca_nmibc_2017 SEX Male +#> patientId studyId clinicalAttributeId +#> +#> 1 P-0001453 blca_nmi… SAMPLE_COUNT +#> 2 P-0001453 blca_nmi… SEX +#> 3 P-0002166 blca_nmi… SAMPLE_COUNT +#> 4 P-0002166 blca_nmi… SEX +#> 5 P-0003238 blca_nmi… SAMPLE_COUNT +#> 6 P-0003238 blca_nmi… SEX +#> 7 P-0003257 blca_nmi… SAMPLE_COUNT +#> 8 P-0003257 blca_nmi… SEX +#> 9 P-0003261 blca_nmi… SAMPLE_COUNT +#> 10 P-0003261 blca_nmi… SEX #> # ℹ 24 more rows +#> # ℹ 1 more variable: value ```