diff --git a/DESCRIPTION b/DESCRIPTION index 3e63c01..3180928 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BayesSurvive Title: Bayesian Survival Models for High-Dimensional Data -Version: 0.0.4 -Date: 2024-07-10 +Version: 0.0.5 +Date: 2024-09-27 Authors@R: c(person("Zhi", "Zhao", role=c("aut","cre"), email = "zhi.zhao@medisin.uio.no"), person("Waldir", "Leoncio", role=c("aut")), person("Katrin", "Madjar", role=c("aut")), @@ -22,7 +22,7 @@ Imports: Rcpp, ggplot2, GGally, mvtnorm, survival, riskRegression, Suggests: knitr, testthat LazyData: true NeedsCompilation: yes -Packaged: 2024-07-10 08:34:27 UTC; zhiz +Packaged: 2024-09-27 15:26:16 UTC; zhiz Author: Zhi Zhao [aut, cre], Waldir Leoncio [aut], Katrin Madjar [aut], diff --git a/NEWS.md b/NEWS.md index b9218f2..2dd9ad5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * Rename the output of MPM coefficients in function `coef.BayesSurvive()` * Added `cpp` argument to `BayesSurvive()` to allow for faster computation using `Rcpp` * Added validation to some `BayesSurvive()` arguments +* Allow function `VS()` for an input with a matrix or array or a list consisting of matrices/arrays # BayesSurvive 0.0.3 diff --git a/R/BayesSurvive-package.R b/R/BayesSurvive-package.R index 76d5897..551d32c 100755 --- a/R/BayesSurvive-package.R +++ b/R/BayesSurvive-package.R @@ -14,7 +14,6 @@ NULL .onLoad <- function(libname, pkgname) { # CRAN OMP THREAD LIMIT Sys.setenv("OMP_THREAD_LIMIT" = 1) - } -## nocov end \ No newline at end of file +## nocov end diff --git a/R/RcppExports.R b/R/RcppExports.R index cbb6c63..0195cc2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,14 +9,6 @@ settingInterval_cpp <- function(y, delta_, s_, J_) { .Call(`_BayesSurvive_settingInterval_cpp`, y, delta_, s_, J_) } -matProdVec <- function(x, y) { - .Call(`_BayesSurvive_matProdVec`, x, y) -} - -sumMatProdVec <- function(x, y) { - .Call(`_BayesSurvive_sumMatProdVec`, x, y) -} - updateBH_cpp <- function(x_, beta_, J_, ind_r_d_, hPriorSh_, d_, c0_) { .Call(`_BayesSurvive_updateBH_cpp`, x_, beta_, J_, ind_r_d_, hPriorSh_, d_, c0_) } diff --git a/R/VS.R b/R/VS.R index a2a3f88..d5b5503 100644 --- a/R/VS.R +++ b/R/VS.R @@ -8,7 +8,8 @@ #' #' @name VS #' -#' @param x fitted object obtained with \code{BayesSurvive} +#' @param x fitted object obtained with \code{BayesSurvive}, or a matrix/array, +#' or a list consisting of matrices and arrays #' @param method variable selection method to choose from #' \code{c("CI", "SNC", "MPM", "FDR")}. Default is "FDR" #' @param threshold SNC threshold value (default 0.5) or the Bayesian expected false @@ -66,65 +67,202 @@ #' #' @export VS <- function(x, method = "FDR", threshold = NA, subgroup = 1) { - if (!inherits(x, "BayesSurvive")) { - stop("Use only with 'BayesSurvive' object!") + if (!(inherits(x, "BayesSurvive") || is.array(x) || is.list(x))) { + stop("Use only with 'BayesSurvive' object or a matrix or an array or a list!") } - if (!method %in% c("CI", "SNC", "SNC-BIC", "MPM", "FDR")) { - stop("'method' should be one of c('CI', 'SNC', 'SNC-BIC', 'MPM', 'FDR')!") + if (!inherits(x, "BayesSurvive") && is.list(x)) { + if (any(!sapply(x, function(xx) is.array(xx)))) { + stop("The list input has to consist of matrices and/or arrays!") + } + # If the input is a matrix or array, reformat it to be an list + x <- list(x) + } + if (!method %in% c("CI", "SNC", "MPM", "FDR")) { # "SNC-BIC", + stop("'method' should be one of c('CI', 'SNC', 'MPM', 'FDR')!") } if (method == "CI") { - betas <- coef.BayesSurvive(x, - type = "mean", CI = 95, - subgroup = subgroup - ) - ret <- (betas$CI.lower > 0) | (betas$CI.upper < 0) + # for an input from "BayesSurvive" + if (inherits(x, "BayesSurvive")) { + betas <- coef.BayesSurvive(x, + type = "mean", CI = 95, + subgroup = subgroup + ) + ret <- (betas$CI.lower > 0) | (betas$CI.upper < 0) + } else { + # for an output from a matrix or array or list + if (!is.list(x)) { + betas <- list() + # if (is.matrix(x)) { + # betas$CI.lower <- apply(x, 2, quantile, 0.025) + # betas$CI.upper <- apply(x, 2, quantile, 0.975) + # } + # if (is.array(x)) { + # betas$CI.lower <- apply(x, c(2,3), quantile, 0.025) + # betas$CI.upper <- apply(x, c(2,3), quantile, 0.975) + # } + # ret <- (betas$CI.lower > 0) | (betas$CI.upper < 0) + } else { + betas <- rep(list(NULL), length(x)) + ret <- rep(list(NULL), length(x)) + for (l in seq_len(length(x))) { + # if (length(dim(x[[l]])) == 2) { + # betas[[l]]$CI.lower <- apply(x[[l]], 2, quantile, 0.025) + # betas[[l]]$CI.upper <- apply(x[[l]], 2, quantile, 0.975) + # } + # if (length(dim(x[[l]])) == 3) { + # betas[[l]]$CI.lower <- apply(x[[l]], c(2,3), quantile, 0.025) + # betas[[l]]$CI.upper <- apply(x[[l]], c(2,3), quantile, 0.975) + # } + betas[[l]]$CI.lower <- apply(x[[l]], seq_len(length(dim(x[[l]])))[-1], quantile, 0.025) + betas[[l]]$CI.upper <- apply(x[[l]], seq_len(length(dim(x[[l]])))[-1], quantile, 0.975) + ret[[l]] <- (betas[[l]]$CI.lower > 0) | (betas[[l]]$CI.upper < 0) + } + } + } } if (method == "SNC") { if (is.na(threshold)) { threshold <- 0.5 } - if (x$input$S > 1 || !x$input$MRF.G) { - x$output$beta.p <- x$output$beta.p[[subgroup]] - } - beta_p <- x$output$beta.p[-(1:(x$input$burnin / x$input$thin + 1)), ] - ret <- rep(FALSE, NCOL(beta_p)) + if (inherits(x, "BayesSurvive")) { + if (x$input$S > 1 || !x$input$MRF.G) { + x$output$beta.p <- x$output$beta.p[[subgroup]] + } + beta_p <- x$output$beta.p[-(1:(x$input$burnin / x$input$thin + 1)), ] + + ret <- rep(FALSE, NCOL(beta_p)) - for (j in seq_len(NCOL(beta_p))) { - if (sum(abs(beta_p[, j]) > sd(beta_p[, j])) / NROW(beta_p) > threshold) { - ret[j] <- TRUE + for (j in seq_len(NCOL(beta_p))) { + if (sum(abs(beta_p[, j]) > sd(beta_p[, j])) / NROW(beta_p) > threshold) { + ret[j] <- TRUE + } + } + } else { + # count the total number of parameters in the list + total_num <- 0 + for (l in seq_len(length(x))) { + total_num <- total_num + prod(dim(x[[l]])[-1]) + } + + ret <- rep(list(NULL), length(x)) + for (l in seq_len(length(x))) { + # define the size of each component in the list + ret[[l]] <- array(FALSE, dim = dim(x[[l]])[-1]) + + if (is.matrix(x[[l]])) { # for an matrix + for (j in seq_len(NCOL(x[[l]]))) { + if (sum(abs(x[[l]][, j]) > sd(x[[l]][, j])) / total_num > threshold) { + ret[[l]][j] <- TRUE + } + } + } else { # for an array + for (j in seq_len(dim(x[[l]])[2])) { + for (k in seq_len(dim(x[[l]])[3])) { + if (sum(abs(x[[l]][, j, k]) > sd(x[[l]][, j, k])) / total_num > threshold) { + ret[[l]][j, k] <- TRUE + } + } + } + } } } } if (method == "MPM") { - if (x$input$S > 1 || !x$input$MRF.G) { - x$output$gamma.margin <- x$output$gamma.margin[[subgroup]] + if (inherits(x, "BayesSurvive")) { + if (x$input$S > 1 || !x$input$MRF.G) { + x$output$gamma.margin <- x$output$gamma.margin[[subgroup]] + } + ret <- (x$output$gamma.margin >= 0.5) + } else { + ret <- rep(list(NULL), length(x)) + + for (l in seq_len(length(x))) { + # define the size of each component in the list + ret[[l]] <- array(FALSE, dim = dim(x[[l]])[-1]) + + if (is.matrix(x[[l]])) { # for an matrix + for (j in seq_len(NCOL(x[[l]]))) { + ret[[l]][j] <- (mean(x[[l]][, j] != 0) >= 0.5) + } + } else { # for an array + for (j in seq_len(dim(x[[l]])[2])) { + for (k in seq_len(dim(x[[l]])[3])) { + ret[[l]][j, k] <- (mean(x[[l]][, j, k] != 0) >= 0.5) + } + } + } + } } - ret <- (x$output$gamma.margin >= 0.5) } if (method == "FDR") { if (is.na(threshold)) { threshold <- 0.05 } - if (x$input$S > 1 || !x$input$MRF.G) { - x$output$gamma.margin <- x$output$gamma.margin[[subgroup]] - } - gammas <- x$output$gamma.margin - - sorted_gammas <- sort(gammas, decreasing = TRUE) - # computing the fdr - fdr <- cumsum((1 - sorted_gammas)) / seq_len(length(sorted_gammas)) - # determine index of the largest fdr less than threshold - if (min(fdr) >= threshold) { - ret <- rep(FALSE, length(gammas)) + if (inherits(x, "BayesSurvive")) { + if (x$input$S > 1 || !x$input$MRF.G) { + x$output$gamma.margin <- x$output$gamma.margin[[subgroup]] + } + gammas <- x$output$gamma.margin + + sorted_gammas <- sort(gammas, decreasing = TRUE) + # computing the fdr + fdr <- cumsum((1 - sorted_gammas)) / seq_len(length(sorted_gammas)) + # determine index of the largest fdr less than threshold + if (min(fdr) >= threshold) { + ret <- rep(FALSE, length(gammas)) + } else { + thecut.index <- max(which(fdr < threshold)) + gammas_threshold <- sorted_gammas[thecut.index] + ret <- (gammas > gammas_threshold) + } } else { - thecut.index <- max(which(fdr < threshold)) - gammas_threshold <- sorted_gammas[thecut.index] - ret <- (gammas > gammas_threshold) + ret <- gammas <- rep(list(NULL), length(x)) + gammas_vec <- NULL + + # compute mPIPs + for (l in seq_len(length(x))) { + # define the size of each component in the list + gammas[[l]] <- array(FALSE, dim = dim(x[[l]])[-1]) + + if (is.matrix(x[[l]])) { # for an matrix + for (j in seq_len(NCOL(x[[l]]))) { + gammas[[l]][j] <- mean(x[[l]][, j] != 0) + } + } else { # for an array + for (j in seq_len(dim(x[[l]])[2])) { + for (k in seq_len(dim(x[[l]])[3])) { + gammas[[l]][j, k] <- mean(x[[l]][, j, k] != 0) + } + } + } + # save all mPIPs into a vector + gammas_vec <- c(gammas_vec, as.vector(gammas[[l]])) + } + + sorted_gammas <- sort(gammas_vec, decreasing = TRUE) + # computing the fdr + fdr <- cumsum((1 - sorted_gammas)) / seq_len(length(sorted_gammas)) + # determine index of the largest fdr less than threshold + if (min(fdr) >= threshold) { + ret <- rep(FALSE, length(gammas_vec)) + } else { + thecut.index <- max(which(fdr < threshold)) + gammas_threshold <- sorted_gammas[thecut.index] + ret_vec <- (gammas_vec > gammas_threshold) + } + + # reformat the results into a list + for (l in seq_len(length(x))) { + ret[[l]] <- array(FALSE, dim = dim(x[[l]])[-1]) + len <- prod(dim(x[[l]])[-1]) + ret[[l]] <- array(ret_vec[1:len], dim = dim(x[[l]])[-1]) + ret_vec <- ret_vec[-c(1:len)] + } } } diff --git a/R/coef.BayesSurvive.R b/R/coef.BayesSurvive.R index 7e06036..25d5350 100644 --- a/R/coef.BayesSurvive.R +++ b/R/coef.BayesSurvive.R @@ -105,7 +105,7 @@ coef.BayesSurvive <- function(object, MPM = FALSE, type = "mean", CI = 95, ) names(tbl)[2] <- type if (SD) tbl <- data.frame(tbl, SD = apply(beta_p, 2, sd)) - #tbl$term <- factor(tbl$term, levels = tbl$term) + # tbl$term <- factor(tbl$term, levels = tbl$term) } else { # MPM coefficients if (object$input$S > 1 || !object$input$MRF.G) { @@ -121,11 +121,10 @@ coef.BayesSurvive <- function(object, MPM = FALSE, type = "mean", CI = 95, term = x_names, estimate = object$output$beta.margin ) - tbl$estimate <- tbl$estimate / object$output$gamma.margin * + tbl$estimate <- tbl$estimate / object$output$gamma.margin * (object$output$gamma.margin >= 0.5) tbl$estimate[is.na(tbl$estimate)] <- 0 } tbl - } diff --git a/R/plot.BayesSurvive.R b/R/plot.BayesSurvive.R index 2ed72df..f0ffc29 100644 --- a/R/plot.BayesSurvive.R +++ b/R/plot.BayesSurvive.R @@ -107,9 +107,8 @@ plot.BayesSurvive <- function(x, type = "mean", interval = TRUE, # pdf("psbcBeta.pdf", height = 5, width = 3.5) # Sys.setenv(`_R_S3_METHOD_REGISTRATION_NOTE_OVERWRITES_` = "false") - pCoef <- ggcoef(tbl, conf.int = interval, ...) + + pCoef <- ggcoef(tbl, conf.int = interval, ...) + xlab(expression(Posterior ~ ~beta)) + ylab("") pCoef # dev.off() - } diff --git a/R/predict.BayesSurvive.R b/R/predict.BayesSurvive.R index 7c81246..982e27e 100644 --- a/R/predict.BayesSurvive.R +++ b/R/predict.BayesSurvive.R @@ -28,10 +28,10 @@ #' @param \dots not used #' #' @return A list object with 5 components if \code{type="brier"} including -#'"model", "times", "Brier", "IBS" and "IPA" (Index of Prediction Accuracy), +#' "model", "times", "Brier", "IBS" and "IPA" (Index of Prediction Accuracy), #' otherwise a list of 7 components with the first component as -#' the specified argument \code{type} and "se", "band", "type", "diag", -#' "baseline" and "times", see function \code{riskRegression::predictCox} for +#' the specified argument \code{type} and "se", "band", "type", "diag", +#' "baseline" and "times", see function \code{riskRegression::predictCox} for #' details #' #' @examples @@ -215,7 +215,7 @@ predict.BayesSurvive <- function(object, survObj.new, type = "brier", metrics = "brier", summary = c("ibs", "ipa"), null.model = TRUE, times = times )$Brier$score - + # Brier <- BrierScore[BrierScore$model != "Null model", ] # extract scores for Null model and the Bayesian Cox model ibs[1, ] <- Brier$Brier[c(length(times), length(times) * 2)] @@ -251,7 +251,7 @@ predict.BayesSurvive <- function(object, survObj.new, type = "brier", # Brier scores of NULL.model do not change Brier.null <- BrierScore[1:(nrow(BrierScore) / 2), -c(1:2)] Brier <- BrierScore[-c(1:(nrow(BrierScore) / 2)), -c(1:2)] - + # calculate Brier scores based on other MCMC estimates for (i in 2:nrow(betas)) { data_train$lp <- survObj$lp.all[, i] # lp_all_train[, i] @@ -271,18 +271,20 @@ predict.BayesSurvive <- function(object, survObj.new, type = "brier", Brier <- Brier + BrierScore[-c(1:(nrow(BrierScore) / 2)), -c(1:2)] } Brier <- rbind(Brier.null, Brier / nrow(betas)) - + # extract IBS for Null model and the Bayesian Cox model ibs[1, ] <- Brier$Brier[c(length(times), length(times) * 2)] ibs[2, ] <- Brier$IBS[c(length(times), length(times) * 2)] ibs[3, ] <- Brier$IPA[c(length(times), length(times) * 2)] } - - #ibs <- rbind(rep(max(times), 2), ibs) + + # ibs <- rbind(rep(max(times), 2), ibs) t0 <- round(max(times), digits = 1) - rownames(ibs) <- c(paste0("Brier(t=", t0, ")"), - paste0("IBS(t:0~", t0, ")"), - paste0("IPA(t=", t0, ")")) + rownames(ibs) <- c( + paste0("Brier(t=", t0, ")"), + paste0("IBS(t:0~", t0, ")"), + paste0("IPA(t=", t0, ")") + ) if (verbose) { # cat(" IBS\n", # " Null model ", ibs[1, 1], @@ -294,5 +296,4 @@ predict.BayesSurvive <- function(object, survObj.new, type = "brier", invisible(Brier) # return(Brier) } - } diff --git a/build/partial.rdb b/build/partial.rdb new file mode 100644 index 0000000..939802b Binary files /dev/null and b/build/partial.rdb differ diff --git a/build/vignette.rds b/build/vignette.rds new file mode 100644 index 0000000..1ba30b1 Binary files /dev/null and b/build/vignette.rds differ diff --git a/inst/doc/BayesCox.R b/inst/doc/BayesCox.R new file mode 100644 index 0000000..0402d91 --- /dev/null +++ b/inst/doc/BayesCox.R @@ -0,0 +1,4 @@ +## ----setup, include=FALSE----------------------------------------------------- +knitr::opts_chunk$set(echo = TRUE, eval = FALSE) +options(rmarkdown.html_vignette.check_title = FALSE) + diff --git a/inst/doc/BayesCox.Rmd b/inst/doc/BayesCox.Rmd new file mode 100755 index 0000000..1dd7402 --- /dev/null +++ b/inst/doc/BayesCox.Rmd @@ -0,0 +1,165 @@ +--- +title: "Bayesian Cox Models with graph-structure priors" +output: rmarkdown::html_vignette +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{Bayesian Cox Models with graph-structure priors} + \usepackage[utf8]{inputenc} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, eval = FALSE) +options(rmarkdown.html_vignette.check_title = FALSE) +``` + +This is a R/Rcpp package **BayesSurvive** for Bayesian survival models with graph-structured selection priors for sparse identification of high-dimensional features predictive of survival ([Madjar et al., 2021](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-021-04483-z)) and its extensions with the use of a fixed graph via a Markov Random Field (MRF) prior for capturing known structure of high-dimensional features, e.g. disease-specific pathways from the Kyoto Encyclopedia of Genes and Genomes (KEGG) database. + +## Installation + +Install the latest released version from [CRAN](https://CRAN.R-project.org/package=BayesSurvive) + +```r +install.packages("BayesSurvive") +``` + +Install the latest development version from [GitHub](https://github.com/ocbe-uio/BayesSurvive) + +```r +#install.packages("remotes") +remotes::install_github("ocbe-uio/BayesSurvive") +``` + + +## Examples + +### Simulate data + +```r +library("BayesSurvive") +# Load the example dataset +data("simData", package = "BayesSurvive") +dataset = list("X" = simData[[1]]$X, + "t" = simData[[1]]$time, + "di" = simData[[1]]$status) +``` + +### Run a Bayesian Cox model + +```r +## Initial value: null model without covariates +initial = list("gamma.ini" = rep(0, ncol(dataset$X))) +# Prior parameters +hyperparPooled = list( + "c0" = 2, # prior of baseline hazard + "tau" = 0.0375, # sd (spike) for coefficient prior + "cb" = 20, # sd (slab) for coefficient prior + "pi.ga" = 0.02, # prior variable selection probability for standard Cox models + "a" = -4, # hyperparameter in MRF prior + "b" = 0.1, # hyperparameter in MRF prior + "G" = simData$G # hyperparameter in MRF prior +) + +## run Bayesian Cox with graph-structured priors +set.seed(123) +fit <- BayesSurvive(survObj = dataset, model.type = "Pooled", MRF.G = TRUE, + hyperpar = hyperparPooled, initial = initial, + nIter = 200, burnin = 100) + +## show posterior mean of coefficients and 95% credible intervals +library("GGally") +plot(fit) + + coord_flip() + + theme(axis.text.x = element_text(angle = 90, size = 7)) +``` + + + +Show the index of selected variables by controlling Bayesian false discovery rate (FDR) at the level $\alpha = 0.05$ + +```r +which( VS(fit, method = "FDR", threshold = 0.05) ) +``` +```{ .text .no-copy } +#[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 194 +``` + + +### Plot time-dependent Brier scores + +The function `BayesSurvive::plotBrier()` can show the time-dependent Brier scores based on posterior mean of coefficients or Bayesian model averaging. + +```r +plotBrier(fit, survObj.new = dataset) +``` + + + +We can also use the function `BayesSurvive::predict()` to obtain the Brier score at time 8.5, the integrated Brier score (IBS) from time 0 to 8.5 and the index of prediction accuracy (IPA). + +```r +predict(fit, survObj.new = dataset, times = 8.5) +``` +```{ .text .no-copy } +## Brier(t=8.5) IBS(t:0~8.5) IPA(t=8.5) +## Null.model 0.2290318 0.08185316 0.0000000 +## Bayesian.Cox 0.1013692 0.02823275 0.5574011 +``` + +### Predict survival probabilities and cumulative hazards + +The function `BayesSurvive::predict()` can estimate the survival probabilities and cumulative hazards. + +```r +predict(fit, survObj.new = dataset, type = c("cumhazard", "survival")) +``` +```{ .text .no-copy } +# observation times cumhazard survival +## +## 1: 1 3.3 7.41e-05 1.00e+00 +## 2: 2 3.3 2.51e-01 7.78e-01 +## 3: 3 3.3 9.97e-07 1.00e+00 +## 4: 4 3.3 1.84e-03 9.98e-01 +## 5: 5 3.3 3.15e-04 1.00e+00 +## --- +## 9996: 96 9.5 7.15e+00 7.88e-04 +## 9997: 97 9.5 3.92e+02 7.59e-171 +## 9998: 98 9.5 2.81e+00 6.02e-02 +## 9999: 99 9.5 3.12e+00 4.42e-02 +## 10000: 100 9.5 1.97e+01 2.79e-09 +``` + +### Run a 'Pooled' Bayesian Cox model with graphical learning + +```r +hyperparPooled <- append(hyperparPooled, list("lambda" = 3, "nu0" = 0.05, "nu1" = 5)) +fit2 <- BayesSurvive(survObj = list(dataset), model.type = "Pooled", MRF.G = FALSE, + hyperpar = hyperparPooled, initial = initial, nIter = 10) +``` + +### Run a Bayesian Cox model with subgroups using fixed graph + +```r +# specify a fixed joint graph between two subgroups +hyperparPooled$G <- Matrix::bdiag(simData$G, simData$G) +dataset2 <- simData[1:2] +dataset2 <- lapply(dataset2, setNames, c("X", "t", "di", "X.unsc", "trueB")) +fit3 <- BayesSurvive(survObj = dataset2, + hyperpar = hyperparPooled, initial = initial, + model.type="CoxBVSSL", MRF.G = TRUE, + nIter = 10, burnin = 5) +``` + +### Run a Bayesian Cox model with subgroups using graphical learning + +```r +fit4 <- BayesSurvive(survObj = dataset2, + hyperpar = hyperparPooled, initial = initial, + model.type="CoxBVSSL", MRF.G = FALSE, + nIter = 3, burnin = 0) +``` + +## References + +> Katrin Madjar, Manuela Zucknick, Katja Ickstadt, Jörg Rahnenführer (2021). +> Combining heterogeneous subgroups with graph‐structured variable selection priors for Cox regression. +> _BMC Bioinformatics_, 22(1):586. DOI: [10.1186/s12859-021-04483-z](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-021-04483-z). diff --git a/inst/doc/BayesCox.html b/inst/doc/BayesCox.html new file mode 100644 index 0000000..5dac1a4 --- /dev/null +++ b/inst/doc/BayesCox.html @@ -0,0 +1,489 @@ + + + + + + + + + + + + + + +Bayesian Cox Models with graph-structure priors + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bayesian Cox Models with graph-structure +priors

+ + + +

This is a R/Rcpp package BayesSurvive for Bayesian +survival models with graph-structured selection priors for sparse +identification of high-dimensional features predictive of survival (Madjar +et al., 2021) and its extensions with the use of a fixed graph via a +Markov Random Field (MRF) prior for capturing known structure of +high-dimensional features, e.g. disease-specific pathways from the Kyoto +Encyclopedia of Genes and Genomes (KEGG) database.

+
+

Installation

+

Install the latest released version from CRAN

+
install.packages("BayesSurvive")
+

Install the latest development version from GitHub

+
#install.packages("remotes")
+remotes::install_github("ocbe-uio/BayesSurvive")
+
+
+

Examples

+
+

Simulate data

+
library("BayesSurvive")
+# Load the example dataset
+data("simData", package = "BayesSurvive")
+dataset = list("X" = simData[[1]]$X, 
+               "t" = simData[[1]]$time,
+               "di" = simData[[1]]$status)
+
+
+

Run a Bayesian Cox model

+
## Initial value: null model without covariates
+initial = list("gamma.ini" = rep(0, ncol(dataset$X)))
+# Prior parameters
+hyperparPooled = list(
+  "c0"     = 2,                      # prior of baseline hazard
+  "tau"    = 0.0375,                 # sd (spike) for coefficient prior
+  "cb"     = 20,                     # sd (slab) for coefficient prior
+  "pi.ga"  = 0.02,                   # prior variable selection probability for standard Cox models
+  "a"      = -4,                     # hyperparameter in MRF prior
+  "b"      = 0.1,                    # hyperparameter in MRF prior
+  "G"      = simData$G               # hyperparameter in MRF prior
+)   
+
+## run Bayesian Cox with graph-structured priors
+set.seed(123)
+fit <- BayesSurvive(survObj = dataset, model.type = "Pooled", MRF.G = TRUE, 
+                    hyperpar = hyperparPooled, initial = initial, 
+                    nIter = 200, burnin = 100)
+
+## show posterior mean of coefficients and 95% credible intervals
+library("GGally")
+plot(fit) + 
+  coord_flip() + 
+  theme(axis.text.x = element_text(angle = 90, size = 7))
+

+

Show the index of selected variables by controlling Bayesian false +discovery rate (FDR) at the level \(\alpha = +0.05\)

+
which( VS(fit, method = "FDR", threshold = 0.05) )
+
#[1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 194
+
+
+

Plot time-dependent Brier scores

+

The function BayesSurvive::plotBrier() can show the +time-dependent Brier scores based on posterior mean of coefficients or +Bayesian model averaging.

+
plotBrier(fit, survObj.new = dataset)
+

+

We can also use the function BayesSurvive::predict() to +obtain the Brier score at time 8.5, the integrated Brier score (IBS) +from time 0 to 8.5 and the index of prediction accuracy (IPA).

+
predict(fit, survObj.new = dataset, times = 8.5)
+
##               Brier(t=8.5) IBS(t:0~8.5) IPA(t=8.5)
+## Null.model      0.2290318   0.08185316  0.0000000
+## Bayesian.Cox    0.1013692   0.02823275  0.5574011
+
+
+

Predict survival probabilities and cumulative hazards

+

The function BayesSurvive::predict() can estimate the +survival probabilities and cumulative hazards.

+
predict(fit, survObj.new = dataset, type = c("cumhazard", "survival"))
+
#        observation times cumhazard  survival
+##              <int> <num>     <num>     <num>
+##     1:           1   3.3  7.41e-05  1.00e+00
+##     2:           2   3.3  2.51e-01  7.78e-01
+##     3:           3   3.3  9.97e-07  1.00e+00
+##     4:           4   3.3  1.84e-03  9.98e-01
+##     5:           5   3.3  3.15e-04  1.00e+00
+##    ---                                      
+##  9996:          96   9.5  7.15e+00  7.88e-04
+##  9997:          97   9.5  3.92e+02 7.59e-171
+##  9998:          98   9.5  2.81e+00  6.02e-02
+##  9999:          99   9.5  3.12e+00  4.42e-02
+## 10000:         100   9.5  1.97e+01  2.79e-09
+
+
+

Run a ‘Pooled’ Bayesian Cox model with graphical learning

+
hyperparPooled <- append(hyperparPooled, list("lambda" = 3, "nu0" = 0.05, "nu1" = 5))
+fit2 <- BayesSurvive(survObj = list(dataset), model.type = "Pooled", MRF.G = FALSE,
+                     hyperpar = hyperparPooled, initial = initial, nIter = 10)
+
+
+

Run a Bayesian Cox model with subgroups using fixed graph

+
# specify a fixed joint graph between two subgroups
+hyperparPooled$G <- Matrix::bdiag(simData$G, simData$G)
+dataset2 <- simData[1:2]
+dataset2 <- lapply(dataset2, setNames, c("X", "t", "di", "X.unsc", "trueB"))
+fit3 <- BayesSurvive(survObj = dataset2, 
+                     hyperpar = hyperparPooled, initial = initial, 
+                     model.type="CoxBVSSL", MRF.G = TRUE, 
+                     nIter = 10, burnin = 5)
+
+
+

Run a Bayesian Cox model with subgroups using graphical +learning

+
fit4 <- BayesSurvive(survObj = dataset2, 
+                     hyperpar = hyperparPooled, initial = initial, 
+                     model.type="CoxBVSSL", MRF.G = FALSE, 
+                     nIter = 3, burnin = 0)
+
+
+
+

References

+
+

Katrin Madjar, Manuela Zucknick, Katja Ickstadt, Jörg Rahnenführer +(2021). Combining heterogeneous subgroups with graph‐structured variable +selection priors for Cox regression. BMC Bioinformatics, +22(1):586. DOI: 10.1186/s12859-021-04483-z.

+
+
+ + + + + + + + + + + diff --git a/man/VS.Rd b/man/VS.Rd index b1fee5d..1c8aae2 100644 --- a/man/VS.Rd +++ b/man/VS.Rd @@ -7,7 +7,8 @@ VS(x, method = "FDR", threshold = NA, subgroup = 1) } \arguments{ -\item{x}{fitted object obtained with \code{BayesSurvive}} +\item{x}{fitted object obtained with \code{BayesSurvive}, or a matrix/array, +or a list consisting of matrices and arrays} \item{method}{variable selection method to choose from \code{c("CI", "SNC", "MPM", "FDR")}. Default is "FDR"} diff --git a/man/predict.BayesSurvive.Rd b/man/predict.BayesSurvive.Rd index f5a101b..793b88a 100644 --- a/man/predict.BayesSurvive.Rd +++ b/man/predict.BayesSurvive.Rd @@ -43,10 +43,10 @@ Cox model} } \value{ A list object with 5 components if \code{type="brier"} including -"model", "times", "Brier", "IBS" and "IPA" (Index of Prediction Accuracy), +"model", "times", "Brier", "IBS" and "IPA" (Index of Prediction Accuracy), otherwise a list of 7 components with the first component as -the specified argument \code{type} and "se", "band", "type", "diag", -"baseline" and "times", see function \code{riskRegression::predictCox} for +the specified argument \code{type} and "se", "band", "type", "diag", +"baseline" and "times", see function \code{riskRegression::predictCox} for details } \description{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a93b284..cc51396 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -41,30 +41,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// matProdVec -arma::mat matProdVec(const arma::mat x, const arma::vec y); -RcppExport SEXP _BayesSurvive_matProdVec(SEXP xSEXP, SEXP ySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat >::type x(xSEXP); - Rcpp::traits::input_parameter< const arma::vec >::type y(ySEXP); - rcpp_result_gen = Rcpp::wrap(matProdVec(x, y)); - return rcpp_result_gen; -END_RCPP -} -// sumMatProdVec -arma::vec sumMatProdVec(const arma::mat x, const arma::vec y); -RcppExport SEXP _BayesSurvive_sumMatProdVec(SEXP xSEXP, SEXP ySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat >::type x(xSEXP); - Rcpp::traits::input_parameter< const arma::vec >::type y(ySEXP); - rcpp_result_gen = Rcpp::wrap(sumMatProdVec(x, y)); - return rcpp_result_gen; -END_RCPP -} // updateBH_cpp arma::vec updateBH_cpp(const arma::mat x_, const arma::vec beta_, const unsigned int J_, const arma::mat ind_r_d_, const arma::vec hPriorSh_, const arma::vec d_, const double c0_); RcppExport SEXP _BayesSurvive_updateBH_cpp(SEXP x_SEXP, SEXP beta_SEXP, SEXP J_SEXP, SEXP ind_r_d_SEXP, SEXP hPriorSh_SEXP, SEXP d_SEXP, SEXP c0_SEXP) { diff --git a/src/init.c b/src/init.c index d03c0ac..acdebc3 100644 --- a/src/init.c +++ b/src/init.c @@ -3,29 +3,25 @@ #include // for NULL #include -/* FIXME: +/* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP _BayesSurvive_calJpost_helper_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _BayesSurvive_matProdVec(SEXP, SEXP); +extern SEXP _BayesSurvive_func_MCMC_graph_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP _BayesSurvive_settingInterval_cpp(SEXP, SEXP, SEXP, SEXP); -extern SEXP _BayesSurvive_sumMatProdVec(SEXP, SEXP); extern SEXP _BayesSurvive_updateBH_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP _BayesSurvive_updateBH_list_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP _BayesSurvive_updateRP_genomic_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); -extern SEXP _BayesSurvive_func_MCMC_graph_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"_BayesSurvive_calJpost_helper_cpp", (DL_FUNC) &_BayesSurvive_calJpost_helper_cpp, 9}, - {"_BayesSurvive_matProdVec", (DL_FUNC) &_BayesSurvive_matProdVec, 2}, + {"_BayesSurvive_func_MCMC_graph_cpp", (DL_FUNC) &_BayesSurvive_func_MCMC_graph_cpp, 6}, {"_BayesSurvive_settingInterval_cpp", (DL_FUNC) &_BayesSurvive_settingInterval_cpp, 4}, - {"_BayesSurvive_sumMatProdVec", (DL_FUNC) &_BayesSurvive_sumMatProdVec, 2}, {"_BayesSurvive_updateBH_cpp", (DL_FUNC) &_BayesSurvive_updateBH_cpp, 7}, {"_BayesSurvive_updateBH_list_cpp", (DL_FUNC) &_BayesSurvive_updateBH_list_cpp, 7}, {"_BayesSurvive_updateRP_genomic_cpp", (DL_FUNC) &_BayesSurvive_updateRP_genomic_cpp, 12}, - {"_BayesSurvive_func_MCMC_graph_cpp", (DL_FUNC) &_BayesSurvive_func_MCMC_graph_cpp, 6}, {NULL, NULL, 0} }; diff --git a/src/updateRP_genomic_cpp.cpp b/src/updateRP_genomic_cpp.cpp index 5c6b628..8afef17 100644 --- a/src/updateRP_genomic_cpp.cpp +++ b/src/updateRP_genomic_cpp.cpp @@ -3,7 +3,6 @@ #include "updateRP_genomic_cpp.h" -// [[Rcpp::export]] arma::mat matProdVec(const arma::mat x, const arma::vec y) { // multiply (element-wise) a matrix to a expanded vector @@ -14,7 +13,6 @@ arma::mat matProdVec(const arma::mat x, const arma::vec y) return spanMat; } -// [[Rcpp::export]] arma::vec sumMatProdVec(const arma::mat x, const arma::vec y) { // compute "arma::sum( matProdVec( ind_r_d_, exp_xbeta ).t(), 1 );" diff --git a/tests/testthat/test-func_MCMC_graph_cpp.R b/tests/testthat/test-func_MCMC_graph_cpp.R index 509f82a..9d95b9e 100644 --- a/tests/testthat/test-func_MCMC_graph_cpp.R +++ b/tests/testthat/test-func_MCMC_graph_cpp.R @@ -12,13 +12,13 @@ initial <- list("gamma.ini" = rep(0, ncol(dataset$X))) # Prior parameters hyperparPooled <- list( - "c0" = 2, # prior of baseline hazard - "tau" = 0.0375, # sd (spike) for coefficient prior - "cb" = 20, # sd (slab) for coefficient prior - "pi.ga" = 0.02, # prior variable selection probability for standard Cox models - "a" = -4, # hyperparameter in MRF prior - "b" = 0.1, # hyperparameter in MRF prior - "G" = simData$G, # hyperparameter in MRF prior + "c0" = 2, # prior of baseline hazard + "tau" = 0.0375, # sd (spike) for coefficient prior + "cb" = 20, # sd (slab) for coefficient prior + "pi.ga" = 0.02, # prior variable selection probability for standard Cox models + "a" = -4, # hyperparameter in MRF prior + "b" = 0.1, # hyperparameter in MRF prior + "G" = simData$G, # hyperparameter in MRF prior "lambda" = 3, "nu0" = 0.05, "nu1" = 5 diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R index 130da2f..b18c58f 100644 --- a/tests/testthat/test-validation.R +++ b/tests/testthat/test-validation.R @@ -10,14 +10,14 @@ dataset <- list( initial <- list("gamma.ini" = rep(0, ncol(dataset$X))) # Prior parameters -hyperparPooled = list( - "c0" = 2, # prior of baseline hazard - "tau" = 0.0375, # sd (spike) for coefficient prior - "cb" = 20, # sd (slab) for coefficient prior - "pi.ga" = 0.02, # prior variable selection probability for standard Cox models - "a" = -4, # hyperparameter in MRF prior - "b" = 0.1, # hyperparameter in MRF prior - "G" = simData$G # hyperparameter in MRF prior +hyperparPooled <- list( + "c0" = 2, # prior of baseline hazard + "tau" = 0.0375, # sd (spike) for coefficient prior + "cb" = 20, # sd (slab) for coefficient prior + "pi.ga" = 0.02, # prior variable selection probability for standard Cox models + "a" = -4, # hyperparameter in MRF prior + "b" = 0.1, # hyperparameter in MRF prior + "G" = simData$G # hyperparameter in MRF prior ) test_that("burnin must be less than nIter", {