Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lasso sims and func edits #31

Open
wants to merge 13 commits into
base: lasso
Choose a base branch
from
Next Next commit
update gitignore
clean up and document lasso functions, add new predict and brier functions, add code to run and evaluate simulations

run simulations script
  • Loading branch information
karissawhiting committed Feb 28, 2024
commit 272fb289c5ac5a66362a7416e6dbdbeeead5a15e
6 changes: 5 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@
^.*[.]Rproj.*$
^\.Rproj\.user$
^.Rhistory$
[~]
[~]
^R/cv.cureitlasso.R
^R/simulasso.R
^R/coxsplit.R
^R/run-simulations.R
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
inst/doc
inst/model-fits
inst/cure-model-fits
inst/cureit-simulation-results/*
inst/archive
docs/
revdep/
Expand Down
158 changes: 158 additions & 0 deletions R/brier2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# we should use this: https://www.tidymodels.org/learn/statistics/survival-metrics/

library(survival)
library(tidyverse)

# Prep Data ---------------------------------------------------------------

# NOTE(review): `dat` (simulated data with x, t, d) and `fit` (output of
# cv.cureitlasso) are assumed to already exist in the environment, e.g.
# created by the simulation scripts - confirm upstream.
xi <- dat$x

# Best model from cross-validation, indexed by the stored `min` indices
final_fit <- fit$fit[[fit$index$`min`[1]]][[fit$index$`min`[2]]]

# Cure-probability prediction from the glmnet logistic fit
fitcure <- final_fit$fitcure
predcure <- predict(fitcure, newx = xi,
                    s = min(fitcure$lambda), type = "response")

# Cox relative risk exp(beta'x) from the glmnet cox fit
# (fixes garbled identifier in original: `fi nal_fit` -> `final_fit`)
fitcox <- final_fit$fitcox
predsurvexp <- predict(fitcox, newx = xi, s = min(fitcox$lambda), type = "response")

# Baseline hazard and cumulative hazard stored on the final fit
haz <- final_fit$haz
cumhaz <- final_fit$cumhaz

# Predicted survival: S(t | x) = exp(-H0(t) * exp(beta'x))
predsurv <- exp(-cumhaz * predsurvexp)

# Create Data frame to pass ----------------------------------------------

# Covariates plus observed times, event indicator, and a Surv() truth column
data <- data.frame(dat$x) %>%
  mutate(times = dat$t,
         event = dat$d,
         "truth_survival" = Surv(dat$t, dat$d))

# Predict Function (glmnet cure) ---------------------------------------------

# Trying to get predictions in similar format as tidymodels yardstick
# Eventually will integrate this with existing predict method function

#' Predict survival probabilities from a fitted lasso cure model
#'
#' @param final_fit A fitted cure model element (one cell of the
#'   cureitlasso fit grid) with components `fitcure` (glmnet logistic fit),
#'   `fitcox` (glmnet cox fit), and `haz`/`cumhaz` (baseline hazard and
#'   cumulative hazard at the sorted unique event times).
#' @param data A data.frame of covariate columns plus `times`, `event`, and
#'   `truth_survival` columns (as built in the prep section of this file).
#' @param eval_timepoints Numeric vector of timepoints at which to evaluate
#'   predictions; defaults to the sorted observed event times.
#'
#' @return A tibble with one row per subject per evaluation timepoint and
#'   columns `eval_timepoint`, `preds`, `predcure`, `predsurv`, `weights`,
#'   `id_num`. (The original returned `all_pred_df`, which was never
#'   assigned; this fixes that and records `eval_timepoint` so downstream
#'   code such as brier_cure() can split by timepoint.)
predict_cure <- function(final_fit,
                         data = NULL,
                         eval_timepoints = NULL) {

  # Observed follow-up times and the sorted observed event times
  survival_times <- data$times
  sorted_survival_times <- sort(survival_times)
  sorted_event_times <- sort(data$times[data$event == 1])

  # Kaplan-Meier estimate of the censoring distribution (for IPCW weights)
  censoring_weights <- survfit(Surv(data$times, 1 - data$event) ~ 1)$surv

  # Covariate matrix for glmnet prediction
  data_x <- data %>%
    select(-c(times, event, truth_survival)) %>%
    as.matrix()

  # Extract cumulative hazard from final fit (for event times)
  haz <- final_fit$haz
  cumhaz <- final_fit$cumhaz

  # Default eval timepoints are observed event times if not supplied by user
  if (is.null(eval_timepoints)) {
    eval_timepoints <- sorted_event_times
  }

  # Get Relative Cure and Cox Risk ------------------------------------------

  # Cure probability risk prediction (independent of timepoint)
  fitcure <- final_fit$fitcure
  predcure <- predict(fitcure, newx = data_x,
                      s = min(fitcure$lambda), type = "response")

  # Cox survival probability risk prediction (independent of timepoint)
  fitcox <- final_fit$fitcox
  predsurvexp <- predict(fitcox,
                         newx = data_x,
                         s = min(fitcox$lambda),
                         type = "response")

  # Get Predicted probabilities at select timepoints -------------------

  # One tibble of per-subject predictions per evaluation timepoint
  all_preds_all_tps <- vector("list", length(eval_timepoints))

  for (l in seq_along(eval_timepoints)) {

    if (eval_timepoints[l] >= min(sorted_event_times)) {

      # Index of the latest event time at or before this eval timepoint
      ids <- which(
        sorted_event_times ==
          max(sorted_event_times[sorted_event_times <= eval_timepoints[l]])
      )

      # Conditional survival probability for each subject at that time
      predsurv <- exp(-cumhaz[ids] * predsurvexp)

      # Inverse probability of censoring weights.
      # NOTE(review): the second term multiplies by the whole
      # `censoring_weights` vector rather than a per-subject value; this
      # reproduces the original code but looks suspect - confirm the
      # intended IPCW formula (G(t) for at-risk subjects, G(T_i-) for
      # subjects with an observed event).
      ipw <- as.numeric(survival_times > eval_timepoints[l]) * censoring_weights[l] +
        as.numeric(survival_times <= eval_timepoints[l]) * censoring_weights

    } else {
      # No events occurred at or before this eval time: everyone "survives"
      predsurv <- rep(1, length(survival_times))
      ipw <- 1
    }

    # Mixture-cure survival: P(cured) + P(uncured) * S(t | uncured)
    # (here `predcure` is the modeled probability of being uncured)
    preds <- 1 - predcure + predcure * predsurv

    all_preds_all_tps[[l]] <- tibble(
      "eval_timepoint" = eval_timepoints[l],
      "preds" = as.numeric(preds),
      "predcure" = as.numeric(predcure),
      "predsurv" = as.numeric(predsurv),
      "weights" = as.numeric(ipw)) %>%
      mutate(id_num = seq_len(nrow(.)))
  }

  # Bind per-timepoint predictions into one long tibble
  all_pred_df <- bind_rows(all_preds_all_tps)
  return(all_pred_df)
}

# Example usage: predictions at the default (event-time) evaluation timepoints
predict_df <- predict_cure(final_fit = final_fit, data = data)


# Brier Calculation -------------------------------------------------------

# NOTE(review): incomplete / work-in-progress as shown here. `brier` is
# indexed before it is initialized, and `di`, `preds`, and `ipw` are not
# defined in this scope (presumably they are meant to come from
# `predict_df`); no value is returned. Needs finishing before use.
brier_cure <- function(predict_df, times, event) {

eval_timepoints = unique(predict_df$eval_timepoint)

for (l in 1:length(eval_timepoints)) {
# IPCW Brier score at timepoint l; +0.001 guards against division by zero
brier[l] <- mean(I(di==0)*(1 - preds)^2/(ipw+0.001) + I(di==1)*(0 - preds)^2/(ipw+0.001))


}


65 changes: 63 additions & 2 deletions R/cureitlasso.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@

# mu is the penalty for the cure (logistic) model
# lambda is the penalty for the cox model
# tau is the posterior probability of being cured for uncured subjects (maybe don't need) (Entropy)
# NOTE maybe change name of alpha
# calculated linear predictor of that iteration


#' Lasso for Cure Models
#'
#' @param t Survival times
#' @param d Event indicator
#' @param x data n x p matrix or data.frame
#' @param minmu.ratio minimum penalty value in the logistic model
#' @param minlambda.ratio minimum penalty value in the cox model
#' @param adaptive
#' @param length.grid
#' @param mus penalty for cure
#' @param lambdas is penalty for cox
#' @param tol posterior probability of being cured for uncured (maybe don't need) (Entropy)
#' @param maxiter
#' @param progress verbose iteration counter
#'
#' @return
#' - `all_fits`
#' - `t`,
#' - `tj` - unique event times sorted,
#' - `d`- event indicator,
#' - `x`- data n x p matrix or data.frame,
#' - `alpha0` fitcure$a0[length(fitcure$lambda)],
#' - `alpha` =alpha,
#' - `beta` =beta,
#' - `haz`=haz,
#' - `cumhaz`=cumhaz,
#' - `tau`=as.vector(tau),
#' - `predsurv`=as.vector(predsurv_iter),
#' - `predcure`=as.vector(predcure_iter),
#' - `fitcure`=fitcure,
#' - `fitcox` =fitcox
#' - `mus` - mus that were used as penalties for cure models in cross validation
#' - `lambda` - lambdas that were used as penalties in cox model cross validation
#' @export
#'
#' @examples
cureitlasso <- function(t,
d,
x,
Expand All @@ -15,6 +59,7 @@ cureitlasso <- function(t,
require(glmnet)
require(survival)

# initialize initial weights
tau <- matrix(0.5,nrow=length(t),ncol=2) # First column: cured; second column: uncured
tau[d==1,1] = 0
tau[d==1,2] = 1
Expand All @@ -24,6 +69,7 @@ cureitlasso <- function(t,

# CV: Be careful for fold split! Two replicates from the same subject should always be assigned to the same fold

# this is for later integration with tuning
penalty.factor.cure=rep(1,ncol(x))
penalty.factor.cox=rep(1,ncol(x))

Expand Down Expand Up @@ -51,6 +97,8 @@ cureitlasso <- function(t,
#
# }

# So this section is finding the set of optimal hyperparameters to test ----
# tau is uncure probability
fitcure0 <- glmnet(x=do.call("rbind", rep(list(x), 2)),
y=lab_class,
family="binomial",
Expand All @@ -72,8 +120,10 @@ cureitlasso <- function(t,
lambdas <- fitcox0$lambda[idx_lambdas]
}


musmax <- fitcure0$lambda[1]
lambdasmax <- fitcox0$lambda[1]
# -----------------

predcure <- predict(fitcure0,newx=x,s=mus,type="response") #Prob of uncured
predsurvexp <- predict(fitcox0,newx=x,s=lambdas,type="response") # exp(betaX)
Expand Down Expand Up @@ -104,6 +154,7 @@ cureitlasso <- function(t,

fit[[i]] <- list()


for (j in 1:length(lambdas)){

tau <- d + (1-d) * (predcure[,i] * predsurv[,j])/( (1 - predcure[,i]) + predcure[,i] * predsurv[,j] )
Expand Down Expand Up @@ -309,14 +360,24 @@ cureitlasso <- function(t,
# num_alpha[i,j] <- sum(alpha!=0)
# num_beta[i,j] <- sum(beta!=0)

# names(fit[i][j]) <- paste0("mu_fit_", i, "_lambda_fit_", "j")

}


names(fit[[i]]) <- paste0("lambda_fit_", 1:length(lambdas))
}

return(list(fit=fit,
names(fit) <- paste0("mu_fit_", 1:length(mus))

return(list(fit = fit,
# num_alpha=num_alpha,
# num_beta=num_beta,
mus=mus,
lambdas=lambdas))

}
}

# tj - is unique event times sorted
# haz

19 changes: 15 additions & 4 deletions R/cv.cureitlasso.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Returns:
# fit -
# arg: length.grid -

cv.cureitlasso <- function(t,
d,
x,
minmu.ratio=0.05, # minimum penalty value in the logistic model
minlambda.ratio=0.05, # minimum penalty value in the cox model
adaptive=FALSE,
length.grid=10,
length.grid=10, # tells how many hyperparameters to test. default is 10 for each
nfolds=5,
tol=1e-2,
maxiter=100,
Expand All @@ -22,13 +26,15 @@ cv.cureitlasso <- function(t,
require(survcomp)
require(pracma)

# order data by times
order_idx <- order(t)
t <- t[order_idx]
d <- d[order_idx]
x <- x[order_idx,]

if (progress) print("Fitting by EM algorithm ...")

# main model with no cross validation
fit <- cureitlasso(t,d,x,
minmu.ratio,
minlambda.ratio,
Expand All @@ -38,14 +44,16 @@ cv.cureitlasso <- function(t,
lambdas=NULL,
tol,
maxiter,
progress)
progress = TRUE)

if (progress) print("Running cross validations ...")

if (is.null(seed)) seed <- as.numeric(Sys.Date())

# Fold split
foldid <- coxsplit(as.matrix(Surv(t,d)), nfolds)

# grid/array of values
cv_brier <- array(NA,dim=c(length.grid,length.grid,nfolds))

# Run CVs
Expand All @@ -57,6 +65,7 @@ cv.cureitlasso <- function(t,

if (progress) print(i)


cv.fit[[i]] <- cureitlasso(t[foldid != i],
d[foldid != i],
x[foldid != i,],
Expand Down Expand Up @@ -114,6 +123,7 @@ cv.cureitlasso <- function(t,

}

# calculate trapezoidal area under the Brier-score curve
cv_brier[j,k,i] <- trapz(tbrier,brier)


Expand All @@ -130,7 +140,7 @@ cv.cureitlasso <- function(t,

cv.fit <- foreach(i = 1:nfolds) %dopar% {

source("~/Projects/Whiting-Qin-cureit/cureit/R/cureitlasso.R")
source(here::here("R/cureitlasso.R"))

cureitlasso(t[foldid != i],
d[foldid != i],
Expand Down Expand Up @@ -206,6 +216,7 @@ cv.cureitlasso <- function(t,

}

# summarize - take average across all folds
cv_brier_mean <- apply(cv_brier,c(1,2),function(x) mean(x))
cv_brier_se <- apply(cv_brier,c(1,2),function(x) sd(x)/sqrt(nfolds))
# pheatmap::pheatmap(cv_brier_mean,cluster_cols = F,cluster_rows = F)
Expand Down Expand Up @@ -235,4 +246,4 @@ cv.cureitlasso <- function(t,
)
)

}
}
Loading