From 7c08809d3312fe50b6d75a720ec2cebeb527a254 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Tue, 16 Apr 2024 16:26:33 +0200 Subject: [PATCH] change time to date in the R code, for groundtruth and python read files --- flepimop/R_packages/flepicommon/R/DataUtils.R | 6 +- .../flepiconfig/R/create_config_data.R | 8 +- flepimop/R_packages/inference/R/functions.R | 6 +- .../inference/R/inference_slot_runner_funcs.R | 4 +- .../inference/R/inference_to_forecast.R | 20 +-- .../inference/archive/InferenceTest.R | 138 +++++++++--------- .../test-aggregate_and_calc_loc_likelihoods.R | 3 +- .../tests/testthat/test-cum_death_forecast.R | 10 +- flepimop/main_scripts/inference_slot.R | 4 +- postprocessing/groundtruth_source.R | 4 +- postprocessing/plot_predictions.R | 4 +- postprocessing/processing_diagnostics.R | 3 +- postprocessing/processing_diagnostics_AWS.R | 3 +- postprocessing/processing_diagnostics_SLURM.R | 3 +- postprocessing/sim_processing_source.R | 12 +- 15 files changed, 112 insertions(+), 116 deletions(-) diff --git a/flepimop/R_packages/flepicommon/R/DataUtils.R b/flepimop/R_packages/flepicommon/R/DataUtils.R index 26cb4f6b9..dc1a1dafe 100755 --- a/flepimop/R_packages/flepicommon/R/DataUtils.R +++ b/flepimop/R_packages/flepicommon/R/DataUtils.R @@ -92,7 +92,7 @@ read_file_of_type <- function(extension,...){ if(extension == 'csv'){ return(function(x){suppressWarnings(readr::read_csv(x,,col_types = cols( .default = col_double(), - time=col_date(), + darw=col_date(), uid=col_character(), comp=col_character(), subpop=col_character() @@ -101,8 +101,8 @@ read_file_of_type <- function(extension,...){ if(extension == 'parquet'){ return(function(x){ tmp <- arrow::read_parquet(x) - if("POSIXct" %in% class(tmp$time)){ - tmp$time <- lubridate::as_date(tz="GMT",tmp$time) + if("POSIXct" %in% class(tmp$date)){ + tmp$date <- lubridate::as_date(tz="GMT",tmp$date) } tmp }) diff --git a/flepimop/R_packages/flepiconfig/R/create_config_data.R b/flepimop/R_packages/flepiconfig/R/create_config_data.R index 187d5cba7..9f8187c7c 100644 --- a/flepimop/R_packages/flepiconfig/R/create_config_data.R +++ b/flepimop/R_packages/flepiconfig/R/create_config_data.R @@ -1146,7 +1146,7 @@ daily_mean_reduction <- function(dat, ) %>% dplyr::select(USPS, subpop, start_date, end_date, mean) - timeline <- tidyr::crossing(time = seq(from=min(dat$start_date), to=max(dat$end_date), by = 1), + timeline <- tidyr::crossing(date = seq(from=min(dat$start_date), to=max(dat$end_date), by = 1), subpop = unique(dat$subpop)) if(any(stringr::str_detect(dat$subpop, '", "'))){ @@ -1175,12 +1175,12 @@ daily_mean_reduction <- function(dat, dplyr::select(subpop, start_date, end_date, mean) %>% dplyr::bind_rows(dat %>% dplyr::filter(subpop!="all") %>% dplyr::ungroup() %>% dplyr::select(-USPS)) %>% dplyr::left_join(timeline) %>% - dplyr::filter(time >= start_date & time <= end_date) %>% - dplyr::group_by(subpop, time) %>% + dplyr::filter(date >= start_date & date <= end_date) %>% + dplyr::group_by(subpop, date) %>% dplyr::summarize(mean = prod(1-mean)) if(plot){ - dat<- ggplot2::ggplot(data= dat, ggplot2::aes(x=time, y=mean))+ + dat<- ggplot2::ggplot(data= dat, ggplot2::aes(x=date, y=mean))+ ggplot2::geom_line()+ ggplot2::facet_wrap(~subpop)+ ggplot2::theme_bw()+ diff --git a/flepimop/R_packages/inference/R/functions.R b/flepimop/R_packages/inference/R/functions.R index e3b754d51..80f29c9a6 100644 --- a/flepimop/R_packages/inference/R/functions.R +++ b/flepimop/R_packages/inference/R/functions.R @@ -300,9 +300,9 @@ calc_prior_likadj <- function(params, ##' compute_cumulative_counts <- function(sim_hosp) { res <- sim_hosp %>% - gather(var, value, -time, -subpop) %>% + gather(var, value, -date, -subpop) %>% group_by(subpop, var) %>% - arrange(time) %>% + arrange(date) %>% mutate(cumul = cumsum(value)) %>% ungroup() %>% pivot_wider(names_from = "var", values_from = c("value", "cumul")) %>% @@ -324,7 +324,7 @@ compute_cumulative_counts <- function(sim_hosp) { ##' compute_totals <- function(sim_hosp) { sim_hosp %>% - group_by(time) %>% + group_by(date) %>% summarise_if(is.numeric, sum, na.rm = TRUE) %>% mutate(subpop = "all") %>% select(all_of(colnames(sim_hosp))) %>% diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index a8d7ddece..9d01bf483 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -52,11 +52,11 @@ aggregate_and_calc_loc_likelihoods <- function( dplyr::filter( modeled_outcome, !!rlang::sym(obs_subpop) == location, - time %in% unique(obs$date[obs$subpop == location]) + date %in% unique(obs$date[obs$subpop == location]) ) %>% ## Reformat into form the algorithm is looking for inference::getStats( - "time", + "date", "sim_var", stat_list = targets_config, start_date = start_date, diff --git a/flepimop/R_packages/inference/R/inference_to_forecast.R b/flepimop/R_packages/inference/R/inference_to_forecast.R index da7846ef3..13431f056 100644 --- a/flepimop/R_packages/inference/R/inference_to_forecast.R +++ b/flepimop/R_packages/inference/R/inference_to_forecast.R @@ -17,7 +17,7 @@ cum_death_forecast <- function (sim_data, require(dplyr) rc <- sim_data %>% - filter(time>start_date)%>% + filter(date>start_date)%>% inner_join(cum_dat)%>% group_by(sim_num, !!sym(loc_column))%>% mutate(cum_deaths_corr = cumsum(incidD)+cumDeaths)%>% @@ -39,7 +39,7 @@ cum_death_forecast <- function (sim_data, ##' @param weights if not NA, the weights for the mean ##' @param loc_col the name of the location column ##' -##' @return a forecast with columns time (end day), quantile steps_ahead and deaths +##' @return a forecast with columns date (end day), quantile steps_ahead and deaths ##' ##' @export create_cum_death_forecast <- function(sim_data, @@ -51,15 +51,15 @@ create_cum_death_forecast <- function(sim_data, loc_column="USPS") { ##Sanity checks - if(forecast_date>max(obs_data$time)+1) {stop("forecast date must be within one day after the range of observed times")} - if(forecast_date+1max(obs_data$date)+1) {stop("forecast date must be within one day after the range of observed times")} + if(forecast_date+1% - filter(time==forecast_date)%>% + filter(date==forecast_date)%>% select(!!sym(loc_column),cumDeaths) forecast_sims <- cum_death_forecast(sim_data, @@ -70,7 +70,7 @@ create_cum_death_forecast <- function(sim_data, ## CSSE data updates at midnight so forecasts will not typically have a forecast date one day after the end of the obs_data print(glue::glue("Accumulate deaths through {forecast_date-1}, typically for CSSE aggregation.")) start_deaths <- obs_data%>% - filter(time==forecast_date-1)%>% + filter(date==forecast_date-1)%>% select(!!sym(loc_column),cumDeaths) forecast_sims <- cum_death_forecast(sim_data, @@ -88,7 +88,7 @@ create_cum_death_forecast <- function(sim_data, } rc <- forecast_sims%>% - group_by(time, !!sym(loc_column))%>% + group_by(date, !!sym(loc_column))%>% summarize(x=list(enframe(c(quantile(cum_deaths_corr, probs=c(0.01, 0.025, seq(0.05, 0.95, by = 0.05), 0.975, 0.99)), mean=mean(cum_deaths_corr)), @@ -99,11 +99,11 @@ create_cum_death_forecast <- function(sim_data, ##Append on the the other deaths. rc<-dplyr::bind_rows(rc, obs_data%>% - select(time, !!sym(loc_column), cumDeaths)%>% + select(date, !!sym(loc_column), cumDeaths)%>% mutate(quantile="data")) rc<- rc%>% - mutate(steps_ahead=as.numeric(time-forecast_date)) + mutate(steps_ahead=as.numeric(date-forecast_date)) return(rc) diff --git a/flepimop/R_packages/inference/archive/InferenceTest.R b/flepimop/R_packages/inference/archive/InferenceTest.R index 68c875732..dfa52efc0 100644 --- a/flepimop/R_packages/inference/archive/InferenceTest.R +++ b/flepimop/R_packages/inference/archive/InferenceTest.R @@ -1,6 +1,6 @@ ##' ##' Function that does a rapid testing of the inference procedures on a single -##' time series. +##' date series. ##' ##' @param seeding the initial seeding ##' @param config the config file with inference info. @@ -44,8 +44,8 @@ single_loc_inference_test <- function(to_fit, # Data to fit obs <- to_fit - # Times based on config - sim_times <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") + # dates based on config + sim_dates <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") # Get unique geonames geonames <- unique(obs[[obs_subpop]]) @@ -83,7 +83,7 @@ single_loc_inference_test <- function(to_fit, seeding_init <- seeding for (i in 1:nrow(seeding_init)) { - seeding_init$date[i] <- sample(sim_times[1:20], 1) + seeding_init$date[i] <- sample(sim_dates[1:20], 1) # TODO change amount based on data seeding_init$amount[i] <- rpois(1, 10) } @@ -102,7 +102,7 @@ single_loc_inference_test <- function(to_fit, write_csv(npi_file, append = file.exists(npi_file)) # Simulate initial hospitalizatoins - initial_sim_hosp <- simulate_single_epi(times = sim_times, + initial_sim_hosp <- simulate_single_epi(dates = sim_dates, seeding = initial_seeding, R0 = R0, S0 = S0, @@ -110,13 +110,13 @@ single_loc_inference_test <- function(to_fit, sigma = sigma, beta_mults = 1-initial_npis$value) %>% single_hosp_run(config) %>% - dplyr::filter(time %in% obs$date) + dplyr::filter(date %in% obs$date) write_csv(initial_sim_hosp, glue::glue("{epi_dir}sim_slot_{s}_index_0.csv")) initial_sim_stats <- getStats( initial_sim_hosp, - "time", + "date", "sim_var", end_date = max(obs$date), config$inference$statistics @@ -156,7 +156,7 @@ single_loc_inference_test <- function(to_fit, current_npis <- perturb_expand_npis(initial_npis, config$interventions$settings) # Simulate hospitalizatoins - sim_hosp <- simulate_single_epi(times = sim_times, + sim_hosp <- simulate_single_epi(dates = sim_dates, seeding = current_seeding, R0 = R0, S0 = S0, @@ -164,11 +164,11 @@ single_loc_inference_test <- function(to_fit, sigma = sigma, beta_mults = 1-current_npis$reduction) %>% single_hosp_run(config) %>% - dplyr::filter(time %in% obs$date) + dplyr::filter(date %in% obs$date) sim_stats <- getStats( sim_hosp, - "time", + "date", "sim_var", end_date = max(obs$date), config$inference$statistics @@ -239,7 +239,7 @@ single_loc_inference_test <- function(to_fit, ##' ##' Function that does a rapid testing of the inference procedures on a single -##' time series. +##' date series. ##' ##' @param seeding the initial seeding ##' @param config the config file with inference info. @@ -287,8 +287,8 @@ multi_loc_inference_test <- function(to_fit, # Data to fit obs <- to_fit - # Times based on config - sim_times <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") + # dates based on config + sim_dates <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") # Get unique geonames geonames <- unique(obs[[obs_subpop]]) @@ -332,7 +332,7 @@ multi_loc_inference_test <- function(to_fit, seeding_init <- seedings for (i in 1:nrow(seeding_init)) { - seeding_init$date[i] <- sample(sim_times[1:20], 1) + seeding_init$date[i] <- sample(sim_dates[1:20], 1) # TODO change amount based on data seeding_init$amount[i] <- rpois(1, 10) } @@ -354,7 +354,7 @@ multi_loc_inference_test <- function(to_fit, pivot_wider(values_from = "value", names_from = "subpop", id_cols = "date") # Simulate epi - initial_sim_hosp <- simulate_multi_epi(times = sim_times, + initial_sim_hosp <- simulate_multi_epi(dates = sim_dates, seedings = initial_seeding, R0 = R0, S0s = S0s, @@ -364,7 +364,7 @@ multi_loc_inference_test <- function(to_fit, mob = mob, beta_mults = 1-as.matrix(npi_mat[,-1])) %>% multi_hosp_run(N, config) %>% - dplyr::filter(time %in% obs$date) + dplyr::filter(date %in% obs$date) write_csv(initial_sim_hosp, glue::glue("{epi_dir}sim_slot_{s}_index_0_multi.csv")) @@ -372,10 +372,10 @@ multi_loc_inference_test <- function(to_fit, for(location in all_locations) { local_sim_hosp <- dplyr::filter(initial_sim_hosp, !!rlang::sym(obs_subpop) == location) %>% - dplyr::filter(time %in% unique(obs$date[obs$subpop == location])) + dplyr::filter(date %in% unique(obs$date[obs$subpop == location])) initial_sim_stats <- inference::getStats( local_sim_hosp, - "time", + "date", "sim_var", #end_date = max(obs$date[obs[[obs_subpop]] == location]), stat_list = config$inference$statistics @@ -427,7 +427,7 @@ multi_loc_inference_test <- function(to_fit, pivot_wider(values_from = "reduction", names_from = "subpop", id_cols = "date") # Simulate hospitalizatoins - sim_hosp <- simulate_multi_epi(times = sim_times, + sim_hosp <- simulate_multi_epi(dates = sim_dates, seedings = current_seeding, R0 = R0, S0s = S0s, @@ -437,16 +437,16 @@ multi_loc_inference_test <- function(to_fit, mob = mob, beta_mults = 1-as.matrix(npi_mat[,-1])) %>% multi_hosp_run(N, config) %>% - dplyr::filter(time %in% obs$date) + dplyr::filter(date %in% obs$date) current_likelihood_data <- list() for(location in all_locations) { local_sim_hosp <- dplyr::filter(sim_hosp, !!rlang::sym(obs_subpop) == location) %>% - dplyr::filter(time %in% unique(obs$date[obs$subpop == location])) + dplyr::filter(date %in% unique(obs$date[obs$subpop == location])) sim_stats <- inference::getStats( local_sim_hosp, - "time", + "date", "sim_var", #end_date = max(obs$date[obs[[obs_subpop]] == location]), stat_list = config$inference$statistics @@ -533,11 +533,11 @@ multi_loc_inference_test <- function(to_fit, ##' ##' TODO: Modify to just call python code ##' -##' @param times the times to run the epidemic on +##' @param dates the dates to run the epidemic on ##' @param seeding the seeding to use ##' @param R0 the reproductive number ##' @param S0 the initial number susceptible -##' @param gamma the time between compartments +##' @param gamma the date between compartments ##' @param sigma the incubation period/latent period ##' @param beta_mults multipliers to capture the impact of interventions ##' @param alpha defaults to 1 @@ -545,13 +545,13 @@ multi_loc_inference_test <- function(to_fit, ##' @return the epimic states ##' ##' @export -simulate_single_epi <- function(times, +simulate_single_epi <- function(dates, seeding, S0, R0, gamma, sigma, - beta_mults = rep(1, length(times)), + beta_mults = rep(1, length(dates)), alpha = 1) { require(tidyverse) @@ -560,7 +560,7 @@ simulate_single_epi <- function(times, beta <- R0 * gamma/3 ##set up return matrix - epi <- matrix(0, nrow=length(times), ncol=7) + epi <- matrix(0, nrow=length(dates), ncol=7) colnames(epi) <- c("S","E","I1","I2","I3","R","incidI") @@ -577,12 +577,12 @@ simulate_single_epi <- function(times, epi[1,S] <- S0 ##get the indices where seeding occurs - seed_times <- which(times%in%seeding$date) + seed_dates <- which(dates%in%seeding$date) - for (i in 1:(length(times)-1)) { + for (i in 1:(length(dates)-1)) { ##Seed if possible - if(i%in% seed_times) { - tmp <- seeding$amount[which(i==seed_times)] + if(i%in% seed_dates) { + tmp <- seeding$amount[which(i==seed_dates)] epi[i,S] <- epi[i,S] - tmp epi[i,E] <- epi[i,E] + tmp } @@ -607,8 +607,8 @@ simulate_single_epi <- function(times, } epi <- as.data.frame(epi) %>% - mutate(time=times)%>% - pivot_longer(-time,values_to="N", names_to="comp") + mutate(date=dates)%>% + pivot_longer(-date,values_to="N", names_to="comp") return(epi) } @@ -620,22 +620,22 @@ simulate_single_epi <- function(times, ##' ##' TODO: Modify to just call python code ##' -##' @param times the times to run the epidemic on +##' @param dates the dates to run the epidemic on ##' @param seedings the seeding to use ##' @param R0s the reproductive number ##' @param S0s the initial number susceptibles -##' @param gamma the time between compartments +##' @param gamma the date between compartments ##' @param sigma the incubation period/latent period ##' @param N the number of nodes ##' @param mob mobility matrix N x N -##' @param pa proportion of time away +##' @param pa proportion of date away ##' @param beta_mults multipliers to capture the impact of interventions ##' @param alpha defaults to 1 ##' ##' @return the epimic states ##' ##' @export -simulate_multi_epi <- function(times, +simulate_multi_epi <- function(dates, seedings, S0s, R0, @@ -644,12 +644,12 @@ simulate_multi_epi <- function(times, N, mob, pa = .5, - beta_mults = matrix(rep(1, N*length(times)), ncol = N), + beta_mults = matrix(rep(1, N*length(dates)), ncol = N), alpha = 1) { require(tidyverse) - # proportion time away + # proportion date away paoverh <- pa/S0s oneminusp <- 1-paoverh*rowSums(mob) @@ -657,7 +657,7 @@ simulate_multi_epi <- function(times, beta <- R0 * gamma/3 ##set up return matrix - epi <- array(0, dim = c(length(times), 7, N)) + epi <- array(0, dim = c(length(dates), 7, N)) colnames(epi) <- c("S","E","I1","I2","I3","R","incidI") ##column indices for convenience @@ -673,17 +673,17 @@ simulate_multi_epi <- function(times, epi[1,S,] <- S0s ##get the indices where seeding occurs - seed_times <- which(times%in%seedings$date) + seed_dates <- which(dates%in%seedings$date) - for (i in 1:(length(times)-1)) { + for (i in 1:(length(dates)-1)) { betaIh <- matrix(beta*beta_mults[i,]*(colSums(epi[i,I1:I3,])^alpha)/S0s, ncol = 1) mobbetaIh <- mob %*% betaIh for (j in 1:N) { ##Seed if possible - if(i%in% seed_times) { - ind <- which(i==seed_times) + if(i%in% seed_dates) { + ind <- which(i==seed_dates) if (ind == j) { tmp <- seedings$amount[ind] epi[i,S,j] <- epi[i,S,j] - tmp @@ -714,8 +714,8 @@ simulate_multi_epi <- function(times, epi <- lapply(1:N, function(x) as.data.frame(epi[,,x]) %>% mutate(subpop = x)) %>% bind_rows() %>% - mutate(time=rep(times, N)) %>% - pivot_longer(cols = c(-time, -subpop), values_to="N", names_to="comp") + mutate(date=rep(dates, N)) %>% + pivot_longer(cols = c(-date, -subpop), values_to="N", names_to="comp") return(epi) } @@ -736,9 +736,9 @@ single_hosp_run <- function(epi, config) { p_death <- config$hospitalization$parameters$p_death p_death_rate <- config$hospitalization$parameters$p_death_rate p_hosp <- p_death/p_death_rate - time_hosp_pars <- as_evaled_expression(config$hospitalization$parameters$time_hosp) - time_disch_pars <- as_evaled_expression(config$hospitalization$parameters$time_disch) - time_hosp_death_pars <- as_evaled_expression(config$hospitalization$parameters$time_hosp_death) + date_hosp_pars <- as_evaled_expression(config$hospitalization$parameters$date_hosp) + date_disch_pars <- as_evaled_expression(config$hospitalization$parameters$date_disch) + date_hosp_death_pars <- as_evaled_expression(config$hospitalization$parameters$date_hosp_death) dat_ <- dplyr::filter(epi, comp == "incidI") %>% select(-comp) %>% rename(incidI = N) %>% @@ -749,9 +749,9 @@ single_hosp_run <- function(epi, config) { dat_ <- select(dat_, -subpop) } - dat_H <- hosp_create_delay_frame('incidI',p_hosp,dat_,time_hosp_pars,"H") - data_D <- hosp_create_delay_frame('incidH',p_death_rate,dat_H,time_hosp_death_pars,"D") - R_delay_ <- round(exp(time_disch_pars[1])) + dat_H <- hosp_create_delay_frame('incidI',p_hosp,dat_,date_hosp_pars,"H") + data_D <- hosp_create_delay_frame('incidH',p_death_rate,dat_H,date_hosp_death_pars,"D") + R_delay_ <- round(exp(date_disch_pars[1])) res <- Reduce(function(x, y, ...) merge(x, y, all = TRUE, ...), list(dat_, dat_H, data_D)) %>% tidyr::replace_na( @@ -759,7 +759,7 @@ single_hosp_run <- function(epi, config) { incidH = 0, incidD = 0, hosp_curr = 0)) %>% - dplyr::mutate(date_inds = as.integer(time - min(time) + 1)) %>% + dplyr::mutate(date_inds = as.integer(date - min(date) + 1)) %>% dplyr::arrange(date_inds) %>% split(.$uid) %>% purrr::map_dfr(function(.x){ @@ -781,14 +781,14 @@ single_hosp_run <- function(epi, config) { multi_hosp_run <- function(epi, N, config) { map_df(1:N, ~ single_hosp_run(dplyr::filter(epi, subpop == .), config)) %>% - dplyr::filter(time >= config$start_date, - time <= config$end_date) + dplyr::filter(date >= config$start_date, + date <= config$end_date) } ##' ##' Create NPIs dataframe for a single location for input to SEIR code ##' -##' @param times vector of times to use +##' @param dates vector of dates to use ##' @param config configuration file ##' ##' @return a dataframe with npi reduction values by date @@ -797,8 +797,8 @@ multi_hosp_run <- function(epi, N, config) { ##' @export npis_dataframe <- function(config, random = F, subpop = 1, offset = 0, intervention_multi = 1) { - times <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") - npis <- tibble(date = times, value = 0, modifier_name = "local_variation", subpop = subpop) + dates <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") + npis <- tibble(date = dates, value = 0, modifier_name = "local_variation", subpop = subpop) interventions <- config$interventions$settings date_changes <- map_chr(interventions[1:2], ~ifelse(is.null(.$period_start_date), @@ -809,17 +809,17 @@ npis_dataframe <- function(config, random = F, subpop = 1, offset = 0, intervent # Apply interventions for (d in 1:length(date_changes)) { - npis$value[times >= date_changes[d]] <- interventions[[d]]$value$mean * intervention_multi - npis$modifier_name[times >= date_changes[d]] <- names(interventions)[d] + npis$value[dates >= date_changes[d]] <- interventions[[d]]$value$mean * intervention_multi + npis$modifier_name[dates >= date_changes[d]] <- names(interventions)[d] } if(random) { # Randomly assign interventions for (d in 1:length(date_changes)) { if (names(interventions)[d] == "local_variation") { - npis$value[times >= date_changes[d]] <- runif(1, -.5, .5) + npis$value[dates >= date_changes[d]] <- runif(1, -.5, .5) } else { - npis$value[times >= date_changes[d]] <- runif(1, 0, 1) + npis$value[dates >= date_changes[d]] <- runif(1, 0, 1) } } } @@ -840,14 +840,14 @@ npis_dataframe <- function(config, random = F, subpop = 1, offset = 0, intervent synthetic_data <- function(S0, seeding, config) { # Simulate single epidemic - times <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") + dates <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") npis <- npis_dataframe(config) R0 <- flepicommon::as_evaled_expression(config$seir$parameters$R0s$value) gamma <- flepicommon::as_evaled_expression(config$seir$parameters$gamma$value) sigma <- flepicommon::as_evaled_expression(config$seir$parameters$sigma) # Simulate epi - epi <- simulate_single_epi(times = times, + epi <- simulate_single_epi(dates = dates, seeding = seeding, R0 = R0, S0 = S0, @@ -858,7 +858,7 @@ synthetic_data <- function(S0, seeding, config) { # - - - - # Setup fake data fake_data <- single_hosp_run(epi, config) %>% - rename(date = time) %>% + rename(date = date) %>% dplyr::filter(date >= config$start_date, date <= config$end_date) @@ -882,7 +882,7 @@ synthetic_data_multi <- function(S0s, seedings, mob, config, offsets, interventi N <- length(S0s) # number of nodes # Simulate single epidemic - times <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") + dates <- seq.Date(as.Date(config$start_date), as.Date(config$end_date), by = "1 days") npis <- pmap(list(x = 1:N, y = offsets, z = interventions_multi), function(x,y,z) npis_dataframe(config, @@ -899,7 +899,7 @@ synthetic_data_multi <- function(S0s, seedings, mob, config, offsets, interventi pivot_wider(values_from = "value", names_from = "subpop", id_cols = "date") # Simulate epi - epi <- simulate_multi_epi(times = times, + epi <- simulate_multi_epi(dates = dates, seedings = seedings, R0 = R0, S0s = S0s, @@ -913,7 +913,7 @@ synthetic_data_multi <- function(S0s, seedings, mob, config, offsets, interventi # Setup fake data fake_data <- map_df(1:N, ~ single_hosp_run(dplyr::filter(epi, subpop == .), config)) %>% - rename(date = time) %>% + rename(date = date) %>% dplyr::filter(date >= config$start_date, date <= config$end_date) @@ -921,7 +921,7 @@ synthetic_data_multi <- function(S0s, seedings, mob, config, offsets, interventi } ##' -##' Expands the perturb npi into a dataframe with time +##' Expands the perturb npi into a dataframe with date ##' ##' @param npis the npis in "long" version (one value by npi, date and place) ##' @param intervention_settings intervention_settings from the config file diff --git a/flepimop/R_packages/inference/tests/testthat/test-aggregate_and_calc_loc_likelihoods.R b/flepimop/R_packages/inference/tests/testthat/test-aggregate_and_calc_loc_likelihoods.R index a4f2a961e..704e096fe 100644 --- a/flepimop/R_packages/inference/tests/testthat/test-aggregate_and_calc_loc_likelihoods.R +++ b/flepimop/R_packages/inference/tests/testthat/test-aggregate_and_calc_loc_likelihoods.R @@ -80,8 +80,7 @@ get_minimal_setup <- function () { sim_hosp <- obs %>% dplyr::rename(incidD = death_incid, incidC = confirmed_incid) %>% dplyr::mutate(incidD = incidD + rpois(length(incidD), incidD))%>% - dplyr::mutate(incidC = incidC + rpois(length(incidC), incidC))%>% - dplyr::rename(time=date) + dplyr::mutate(incidC = incidC + rpois(length(incidC), incidC)) ##the observed node name. obs_subpop <- "subpop" diff --git a/flepimop/R_packages/inference/tests/testthat/test-cum_death_forecast.R b/flepimop/R_packages/inference/tests/testthat/test-cum_death_forecast.R index ad9b9f87d..496ff8b6e 100644 --- a/flepimop/R_packages/inference/tests/testthat/test-cum_death_forecast.R +++ b/flepimop/R_packages/inference/tests/testthat/test-cum_death_forecast.R @@ -3,7 +3,7 @@ context("cu_death_forecast") test_that("cum_death_forecast gives correct results, for one loc 100 and on 0", { sim_data <- dplyr::tibble(incidD=rpois(200, 10), - time=rep(as.Date("2020-01-01")+1:100,2), + date=rep(as.Date("2020-01-01")+1:100,2), sim_num=1, USPS=rep(c("NY","WA"), each=100)) @@ -28,7 +28,7 @@ test_that("cum_death_forecast gives correct results, for one loc 100 and on 0", test_that("Giving a date results in an appropriate forecast only for the future", { sim_data <- dplyr::tibble(incidD=rpois(200, 10), - time=rep(as.Date("2020-01-01")+0:99,2), + date=rep(as.Date("2020-01-01")+0:99,2), sim_num=1, USPS=rep(c("NY","WA"), each=100)) @@ -43,11 +43,11 @@ test_that("Giving a date results in an appropriate forecast only for the future" "USPS") - expect_that(min(rc$time), equals(as.Date("2020-01-16"))) + expect_that(min(rc$date), equals(as.Date("2020-01-16"))) expect_that(nrow(rc),equals(2*85)) expect_that(max(rc$cum_deaths_corr[rc$USPS=="NY"]), - equals(sum(sim_data$incidD[sim_data$time>"2020-01-15" & sim_data$USPS=="NY"]))) + equals(sum(sim_data$incidD[sim_data$date>"2020-01-15" & sim_data$USPS=="NY"]))) expect_that(max(rc$cum_deaths_corr[rc$USPS=="WA"]), - equals(sum(sim_data$incidD[sim_data$time>"2020-01-15" & sim_data$USPS=="WA"])+100)) + equals(sum(sim_data$incidD[sim_data$date>"2020-01-15" & sim_data$USPS=="WA"])+100)) }) diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index b164489b8..5e45d0e19 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -290,7 +290,7 @@ if (config$inference$do_inference){ # function to calculate the likelihood when comparing simulation output (sim_hosp) to ground truth data likelihood_calculation_fun <- function(sim_hosp){ - sim_hosp <- dplyr::filter(sim_hosp,sim_hosp$time >= min(obs$date),sim_hosp$time <= max(obs$date)) + sim_hosp <- dplyr::filter(sim_hosp,sim_hosp$date >= min(obs$date),sim_hosp$date <= max(obs$date)) lhs <- unique(sim_hosp[[obs_subpop]]) rhs <- unique(names(data_stats)) all_locations <- rhs[rhs %in% lhs] @@ -588,7 +588,7 @@ for(seir_modifiers_scenario in seir_modifiers_scenarios) { # run if (config$inference$do_inference){ sim_hosp <- flepicommon::read_file_of_type(gsub(".*[.]","",this_global_files[['hosp_filename']]))(this_global_files[['hosp_filename']]) %>% - dplyr::filter(time >= min(obs$date),time <= max(obs$date)) + dplyr::filter(date >= min(obs$date),date <= max(obs$date)) lhs <- unique(sim_hosp[[obs_subpop]]) rhs <- unique(names(data_stats)) all_locations <- rhs[rhs %in% lhs] diff --git a/postprocessing/groundtruth_source.R b/postprocessing/groundtruth_source.R index d92f191d5..a1f03c39f 100644 --- a/postprocessing/groundtruth_source.R +++ b/postprocessing/groundtruth_source.R @@ -100,12 +100,12 @@ clean_gt_forplots <- function(gt_data){ pivot_wider(names_from = target, values_from = incid) gt_long <- gt_long %>% - rename(time=date, USPS=source) + rename(USPS=source) gt_long <- gt_long %>% rename(outcome_name = target, outcome = incid) gt_data <- gt_data %>% - rename(time=date, USPS=source) + rename(USPS=source) return(gt_data) } diff --git a/postprocessing/plot_predictions.R b/postprocessing/plot_predictions.R index 5397966f5..d512a2d93 100644 --- a/postprocessing/plot_predictions.R +++ b/postprocessing/plot_predictions.R @@ -39,7 +39,7 @@ state_cw <- fips_us_county %>% # GROUND TRUTH ------------------------------------------------------------ gt_data <- gt_data %>% - mutate(time = lubridate::as_date(time)) %>% mutate(date = time) + mutate(date = lubridate::as_date(date)) %>% colnames(gt_data) <- gsub("incidI", "incidC", colnames(gt_data)) gt_outcomes <- outcomes_[outcomes_ != "I" & sapply(X = paste0("incid", outcomes_), FUN = function(x=X, y) any(grepl(pattern = x, x = y)), y = colnames(gt_data)) ] outcomes_gt_ <- outcomes_[outcomes_ %in% gt_outcomes] @@ -56,7 +56,7 @@ gt_data_2 <- gt_data_2 %>% mutate(cumH = 0) # incidH is only cumulative from sta gt_cl <- NULL if (any(outcomes_time_=="weekly")) { # Incident - gt_data_st_week <- get_weekly_incid(gt_data %>% dplyr::select(time, subpop, USPS, paste0("incid", outcomes_gt_[outcomes_time_gt_=="weekly"])) %>% mutate(sim_num = 0), + gt_data_st_week <- get_weekly_incid(gt_data %>% dplyr::select(date, subpop, USPS, paste0("incid", outcomes_gt_[outcomes_time_gt_=="weekly"])) %>% mutate(sim_num = 0), # gt_data_st_week <- get_weekly_incid(gt_data %>% dplyr::select(time, subpop, USPS, paste0("incid", outcomes_gt_[outcomes_time_gt_=="weekly"])) %>% mutate(sim_num = 0), outcomes = outcomes_gt_[outcomes_time_gt_=="weekly"]) diff --git a/postprocessing/processing_diagnostics.R b/postprocessing/processing_diagnostics.R index ace858422..74bc68032 100644 --- a/postprocessing/processing_diagnostics.R +++ b/postprocessing/processing_diagnostics.R @@ -283,9 +283,8 @@ for(i in 1:length(USPS)){ filter_gt_data <- gt_data %>% filter(USPS == state) %>% - select(USPS, subpop, time, dplyr::contains("incid") & !dplyr::contains("_")) %>% + select(USPS, subpop, date, dplyr::contains("incid") & !dplyr::contains("_")) %>% pivot_longer(dplyr::contains('incid'), names_to = "outcome", values_to = "value") %>% - rename(date = time) %>% mutate(week = lubridate::week(date)) %>% group_by(outcome, week) %>% mutate(rollmean = zoo::rollmean(x = value, k = 7, fill = NA)) diff --git a/postprocessing/processing_diagnostics_AWS.R b/postprocessing/processing_diagnostics_AWS.R index fd1853542..5a8af5c27 100644 --- a/postprocessing/processing_diagnostics_AWS.R +++ b/postprocessing/processing_diagnostics_AWS.R @@ -282,9 +282,8 @@ for(i in 1:length(USPS)){ filter_gt_data <- gt_data %>% filter(USPS == state) %>% - select(USPS, subpop, time, dplyr::contains("incid") & !dplyr::contains("_")) %>% + select(USPS, subpop, date, dplyr::contains("incid") & !dplyr::contains("_")) %>% pivot_longer(dplyr::contains('incid'), names_to = "outcome", values_to = "value") %>% - rename(date = time) %>% mutate(week = lubridate::week(date)) %>% group_by(outcome, week) %>% mutate(rollmean = zoo::rollmean(x = value, k = 7, fill = NA)) diff --git a/postprocessing/processing_diagnostics_SLURM.R b/postprocessing/processing_diagnostics_SLURM.R index 98c7da65e..03e0ebb3e 100644 --- a/postprocessing/processing_diagnostics_SLURM.R +++ b/postprocessing/processing_diagnostics_SLURM.R @@ -231,9 +231,8 @@ for(i in 1:length(USPS)){ filter_gt_data <- gt_data %>% filter(USPS == state) %>% - select(USPS, subpop, time, dplyr::contains("incid") & !dplyr::contains("_")) %>% + select(USPS, subpop, date, dplyr::contains("incid") & !dplyr::contains("_")) %>% pivot_longer(dplyr::contains('incid'), names_to = "outcome", values_to = "value") %>% - rename(date = time) %>% mutate(week = lubridate::week(date)) %>% group_by(outcome, week) %>% mutate(rollmean = zoo::rollmean(x = value, k = 7, fill = NA)) diff --git a/postprocessing/sim_processing_source.R b/postprocessing/sim_processing_source.R index 001cdec08..9dda42208 100644 --- a/postprocessing/sim_processing_source.R +++ b/postprocessing/sim_processing_source.R @@ -35,14 +35,14 @@ combine_and_format_sims <- function(outcome_vars = "incid", dirs <- dirs[str_detect(dirs, '/hosp')][1] res_subpop_all <- arrow::open_dataset(dirs, partitioning = c("lik_type", "is_final")) %>% - select(time, subpop, starts_with(outcome_vars)) %>% - # select(time, subpop, outcome_modifiers_scenario, starts_with(outcome_vars)) %>% - filter(time>=forecast_date & time<=end_date) %>% + select(date, subpop, starts_with(outcome_vars)) %>% + # select(date, subpop, outcome_modifiers_scenario, starts_with(outcome_vars)) %>% + filter(date>=forecast_date & date<=end_date) %>% collect() %>% # filter(stringr::str_detect(outcome_modifiers_scenario, death_filter)) %>% - mutate(time=as.Date(time)) %>% - # group_by(time, subpop, outcome_modifiers_scenario) %>% - group_by(time, subpop) %>% + mutate(date=as.Date(date)) %>% + # group_by(date, subpop, outcome_modifiers_scenario) %>% + group_by(date, subpop) %>% dplyr::mutate(sim_num = as.character(seq_along(subpop))) %>% ungroup()