From 31b04189fdcffde0dd75912c38e2db9a18ac8fbf Mon Sep 17 00:00:00 2001 From: saraloo <45245630+saraloo@users.noreply.github.com> Date: Thu, 8 Aug 2024 13:30:26 -0400 Subject: [PATCH 1/4] draft fix --- .../R_packages/inference/R/inference_slot_runner_funcs.R | 8 ++++++-- flepimop/main_scripts/inference_slot.R | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index a708b2460..c85a65e56 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -67,12 +67,16 @@ aggregate_and_calc_loc_likelihoods <- function( this_location_log_likelihood <- 0 for (var in names(ground_truth_data[[location]])) { + obs_tmp1 <- ground_truth_data[[location]][[var]] + obs_tmp <- obs_tmp1[!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),] + sim_tmp <- this_location_modeled_outcome[[var]][!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),] %>% + dplyr::filter(date >= min(obs_tmp1$date) & date <= max(obs_tmp1$date)) this_location_log_likelihood <- this_location_log_likelihood + ## Actually compute likelihood for this location and statistic here: sum(inference::logLikStat( - obs = ground_truth_data[[location]][[var]]$data_var, - sim = this_location_modeled_outcome[[var]]$sim_var, + obs = as.numeric(obs_tmp$data_var), + sim = as.numeric(sim_tmp$sim_var), dist = targets_config[[var]]$likelihood$dist, param = targets_config[[var]]$likelihood$param, add_one = targets_config[[var]]$add_one diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 0e8c32bb5..3ebc4097d 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -268,12 +268,12 @@ if (config$inference$do_inference){ obs <- suppressMessages( readr::read_csv(config$inference$gt_data_path, col_types = readr::cols(date = readr::col_date(), - source = readr::col_character(), + # source = readr::col_character(), subpop = readr::col_character(), .default = readr::col_double()), )) %>% dplyr::filter(subpop %in% subpops_, date >= gt_start_date, date <= gt_end_date) %>% - dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>% - dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) + dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) #%>% + # dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) # add aggregate groundtruth to the obs data for the likelihood calc From 26054e089f3e32f58f1968a98b34931d076ce0a3 Mon Sep 17 00:00:00 2001 From: saraloo <45245630+saraloo@users.noreply.github.com> Date: Mon, 12 Aug 2024 14:28:57 -0400 Subject: [PATCH 2/4] change aggregation date to last date instead of first date --- flepimop/R_packages/inference/R/functions.R | 6 ++++-- .../R_packages/inference/R/inference_slot_runner_funcs.R | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/flepimop/R_packages/inference/R/functions.R b/flepimop/R_packages/inference/R/functions.R index 80f29c9a6..2f7cfbc5e 100644 --- a/flepimop/R_packages/inference/R/functions.R +++ b/flepimop/R_packages/inference/R/functions.R @@ -39,11 +39,13 @@ periodAggregate <- function(data, dates, start_date = NULL, end_date = NULL, per tmp <- tmp %>% tidyr::unite("time_unit", names(tmp)[grepl("time_unit_", names(tmp))]) %>% dplyr::group_by(time_unit) %>% - dplyr::summarize(first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>% + dplyr::summarize(last_date = max(date),first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>% dplyr::ungroup() %>% dplyr::arrange(first_date) %>% dplyr::filter(valid) - return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date)))) + # return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date)))) + return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$last_date)))) + } diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index c85a65e56..1cc975a1e 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -69,9 +69,11 @@ aggregate_and_calc_loc_likelihoods <- function( obs_tmp1 <- ground_truth_data[[location]][[var]] obs_tmp <- obs_tmp1[!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),] - sim_tmp <- this_location_modeled_outcome[[var]][!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),] %>% - dplyr::filter(date >= min(obs_tmp1$date) & date <= max(obs_tmp1$date)) - + sim_tmp1 <- this_location_modeled_outcome[[var]] + sim_tmp <- sim_tmp1[match(lubridate::as_date(sim_tmp1$date), + lubridate::as_date(obs_tmp$date)),] %>% na.omit() + + this_location_log_likelihood <- this_location_log_likelihood + ## Actually compute likelihood for this location and statistic here: sum(inference::logLikStat( From 8c4dab0271e1eab84e4cbdcbb43c562243f923a8 Mon Sep 17 00:00:00 2001 From: saraloo Date: Fri, 23 Aug 2024 08:38:15 -0400 Subject: [PATCH 3/4] change default aggregation to false --- flepimop/main_scripts/inference_slot.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 3ebc4097d..04ce4316e 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -53,7 +53,7 @@ option_list = list( optparse::make_option(c("-M", "--memory_profiling"), action = "store", default = Sys.getenv("FLEPI_MEM_PROFILE", FALSE), type = 'logical', help = 'Should the memory profiling be run during iterations'), optparse::make_option(c("-P", "--memory_profiling_iters"), action = "store", default = Sys.getenv("FLEPI_MEM_PROF_ITERS", 100), type = 'integer', help = 'If doing memory profiling, after every X iterations run the profiler'), optparse::make_option(c("-g", "--subpop_len"), action="store", default=Sys.getenv("SUBPOP_LENGTH", 5), type='integer', help = "number of digits in subpop"), - optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", TRUE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.') + optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", FALSE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.') ) parser=optparse::OptionParser(option_list=option_list) From 5e17dd3f09a3d85eb711790fdfdbc13736ac3b3d Mon Sep 17 00:00:00 2001 From: saraloo <45245630+saraloo@users.noreply.github.com> Date: Wed, 11 Sep 2024 11:50:03 -0400 Subject: [PATCH 4/4] add plugin option IC --- .../inference/R/inference_slot_runner_funcs.R | 109 +++++++++--------- 1 file changed, 57 insertions(+), 52 deletions(-) diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index 1cc975a1e..fbd20ba93 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -621,72 +621,77 @@ initialize_mcmc_first_block <- function( ## initial conditions (init) if (!is.null(config$initial_conditions)){ + if(config$initial_conditions$method != "plugin"){ + if ("init_filename" %in% global_file_names) { - - if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){ - - if (is.null(config$initial_conditions$initial_conditions_file)) { - stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") - } - initial_init_file <- config$initial_conditions$initial_conditions_file - - } else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw", "plugin")) { - print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods, - and no copying to global or chimeric files is being done.") - - if (is.null(config$initial_conditions$initial_file_type)) { - stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") - } - initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]] - } - - - if (!file.exists(initial_init_file)) { - stop("ERROR: Initial conditions file specified but does not exist.") - } + + if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){ - if (grepl(".csv", initial_init_file)){ - initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE) - }else{ - initial_init <- arrow::read_parquet(initial_init_file) + if (is.null(config$initial_conditions$initial_conditions_file)) { + stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") } + initial_init_file <- config$initial_conditions$initial_conditions_file - # if the initial conditions file contains a 'date' column, filter for config$start_date + } else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw")) { + print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods, + and no copying to global or chimeric files is being done.") - if("date" %in% colnames(initial_init)){ - - initial_init <- initial_init %>% - dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% - dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) - - if (nrow(initial_init) == 0) { - stop("ERROR: Initial conditions file specified but does not contain the start date.") - } - + if (is.null(config$initial_conditions$initial_file_type)) { + stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") } - - arrow::write_parquet(initial_init, global_files[["init_filename"]]) + initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]] + } + + + if (!file.exists(initial_init_file)) { + stop("ERROR: Initial conditions file specified but does not exist.") + } + + if (grepl(".csv", initial_init_file)){ + initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE) + }else{ + initial_init <- arrow::read_parquet(initial_init_file) + } + + # if the initial conditions file contains a 'date' column, filter for config$start_date + + if("date" %in% colnames(initial_init)){ + + initial_init <- initial_init %>% + dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% + dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) + + if (nrow(initial_init) == 0) { + stop("ERROR: Initial conditions file specified but does not contain the start date.") + } + + } + + arrow::write_parquet(initial_init, global_files[["init_filename"]]) } - + # if the initial conditions file contains a 'date' column, filter for config$start_date if (grepl(".csv", global_files[["init_filename"]])){ - initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE) + initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE) }else{ - initial_init <- arrow::read_parquet(global_files[["init_filename"]]) + initial_init <- arrow::read_parquet(global_files[["init_filename"]]) } - + if("date" %in% colnames(initial_init)){ - - initial_init <- initial_init %>% - dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% - dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) - - if (nrow(initial_init) == 0) { - stop("ERROR: Initial conditions file specified but does not contain the start date.") - } - + + initial_init <- initial_init %>% + dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% + dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) + + if (nrow(initial_init) == 0) { + stop("ERROR: Initial conditions file specified but does not contain the start date.") + } + } arrow::write_parquet(initial_init, global_files[["init_filename"]]) + }else if(config$initial_conditions$method == "plugin"){ + print("Initial conditions files generated by gempyor using plugin method.") + } }