diff --git a/flepimop/R_packages/inference/R/functions.R b/flepimop/R_packages/inference/R/functions.R index 80f29c9a6..2f7cfbc5e 100644 --- a/flepimop/R_packages/inference/R/functions.R +++ b/flepimop/R_packages/inference/R/functions.R @@ -39,11 +39,13 @@ periodAggregate <- function(data, dates, start_date = NULL, end_date = NULL, per tmp <- tmp %>% tidyr::unite("time_unit", names(tmp)[grepl("time_unit_", names(tmp))]) %>% dplyr::group_by(time_unit) %>% - dplyr::summarize(first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>% + dplyr::summarize(last_date = max(date),first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>% dplyr::ungroup() %>% dplyr::arrange(first_date) %>% dplyr::filter(valid) - return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date)))) + # return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date)))) + return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$last_date)))) + } diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index a708b2460..fbd20ba93 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -67,12 +67,18 @@ aggregate_and_calc_loc_likelihoods <- function( this_location_log_likelihood <- 0 for (var in names(ground_truth_data[[location]])) { - + obs_tmp1 <- ground_truth_data[[location]][[var]] + obs_tmp <- obs_tmp1[!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),] + sim_tmp1 <- this_location_modeled_outcome[[var]] + sim_tmp <- sim_tmp1[match(lubridate::as_date(sim_tmp1$date), + lubridate::as_date(obs_tmp$date)),] %>% na.omit() + + this_location_log_likelihood <- this_location_log_likelihood + ## Actually compute likelihood for this location and statistic here: sum(inference::logLikStat( - obs = 
ground_truth_data[[location]][[var]]$data_var, - sim = this_location_modeled_outcome[[var]]$sim_var, + obs = as.numeric(obs_tmp$data_var), + sim = as.numeric(sim_tmp$sim_var), dist = targets_config[[var]]$likelihood$dist, param = targets_config[[var]]$likelihood$param, add_one = targets_config[[var]]$add_one @@ -615,72 +621,77 @@ initialize_mcmc_first_block <- function( ## initial conditions (init) if (!is.null(config$initial_conditions)){ + if(config$initial_conditions$method != "plugin"){ + if ("init_filename" %in% global_file_names) { - - if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){ - - if (is.null(config$initial_conditions$initial_conditions_file)) { - stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") - } - initial_init_file <- config$initial_conditions$initial_conditions_file - - } else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw", "plugin")) { - print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods, - and no copying to global or chimeric files is being done.") - - if (is.null(config$initial_conditions$initial_file_type)) { - stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") - } - initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]] - } - - - if (!file.exists(initial_init_file)) { - stop("ERROR: Initial conditions file specified but does not exist.") - } + + if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){ - if (grepl(".csv", initial_init_file)){ - initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE) - }else{ - initial_init <- arrow::read_parquet(initial_init_file) + if (is.null(config$initial_conditions$initial_conditions_file)) { + stop("ERROR: Initial 
conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") } + initial_init_file <- config$initial_conditions$initial_conditions_file - # if the initial conditions file contains a 'date' column, filter for config$start_date + } else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw")) { + print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods, + and no copying to global or chimeric files is being done.") - if("date" %in% colnames(initial_init)){ - - initial_init <- initial_init %>% - dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% - dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) - - if (nrow(initial_init) == 0) { - stop("ERROR: Initial conditions file specified but does not contain the start date.") - } - + if (is.null(config$initial_conditions$initial_file_type)) { + stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`") } - - arrow::write_parquet(initial_init, global_files[["init_filename"]]) + initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]] + } + + + if (!file.exists(initial_init_file)) { + stop("ERROR: Initial conditions file specified but does not exist.") + } + + if (grepl(".csv", initial_init_file)){ + initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE) + }else{ + initial_init <- arrow::read_parquet(initial_init_file) + } + + # if the initial conditions file contains a 'date' column, filter for config$start_date + + if("date" %in% colnames(initial_init)){ + + initial_init <- initial_init %>% + dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% + dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) + + if (nrow(initial_init) == 0) { + stop("ERROR: Initial conditions file specified but does 
not contain the start date.") + } + + } + + arrow::write_parquet(initial_init, global_files[["init_filename"]]) } - + # if the initial conditions file contains a 'date' column, filter for config$start_date if (grepl(".csv", global_files[["init_filename"]])){ - initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE) + initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE) }else{ - initial_init <- arrow::read_parquet(global_files[["init_filename"]]) + initial_init <- arrow::read_parquet(global_files[["init_filename"]]) } - + if("date" %in% colnames(initial_init)){ - - initial_init <- initial_init %>% - dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% - dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) - - if (nrow(initial_init) == 0) { - stop("ERROR: Initial conditions file specified but does not contain the start date.") - } - + + initial_init <- initial_init %>% + dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>% + dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC")) + + if (nrow(initial_init) == 0) { + stop("ERROR: Initial conditions file specified but does not contain the start date.") + } + } arrow::write_parquet(initial_init, global_files[["init_filename"]]) + }else if(config$initial_conditions$method == "plugin"){ + print("Initial conditions files generated by gempyor using plugin method.") + } } diff --git a/flepimop/gempyor_pkg/src/gempyor/inference.py b/flepimop/gempyor_pkg/src/gempyor/inference.py index 81b1c22d0..fc0613045 100644 --- a/flepimop/gempyor_pkg/src/gempyor/inference.py +++ b/flepimop/gempyor_pkg/src/gempyor/inference.py @@ -538,10 +538,12 @@ def one_simulation( self.outcomes_parameters = outcomes.read_parameters_from_config(self.modinf) npi_outcomes = None + npi_seir = None if parallel: with Timer("//things"): with ProcessPoolExecutor(max_workers=max(mp.cpu_count(), 3)) as executor: - ret_seir = 
executor.submit(seir.build_npi_SEIR, self.modinf, load_ID, sim_id2load, config) + if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None: + ret_seir = executor.submit(seir.build_npi_SEIR, self.modinf, load_ID, sim_id2load, config) if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes: ret_outcomes = executor.submit( outcomes.build_outcome_modifiers, @@ -563,15 +565,17 @@ def one_simulation( self.proportion_info, ) = ret_comparments.result() self.already_built = True - npi_seir = ret_seir.result() + if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None: + npi_seir = ret_seir.result() if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes: npi_outcomes = ret_outcomes.result() else: if not self.already_built: self.build_structure() - npi_seir = seir.build_npi_SEIR( - modinf=self.modinf, load_ID=load_ID, sim_id2load=sim_id2load, config=config - ) + if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None: + npi_seir = seir.build_npi_SEIR( + modinf=self.modinf, load_ID=load_ID, sim_id2load=sim_id2load, config=config + ) if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes: npi_outcomes = outcomes.build_outcome_modifiers( modinf=self.modinf, diff --git a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py index 2cb09c99d..21c2d935d 100644 --- a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py +++ b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py @@ -134,7 +134,7 @@ def rhs(t, x, today): rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][ today ][visiting_subpop] - total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum() + total_rate[spatial_node] *= (rate_keep_compartment + rate_change_compartment.sum()) # compute the number of individual transitioning from source to destination from the total rate # number_move has 
shape (nspatial_nodes) diff --git a/flepimop/gempyor_pkg/tests/inference/data/config_inference_without_seir_modifiers.yml b/flepimop/gempyor_pkg/tests/inference/data/config_inference_without_seir_modifiers.yml new file mode 100644 index 000000000..3268c781a --- /dev/null +++ b/flepimop/gempyor_pkg/tests/inference/data/config_inference_without_seir_modifiers.yml @@ -0,0 +1,67 @@ +name: minimal_test +setup_name: minimal_test_setup +start_date: 2020-01-31 +end_date: 2020-05-31 +nslots: 5 + +subpop_setup: + geodata: data/geodata.csv + mobility: data/mobility.csv + +initial_conditions: + method: Default + +compartments: + infection_stage: ["S", "E", "I1", "I2", "I3", "R"] + vaccination_stage: ["unvaccinated"] + +seir: + integration: + method: legacy + dt: 1/6 + parameters: + alpha: + value: .9 + sigma: + value: + distribution: fixed + value: 1 / 5.2 + gamma: + value: + distribution: uniform + low: 1 / 6 + high: 1 / 2.6 + R0s: + value: + distribution: uniform + low: 2 + high: 3 + transitions: + - source: ["S", "unvaccinated"] + destination: ["E", "unvaccinated"] + rate: ["R0s * gamma", 1] + proportional_to: [ + ["S", "unvaccinated"], + [[["I1", "I2", "I3"]], "unvaccinated"], + ] + proportion_exponent: [["1", "1"], ["alpha", "1"]] + - source: [["E"], ["unvaccinated"]] + destination: [["I1"], ["unvaccinated"]] + rate: ["sigma", 1] + proportional_to: [[["E"], ["unvaccinated"]]] + proportion_exponent: [["1", "1"]] + - source: [["I1"], ["unvaccinated"]] + destination: [["I2"], ["unvaccinated"]] + rate: ["3 * gamma", 1] + proportional_to: [[["I1"], ["unvaccinated"]]] + proportion_exponent: [["1", "1"]] + - source: [["I2"], ["unvaccinated"]] + destination: [["I3"], ["unvaccinated"]] + rate: ["3 * gamma", 1] + proportional_to: [[["I2"], ["unvaccinated"]]] + proportion_exponent: [["1", "1"]] + - source: [["I3"], ["unvaccinated"]] + destination: [["R"], ["unvaccinated"]] + rate: ["3 * gamma", 1] + proportional_to: [[["I3"], ["unvaccinated"]]] + proportion_exponent: [["1", "1"]] 
diff --git a/flepimop/gempyor_pkg/tests/inference/test_inference.py b/flepimop/gempyor_pkg/tests/inference/test_inference.py index 8891f4d98..78c61c8dc 100644 --- a/flepimop/gempyor_pkg/tests/inference/test_inference.py +++ b/flepimop/gempyor_pkg/tests/inference/test_inference.py @@ -64,3 +64,15 @@ def test_GempyorInference_success(self): i.already_built = False i.one_simulation(sim_id2write=0, parallel=True) + + # HopkinsIDD/flepiMoP#269 + def test_inference_without_seir_modifiers(self): + # Setup + os.chdir(os.path.dirname(__file__)) + gempyor_inference = inference.GempyorInference( + f"{DATA_DIR}/config_inference_without_seir_modifiers.yml", + ) + + # Test + simulation_int = gempyor_inference.one_simulation(0) + assert simulation_int == 0 diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 0e8c32bb5..04ce4316e 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -53,7 +53,7 @@ option_list = list( optparse::make_option(c("-M", "--memory_profiling"), action = "store", default = Sys.getenv("FLEPI_MEM_PROFILE", FALSE), type = 'logical', help = 'Should the memory profiling be run during iterations'), optparse::make_option(c("-P", "--memory_profiling_iters"), action = "store", default = Sys.getenv("FLEPI_MEM_PROF_ITERS", 100), type = 'integer', help = 'If doing memory profiling, after every X iterations run the profiler'), optparse::make_option(c("-g", "--subpop_len"), action="store", default=Sys.getenv("SUBPOP_LENGTH", 5), type='integer', help = "number of digits in subpop"), - optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", TRUE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.') + optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", FALSE), type = 'logical', help = 'Should the likelihood be 
calculated with the aggregate estimates.') ) parser=optparse::OptionParser(option_list=option_list) @@ -268,12 +268,12 @@ if (config$inference$do_inference){ obs <- suppressMessages( readr::read_csv(config$inference$gt_data_path, col_types = readr::cols(date = readr::col_date(), - source = readr::col_character(), + # source = readr::col_character(), subpop = readr::col_character(), .default = readr::col_double()), )) %>% dplyr::filter(subpop %in% subpops_, date >= gt_start_date, date <= gt_end_date) %>% - dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>% - dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) + dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) #%>% + # dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) # add aggregate groundtruth to the obs data for the likelihood calc