Skip to content

Commit

Permalink
Merge branch 'main' into python_script_resume
Browse files Browse the repository at this point in the history
  • Loading branch information
jcblemai authored Sep 13, 2024
2 parents 1cbce6c + fb5a9c5 commit d4ba408
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 67 deletions.
6 changes: 4 additions & 2 deletions flepimop/R_packages/inference/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ periodAggregate <- function(data, dates, start_date = NULL, end_date = NULL, per
tmp <- tmp %>%
tidyr::unite("time_unit", names(tmp)[grepl("time_unit_", names(tmp))]) %>%
dplyr::group_by(time_unit) %>%
dplyr::summarize(first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>%
dplyr::summarize(last_date = max(date),first_date = min(date), value = aggregator(value), valid = period_unit_validator(date,time_unit)) %>%
dplyr::ungroup() %>%
dplyr::arrange(first_date) %>%
dplyr::filter(valid)
return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date))))
# return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$first_date))))
return(matrix(tmp$value, ncol = 1, dimnames = list(as.character(tmp$last_date))))

}


Expand Down
121 changes: 66 additions & 55 deletions flepimop/R_packages/inference/R/inference_slot_runner_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,18 @@ aggregate_and_calc_loc_likelihoods <- function(
this_location_log_likelihood <- 0
for (var in names(ground_truth_data[[location]])) {


obs_tmp1 <- ground_truth_data[[location]][[var]]
obs_tmp <- obs_tmp1[!is.na(obs_tmp1$data_var) & !is.na(obs_tmp1$date),]
sim_tmp1 <- this_location_modeled_outcome[[var]]
sim_tmp <- sim_tmp1[match(lubridate::as_date(sim_tmp1$date),
lubridate::as_date(obs_tmp$date)),] %>% na.omit()


this_location_log_likelihood <- this_location_log_likelihood +
## Actually compute likelihood for this location and statistic here:
sum(inference::logLikStat(
obs = ground_truth_data[[location]][[var]]$data_var,
sim = this_location_modeled_outcome[[var]]$sim_var,
obs = as.numeric(obs_tmp$data_var),
sim = as.numeric(sim_tmp$sim_var),
dist = targets_config[[var]]$likelihood$dist,
param = targets_config[[var]]$likelihood$param,
add_one = targets_config[[var]]$add_one
Expand Down Expand Up @@ -615,72 +621,77 @@ initialize_mcmc_first_block <- function(
## initial conditions (init)

if (!is.null(config$initial_conditions)){
if(config$initial_conditions$method != "plugin"){

if ("init_filename" %in% global_file_names) {

if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){

if (is.null(config$initial_conditions$initial_conditions_file)) {
stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`")
}
initial_init_file <- config$initial_conditions$initial_conditions_file

} else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw", "plugin")) {
print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods,
and no copying to global or chimeric files is being done.")

if (is.null(config$initial_conditions$initial_file_type)) {
stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`")
}
initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]]
}


if (!file.exists(initial_init_file)) {
stop("ERROR: Initial conditions file specified but does not exist.")
}

if (config$initial_conditions$method %in% c("FromFile", "SetInitialConditions")){

if (grepl(".csv", initial_init_file)){
initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE)
}else{
initial_init <- arrow::read_parquet(initial_init_file)
if (is.null(config$initial_conditions$initial_conditions_file)) {
stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`")
}
initial_init_file <- config$initial_conditions$initial_conditions_file

# if the initial conditions file contains a 'date' column, filter for config$start_date
} else if (config$initial_conditions$method %in% c("InitialConditionsFolderDraw", "SetInitialConditionsFolderDraw")) {
print("Initial conditions in inference has not been fully implemented yet for the 'folder draw' methods,
and no copying to global or chimeric files is being done.")

if("date" %in% colnames(initial_init)){

initial_init <- initial_init %>%
dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>%
dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC"))

if (nrow(initial_init) == 0) {
stop("ERROR: Initial conditions file specified but does not contain the start date.")
}

if (is.null(config$initial_conditions$initial_file_type)) {
stop("ERROR: Initial conditions file needs to be specified in the config under `initial_conditions:initial_conditions_file`")
}

arrow::write_parquet(initial_init, global_files[["init_filename"]])
initial_init_file <- global_files[[paste0(config$initial_conditions$initial_file_type, "_filename")]]
}


if (!file.exists(initial_init_file)) {
stop("ERROR: Initial conditions file specified but does not exist.")
}

if (grepl(".csv", initial_init_file)){
initial_init <- readr::read_csv(initial_init_file,show_col_types = FALSE)
}else{
initial_init <- arrow::read_parquet(initial_init_file)
}

# if the initial conditions file contains a 'date' column, filter for config$start_date

if("date" %in% colnames(initial_init)){

initial_init <- initial_init %>%
dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>%
dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC"))

if (nrow(initial_init) == 0) {
stop("ERROR: Initial conditions file specified but does not contain the start date.")
}

}

arrow::write_parquet(initial_init, global_files[["init_filename"]])
}

# if the initial conditions file contains a 'date' column, filter for config$start_date
if (grepl(".csv", global_files[["init_filename"]])){
initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE)
initial_init <- readr::read_csv(global_files[["init_filename"]],show_col_types = FALSE)
}else{
initial_init <- arrow::read_parquet(global_files[["init_filename"]])
initial_init <- arrow::read_parquet(global_files[["init_filename"]])
}

if("date" %in% colnames(initial_init)){
initial_init <- initial_init %>%
dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>%
dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC"))
if (nrow(initial_init) == 0) {
stop("ERROR: Initial conditions file specified but does not contain the start date.")
}

initial_init <- initial_init %>%
dplyr::mutate(date = as.POSIXct(date, tz="UTC")) %>%
dplyr::filter(date == as.POSIXct(paste0(config$start_date, " 00:00:00"), tz="UTC"))

if (nrow(initial_init) == 0) {
stop("ERROR: Initial conditions file specified but does not contain the start date.")
}

}
arrow::write_parquet(initial_init, global_files[["init_filename"]])
}else if(config$initial_conditions$method == "plugin"){
print("Initial conditions files generated by gempyor using plugin method.")
}
}


Expand Down
14 changes: 9 additions & 5 deletions flepimop/gempyor_pkg/src/gempyor/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,10 +538,12 @@ def one_simulation(
self.outcomes_parameters = outcomes.read_parameters_from_config(self.modinf)

npi_outcomes = None
npi_seir = None
if parallel:
with Timer("//things"):
with ProcessPoolExecutor(max_workers=max(mp.cpu_count(), 3)) as executor:
ret_seir = executor.submit(seir.build_npi_SEIR, self.modinf, load_ID, sim_id2load, config)
if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None:
ret_seir = executor.submit(seir.build_npi_SEIR, self.modinf, load_ID, sim_id2load, config)
if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes:
ret_outcomes = executor.submit(
outcomes.build_outcome_modifiers,
Expand All @@ -563,15 +565,17 @@ def one_simulation(
self.proportion_info,
) = ret_comparments.result()
self.already_built = True
npi_seir = ret_seir.result()
if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None:
npi_seir = ret_seir.result()
if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes:
npi_outcomes = ret_outcomes.result()
else:
if not self.already_built:
self.build_structure()
npi_seir = seir.build_npi_SEIR(
modinf=self.modinf, load_ID=load_ID, sim_id2load=sim_id2load, config=config
)
if self.modinf.seir_config is not None and self.modinf.npi_config_seir is not None:
npi_seir = seir.build_npi_SEIR(
modinf=self.modinf, load_ID=load_ID, sim_id2load=sim_id2load, config=config
)
if self.modinf.outcomes_config is not None and self.modinf.npi_config_outcomes:
npi_outcomes = outcomes.build_outcome_modifiers(
modinf=self.modinf,
Expand Down
2 changes: 1 addition & 1 deletion flepimop/gempyor_pkg/src/gempyor/steps_rk4.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def rhs(t, x, today):
rate_change_compartment *= parameters[transitions[transition_rate_col][transition_index]][
today
][visiting_subpop]
total_rate[spatial_node] *= rate_keep_compartment + rate_change_compartment.sum()
total_rate[spatial_node] *= (rate_keep_compartment + rate_change_compartment.sum())

# compute the number of individual transitioning from source to destination from the total rate
# number_move has shape (nspatial_nodes)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Minimal configuration used by the inference tests: an SEIR-style model
# with three infectious stages and a single ("unvaccinated") stratum.
name: minimal_test
setup_name: minimal_test_setup
# Simulation window.
start_date: 2020-01-31
end_date: 2020-05-31
# Number of independent simulation slots.
nslots: 5

# Spatial structure: subpopulation geodata and mobility matrix (CSV inputs).
subpop_setup:
geodata: data/geodata.csv
mobility: data/mobility.csv

# Built-in default initial conditions — no initial-conditions file required.
initial_conditions:
method: Default

# Compartments: S -> E -> I1 -> I2 -> I3 -> R, one vaccination stratum.
compartments:
infection_stage: ["S", "E", "I1", "I2", "I3", "R"]
vaccination_stage: ["unvaccinated"]

seir:
# Integration settings: legacy integrator with time step dt = 1/6
# (presumably in days — confirm against gempyor's time-unit convention).
integration:
method: legacy
dt: 1/6
# Model parameters: fixed values or sampling distributions.
parameters:
alpha:
value: .9
sigma:
value:
distribution: fixed
value: 1 / 5.2
gamma:
value:
distribution: uniform
low: 1 / 6
high: 1 / 2.6
R0s:
value:
distribution: uniform
low: 2
high: 3
# Compartment flows. Each transition lists source/destination compartments,
# a rate expression, the compartments the flow is proportional to, and a
# per-term exponent (entries pair positionally with proportional_to).
transitions:
# Infection: S -> E at rate R0s * gamma, proportional to S and to the sum
# of the infectious compartments I1..I3 (exponent alpha on the S term).
- source: ["S", "unvaccinated"]
destination: ["E", "unvaccinated"]
rate: ["R0s * gamma", 1]
proportional_to: [
["S", "unvaccinated"],
[[["I1", "I2", "I3"]], "unvaccinated"],
]
proportion_exponent: [["1", "1"], ["alpha", "1"]]
# Incubation: E -> I1 at rate sigma.
- source: [["E"], ["unvaccinated"]]
destination: [["I1"], ["unvaccinated"]]
rate: ["sigma", 1]
proportional_to: [[["E"], ["unvaccinated"]]]
proportion_exponent: [["1", "1"]]
# Progression through the three infectious stages, each at rate 3 * gamma
# (three sequential stages give an Erlang-like infectious period).
- source: [["I1"], ["unvaccinated"]]
destination: [["I2"], ["unvaccinated"]]
rate: ["3 * gamma", 1]
proportional_to: [[["I1"], ["unvaccinated"]]]
proportion_exponent: [["1", "1"]]
- source: [["I2"], ["unvaccinated"]]
destination: [["I3"], ["unvaccinated"]]
rate: ["3 * gamma", 1]
proportional_to: [[["I2"], ["unvaccinated"]]]
proportion_exponent: [["1", "1"]]
# Recovery: I3 -> R at rate 3 * gamma.
- source: [["I3"], ["unvaccinated"]]
destination: [["R"], ["unvaccinated"]]
rate: ["3 * gamma", 1]
proportional_to: [[["I3"], ["unvaccinated"]]]
proportion_exponent: [["1", "1"]]
12 changes: 12 additions & 0 deletions flepimop/gempyor_pkg/tests/inference/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,15 @@ def test_GempyorInference_success(self):

i.already_built = False
i.one_simulation(sim_id2write=0, parallel=True)

# HopkinsIDD/flepiMoP#269
def test_inference_without_seir_modifiers(self):
    """A config that defines no SEIR modifiers should still simulate cleanly.

    Regression test: building the inference object and running a single
    simulation must succeed (returning 0) when the seir_modifiers section
    is absent from the configuration.
    """
    # Setup: run from the test directory so the relative config path resolves.
    os.chdir(os.path.dirname(__file__))
    config_path = f"{DATA_DIR}/config_inference_without_seir_modifiers.yml"
    gempyor_inference = inference.GempyorInference(config_path)

    # Test: one_simulation is expected to signal success with 0.
    result = gempyor_inference.one_simulation(0)
    assert result == 0
8 changes: 4 additions & 4 deletions flepimop/main_scripts/inference_slot.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ option_list = list(
optparse::make_option(c("-M", "--memory_profiling"), action = "store", default = Sys.getenv("FLEPI_MEM_PROFILE", FALSE), type = 'logical', help = 'Should the memory profiling be run during iterations'),
optparse::make_option(c("-P", "--memory_profiling_iters"), action = "store", default = Sys.getenv("FLEPI_MEM_PROF_ITERS", 100), type = 'integer', help = 'If doing memory profiling, after every X iterations run the profiler'),
optparse::make_option(c("-g", "--subpop_len"), action="store", default=Sys.getenv("SUBPOP_LENGTH", 5), type='integer', help = "number of digits in subpop"),
optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", TRUE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.')
optparse::make_option(c("-a", "--incl_aggr_likelihood"), action = "store", default = Sys.getenv("INCL_AGGR_LIKELIHOOD", FALSE), type = 'logical', help = 'Should the likelihood be calculated with the aggregate estiamtes.')
)

parser=optparse::OptionParser(option_list=option_list)
Expand Down Expand Up @@ -268,12 +268,12 @@ if (config$inference$do_inference){
obs <- suppressMessages(
readr::read_csv(config$inference$gt_data_path,
col_types = readr::cols(date = readr::col_date(),
source = readr::col_character(),
# source = readr::col_character(),
subpop = readr::col_character(),
.default = readr::col_double()), )) %>%
dplyr::filter(subpop %in% subpops_, date >= gt_start_date, date <= gt_end_date) %>%
dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0)
dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) #%>%
# dplyr::mutate_if(is.numeric, dplyr::coalesce, 0)


# add aggregate groundtruth to the obs data for the likelihood calc
Expand Down

0 comments on commit d4ba408

Please sign in to comment.