From bedbe2fd20d1789bc3dd6ed48ab54d5c0138f160 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Tue, 12 Sep 2023 16:25:45 +0200 Subject: [PATCH 1/8] blind initial condition fits --- flepimop/R_packages/inference/NAMESPACE | 1 + flepimop/R_packages/inference/R/functions.R | 30 ++++++++++++++++- .../inference/R/inference_slot_runner_funcs.R | 11 ++++--- flepimop/main_scripts/inference_slot.R | 32 +++++++++++++++++-- 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/flepimop/R_packages/inference/NAMESPACE b/flepimop/R_packages/inference/NAMESPACE index fa908dd87..91db773f1 100644 --- a/flepimop/R_packages/inference/NAMESPACE +++ b/flepimop/R_packages/inference/NAMESPACE @@ -25,5 +25,6 @@ export(perturb_hnpi_from_file) export(perturb_hpar) export(perturb_seeding) export(perturb_snpi) +export(perturb_init) export(perturb_snpi_from_file) importFrom(magrittr,"%>%") diff --git a/flepimop/R_packages/inference/R/functions.R b/flepimop/R_packages/inference/R/functions.R index 0285fe7ef..6e3c290a2 100644 --- a/flepimop/R_packages/inference/R/functions.R +++ b/flepimop/R_packages/inference/R/functions.R @@ -411,6 +411,29 @@ perturb_snpi <- function(snpi, intervention_settings) { return(snpi) } +perturb_init <- function(init, perturbation) { + + pert_dist <- flepicommon::as_random_distribution(perturbation) + perturb <- init$perturb + + init$amount[perturb] <- init$amount[perturb] + pert_dist(nrow(perturb)) + + clip_to_bounds <- function(value) { + if (value < 0) { + return(0) + } else if (value > 1) { + return(1) + } else { + return(value) + } + } + + # Apply the clip_to_bounds function to elements outside the bounds + init$amount[perturb] <- sapply(init$amount[perturb], clip_to_bounds) + + return(init) +} + ##' Function perturbs an npi parameter file based on ##' user-specified distributions @@ -520,6 +543,8 @@ perturb_hpar <- function(hpar, intervention_settings) { ##' @return a new data frame with the confirmed seedin. ##' @export accept_reject_new_seeding_npis <- function( + init_orig, + init_prop, seeding_orig, seeding_prop, snpi_orig, @@ -532,6 +557,7 @@ accept_reject_new_seeding_npis <- function( prop_lls ) { rc_seeding <- seeding_orig + rc_init <- init_orig rc_snpi <- snpi_orig rc_hnpi <- hnpi_orig rc_hpar <- hpar_orig @@ -549,7 +575,8 @@ accept_reject_new_seeding_npis <- function( orig_lls$accept_prob <- min(1,ratio) # added column for acceptance decision for (subpop in orig_lls$subpop[accept]) { - rc_seeding[rc_seeding$subpop == subpop, ] <- seeding_prop[seeding_prop$subpop ==subpop, ] + rc_seeding[rc_seeding$subpop == subpop, ] <- seeding_prop[seeding_prop$subpop == subpop, ] + rc_init[rc_init$subpop == subpop, ] <- init_prop[init_prop$subpop == subpop, ] rc_snpi[rc_snpi$subpop == subpop, ] <- snpi_prop[snpi_prop$subpop == subpop, ] rc_hnpi[rc_hnpi$subpop == subpop, ] <- hnpi_prop[hnpi_prop$subpop == subpop, ] rc_hpar[rc_hpar$subpop == subpop, ] <- hpar_prop[hpar_prop$subpop == subpop, ] @@ -557,6 +584,7 @@ accept_reject_new_seeding_npis <- function( return(list( seeding = rc_seeding, + init = rc_init, snpi = rc_snpi, hnpi = rc_hnpi, hpar = rc_hpar, diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R index f97300d3b..035d84451 100644 --- a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R +++ b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R @@ -96,7 +96,7 @@ aggregate_and_calc_loc_likelihoods <- function( #' @importFrom magrittr %>% likelihood_data <- likelihood_data %>% do.call(what = rbind) - ##Update likelihood data based on hierarchical_stats + ##Update likelihood data based on hierarchical_stats (NOT SUPPORTED FOR INIT FILES) for (stat in names(hierarchical_stats)) { if (hierarchical_stats[[stat]]$module %in% c("seir_interventions", "seir")) { @@ -134,6 +134,7 @@ aggregate_and_calc_loc_likelihoods <- function( } else { stop("unsupported hierarchical stat module") } + @@ -596,8 +597,8 @@ initialize_mcmc_first_block <- function( ## Only works on these files: global_types <- c("seed", "init", "seir", "snpi", "hnpi", "spar", "hosp", "hpar", "llik") global_extensions <- c("csv", "parquet", "parquet", "parquet", "parquet", "parquet", "parquet", "parquet", "parquet") - chimeric_types <- c("seed", "snpi", "hnpi", "spar", "hpar", "llik") - chimeric_extensions <- c("csv", "parquet", "parquet", "parquet", "parquet", "parquet") + chimeric_types <- c("seed", "init", "snpi", "hnpi", "spar", "hpar", "llik") + chimeric_extensions <- c("csv", "parquet", "parquet", "parquet", "parquet", "parquet", "parquet") non_llik_types <- paste(c("seed", "init", "seir", "snpi", "hnpi", "spar", "hosp", "hpar"), "filename", sep = "_") global_files <- create_filename_list(run_id, global_prefix, block - 1, global_types, global_extensions) # makes file names of the form variable/name/npi_scenario/outcome_scenario/run_id/global/intermediate/slot.(block-1).run_ID.variable.ext @@ -737,7 +738,7 @@ initialize_mcmc_first_block <- function( # These functions save variables to files of the form variable/name/npi_scenario/outcome_scenario/run_id/global/intermediate/slot.(block-1),runID.variable.ext if (any(checked_par_files %in% global_file_names)) { if (!all(checked_par_files %in% global_file_names)) { - stop("Provided some GempyorSimulator input, but not all") + stop("Provided some InferenceSimulator input, but not all") } if (any(checked_sim_files %in% global_file_names)) { if (!all(checked_sim_files %in% global_file_names)) { @@ -745,7 +746,7 @@ initialize_mcmc_first_block <- function( } gempyor_inference_runner$one_simulation(sim_id2write = block - 1) } else { - stop("Provided some GempyorSimulator output(seir, hosp), but not GempyorSimulator input") + stop("Provided some InferenceSimulator output(seir, hosp), but not InferenceSimulator input") } } else { if (any(checked_sim_files %in% global_file_names)) { diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 41b6d708c..665c791d8 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -83,6 +83,21 @@ if (!is.null(config$seeding)){ } +infer_initial_conditions <- FALSE +if (!is.null(config$initial_conditions)){ + if (('perturbation' %in% names(config$initial_conditions))) { + infer_initial_conditions <- TRUE + if (!(config$initial_conditions$method %in% c('SetInitialConditionsFolderDraw'))){ + stop("This filtration method requires the initial_condition method 'SetInitialConditionsFolderDraw'") + } + if (!(config$initial_conditions$proportional)){ + stop("This filtration method requires the initial_condition to be set proportional'") + } + } +} else { + print("⚠️ No initial_conditions: section found in config >> not fitting initial_conditions.") +} + #if (!('lambda_file' %in% names(config$seeding))) { # stop("Despite being a folder draw method, filtration method requires the seeding to provide a lambda_file argument.") @@ -290,9 +305,6 @@ if (config$inference$do_inference){ print("Running WITHOUT inference") } - - - required_packages <- c("dplyr", "magrittr", "xts", "zoo", "stringr") # Load gempyor module @@ -395,6 +407,7 @@ for(npi_scenario in npi_scenarios) { initial_seeding$amount <- as.integer(round(initial_seeding$amount)) } # } + initial_init <- arrow::read_parquet(first_chimeric_files[['init_filename']]) initial_snpi <- arrow::read_parquet(first_chimeric_files[['snpi_filename']]) initial_hnpi <- arrow::read_parquet(first_chimeric_files[['hnpi_filename']]) initial_spar <- arrow::read_parquet(first_chimeric_files[['spar_filename']]) @@ -457,6 +470,11 @@ for(npi_scenario in npi_scenarios) { } else { proposed_seeding <- initial_seeding } + if (infer_initial_conditions) { + proposed_init <- inference::perturb_init(initial_init, config$initial_conditions$perturbation) + } else { + proposed_init <- initial_init + } proposed_snpi <- inference::perturb_snpi(initial_snpi, config$interventions$settings) proposed_hnpi <- inference::perturb_hnpi(initial_hnpi, config$interventions$settings) proposed_spar <- initial_spar @@ -467,6 +485,7 @@ for(npi_scenario in npi_scenarios) { # since the first iteration is accepted by default, we don't perturb it if ((opt$this_block == 1) && (current_index == 0)) { + proposed_init <- initial_init proposed_snpi <- initial_snpi proposed_hnpi <- initial_hnpi proposed_spar <- initial_spar @@ -489,6 +508,7 @@ for(npi_scenario in npi_scenarios) { write.csv(proposed_seeding, this_global_files[['seed_filename']], row.names = FALSE) # } + arrow::write_parquet(proposed_init,this_global_files[['init_filename']]) arrow::write_parquet(proposed_snpi,this_global_files[['snpi_filename']]) arrow::write_parquet(proposed_hnpi,this_global_files[['hnpi_filename']]) arrow::write_parquet(proposed_spar,this_global_files[['spar_filename']]) @@ -618,6 +638,8 @@ for(npi_scenario in npi_scenarios) { # "Chimeric" means GeoID-specific seeding_npis_list <- inference::accept_reject_new_seeding_npis( + init_orig = initial_init, + init_prop = proposed_init, seeding_orig = initial_seeding, seeding_prop = proposed_seeding, snpi_orig = initial_snpi, @@ -632,6 +654,7 @@ for(npi_scenario in npi_scenarios) { # Update accepted parameters to start next simulation + initial_init <- seeding_npis_list$init initial_seeding <- seeding_npis_list$seeding initial_snpi <- seeding_npis_list$snpi initial_hnpi <- seeding_npis_list$hnpi @@ -639,6 +662,7 @@ for(npi_scenario in npi_scenarios) { chimeric_likelihood_data <- seeding_npis_list$ll } else { print("Resetting chimeric files to global") + initial_init <- proposed_init initial_seeding <- proposed_seeding initial_snpi <- proposed_snpi initial_hnpi <- proposed_hnpi @@ -655,6 +679,7 @@ for(npi_scenario in npi_scenarios) { ## Write accepted parameters to file # writes to file of the form variable/name/npi_scenario/outcome_scenario/run_id/chimeric/intermediate/slot.block.iter.run_id.variable.ext write.csv(initial_seeding,this_chimeric_files[['seed_filename']], row.names = FALSE) + arrow::write_parquet(initial_init,this_chimeric_files[['init_filename']]) arrow::write_parquet(initial_snpi,this_chimeric_files[['snpi_filename']]) arrow::write_parquet(initial_hnpi,this_chimeric_files[['hnpi_filename']]) arrow::write_parquet(initial_spar,this_chimeric_files[['spar_filename']]) @@ -664,6 +689,7 @@ for(npi_scenario in npi_scenarios) { print(paste("Current index is ",current_index)) ###Memory management + rm(proposed_init) rm(proposed_snpi) rm(proposed_hnpi) rm(proposed_hpar) From db12eab61553a1235faca111f13750d5eae082cf Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 14 Sep 2023 17:31:00 +0200 Subject: [PATCH 2/8] spatial_groups > subpop_groups --- flepimop/gempyor_pkg/src/gempyor/NPI/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/NPI/helpers.py b/flepimop/gempyor_pkg/src/gempyor/NPI/helpers.py index 297b33977..d2796b1f4 100644 --- a/flepimop/gempyor_pkg/src/gempyor/NPI/helpers.py +++ b/flepimop/gempyor_pkg/src/gempyor/NPI/helpers.py @@ -33,13 +33,13 @@ def get_spatial_groups(grp_config, affected_subpops: list) -> dict: spatial_groups = {"grouped": [], "ungrouped": []} - if not grp_config["spatial_groups"].exists(): + if not grp_config["subpop_groups"].exists(): spatial_groups["ungrouped"] = affected_subpops else: - if grp_config["spatial_groups"].get() == "all": + if grp_config["subpop_groups"].get() == "all": spatial_groups["grouped"] = [affected_subpops] else: - spatial_groups["grouped"] = grp_config["spatial_groups"].get() + spatial_groups["grouped"] = grp_config["subpop_groups"].get() spatial_groups["ungrouped"] = list( set(affected_subpops) - set(flatten_list_of_lists(spatial_groups["grouped"])) ) From 412ea5132ffc64b0457e1950a5b7ec119c94cc33 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 14 Sep 2023 17:33:41 +0200 Subject: [PATCH 3/8] test --- .../tests/npi/config_test_spatial_group_npi.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml b/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml index afb989120..cceac5597 100644 --- a/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml +++ b/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml @@ -67,7 +67,7 @@ interventions: template: SinglePeriodModifier parameter: r2 subpop: "all" - spatial_groups: "all" + subpop_groups: "all" period_start_date: 2020-01-01 period_end_date: 2022-09-03 value: @@ -80,7 +80,7 @@ interventions: template: SinglePeriodModifier parameter: r3 subpop: "all" - spatial_groups: + subpop_groups: - ["01000", "02000"] - ["04000", "06000"] period_start_date: 2020-04-04 @@ -101,7 +101,7 @@ interventions: template: SinglePeriodModifier parameter: r4 subpop: ["01000", "02000", "04000", "06000"] - spatial_groups: + subpop_groups: - ["01000", "02000"] period_start_date: 2020-04-04 period_end_date: 2020-04-30 @@ -117,14 +117,14 @@ interventions: parameter: r5 groups: - subpop: ["09000", "10000"] - spatial_groups: ["09000", "10000"] + subpop_groups: ["09000", "10000"] periods: - start_date: 2020-12-01 end_date: 2020-12-31 - start_date: 2021-12-01 end_date: 2021-12-31 - subpop: ["01000", "02000", "04000", "06000"] - spatial_groups: ["01000","04000"] + subpop_groups: ["01000","04000"] periods: - start_date: 2020-10-01 end_date: 2020-10-31 @@ -142,14 +142,14 @@ interventions: parameter: r1 groups: - subpop: ["09000", "10000"] - spatial_groups: ["09000", "10000"] + subpop_groups: ["09000", "10000"] periods: - start_date: 2020-12-01 end_date: 2020-12-31 - start_date: 2021-12-01 end_date: 2021-12-31 - subpop: ["01000", "02000", "04000", "06000"] - spatial_groups: ["10000"] + subpop_groups: ["10000"] periods: - start_date: 2021-08-16 end_date: 2021-11-23 From 20df95b16fb3a477150b10a349907bb8a6615660 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 14 Sep 2023 17:35:02 +0200 Subject: [PATCH 4/8] spatial_setup > subpop_setup --- batch/inference_job_launcher.py | 2 +- .../gempyor_pkg/src/gempyor/dev/dev_seir.py | 2 +- flepimop/gempyor_pkg/src/gempyor/interface.py | 8 ++--- flepimop/gempyor_pkg/src/gempyor/setup.py | 4 +-- flepimop/gempyor_pkg/src/gempyor/simulate.py | 14 ++++----- .../src/gempyor/simulate_outcome.py | 6 ++-- .../gempyor_pkg/src/gempyor/simulate_seir.py | 12 ++++---- .../tests/seir/test_compartments.py | 4 +-- .../gempyor_pkg/tests/seir/test_new_seir.py | 2 +- .../gempyor_pkg/tests/seir/test_parameters.py | 6 ++-- flepimop/gempyor_pkg/tests/seir/test_seir.py | 30 +++++++++---------- postprocessing/postprocess_auto.py | 2 +- 12 files changed, 46 insertions(+), 46 deletions(-) diff --git a/batch/inference_job_launcher.py b/batch/inference_job_launcher.py index f2715a7f7..a9582f003 100755 --- a/batch/inference_job_launcher.py +++ b/batch/inference_job_launcher.py @@ -425,7 +425,7 @@ def autodetect_params(config, data_path, *, num_jobs=None, sims_per_job=None, nu print(f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]") print(f"Setting sims per job to {sims_per_job} [via {iterations_per_slot} iterations_per_slot in config]") else: - geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["spatial_setup"]["geodata"] + geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["subpop_setup"]["geodata"] with open(geodata_fname) as geodata_fp: num_subpops = sum(1 for line in geodata_fp) diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py index 3781497bb..9f5b9182b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -33,7 +33,7 @@ prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], diff --git a/flepimop/gempyor_pkg/src/gempyor/interface.py b/flepimop/gempyor_pkg/src/gempyor/interface.py index 181044b69..40d1ab295 100644 --- a/flepimop/gempyor_pkg/src/gempyor/interface.py +++ b/flepimop/gempyor_pkg/src/gempyor/interface.py @@ -69,7 +69,7 @@ def __init__( config.clear() config.read(user=False) config.set_file(config_path) - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = config["data_path"].get() spatial_base_path = pathlib.Path(spatial_path_prefix + spatial_base_path) @@ -80,7 +80,7 @@ def __init__( write_parquet = True self.s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=subpopulation_structure.SubpopulationStructure( + subpop_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() @@ -438,7 +438,7 @@ def paramred_parallel(run_spec, snpi_fn): npi_scenario="inference", # NPIs scenario to use outcome_scenario="med", # Outcome scenario to use stoch_traj_flag=False, - spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in spatial_setup$ + spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in subpop_setup$ ) snpi = pq.read_table(snpi_fn).to_pandas() @@ -464,7 +464,7 @@ def paramred_parallel_config(run_spec, dummy): npi_scenario="inference", # NPIs scenario to use outcome_scenario="med", # Outcome scenario to use stoch_traj_flag=False, - spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in spatial_setup$ + spatial_path_prefix=run_spec["geodata"], # prefix where to find the folder indicated in subpop_setup$ ) npi_seir = gempyor_simulator.get_seir_npi() diff --git a/flepimop/gempyor_pkg/src/gempyor/setup.py b/flepimop/gempyor_pkg/src/gempyor/setup.py index 7d5ea48c2..b8bca6104 100644 --- a/flepimop/gempyor_pkg/src/gempyor/setup.py +++ b/flepimop/gempyor_pkg/src/gempyor/setup.py @@ -28,7 +28,7 @@ def __init__( self, *, setup_name, - spatial_setup, + subpop_setup, nslots, ti, # time to start tf, # time to finish @@ -75,7 +75,7 @@ def __init__( self.first_sim_index = first_sim_index self.outcome_scenario = outcome_scenario - self.subpop_struct = spatial_setup + self.subpop_struct = subpop_setup self.n_days = (self.tf - self.ti).days + 1 # because we include s.ti and s.tf self.nnodes = self.subpop_struct.nnodes self.popnodes = self.subpop_struct.popnodes diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate.py b/flepimop/gempyor_pkg/src/gempyor/simulate.py index 102d6422c..12db2364a 100644 --- a/flepimop/gempyor_pkg/src/gempyor/simulate.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate.py @@ -16,7 +16,7 @@ # dt: float # nslots: overridden by the -n/--nslots script parameter # data_path: -# spatial_setup: +# subpop_setup: # geodata: # mobility: # @@ -98,8 +98,8 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with columns {spatial_setup::subpop_names} and {spatial_setup::popnodes} -# * {data_path}/{spatial_setup::mobility} +# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::popnodes} +# * {data_path}/{subpop_setup::mobility} # # If {seeding::method} is PoissonDistributed # * {seeding::lambda_file} @@ -300,7 +300,7 @@ def simulate( config.clear() config.read(user=False) config.set_file(config_file) - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = config["data_path"].get() spatial_base_path = pathlib.Path(spatial_path_prefix + spatial_base_path) @@ -317,7 +317,7 @@ def simulate( nslots = config["nslots"].as_number() print(f"Simulations to be run: {nslots}") - spatial_setup = subpopulation_structure.SubpopulationStructure( + subpop_setup = subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() @@ -332,7 +332,7 @@ def simulate( s = setup.Setup( setup_name=config["name"].get() + "/" + str(npi_scenario) + "/", - spatial_setup=spatial_setup, + subpop_setup=subpop_setup, nslots=nslots, npi_scenario=npi_scenario, npi_config_seir=config["interventions"]["settings"][npi_scenario], @@ -374,7 +374,7 @@ def simulate( s = setup.Setup( setup_name=config["name"].get() + "/" + str(scenarios_outcomes) + "/", - spatial_setup=spatial_setup, + subpop_setup=subpop_setup, nslots=nslots, outcomes_config=config["outcomes"], outcomes_scenario=scenario_outcomes, diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py index 41f8f4c75..789d2634d 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py @@ -185,7 +185,7 @@ def simulate( config.clear() config.read(user=False) config.set_file(config_file) - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = config["data_path"].get() spatial_base_path = pathlib.Path(spatial_path_prefix + spatial_base_path) @@ -197,7 +197,7 @@ def simulate( nslots = config["nslots"].as_number() print(f"Simulations to be run: {nslots}") - spatial_setup = subpopulation_structure.SubpopulationStructure( + subpop_setup = subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() @@ -218,7 +218,7 @@ def simulate( raise ValueError(f"in_prefix must be provided") s = setup.Setup( setup_name=config["name"].get() + "/" + str(scenarios_outcomes) + "/", - spatial_setup=spatial_setup, + subpop_setup=subpop_setup, nslots=nslots, outcomes_config=config["outcomes"], outcomes_scenario=scenario_outcomes, diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py index 5995bde77..9ad0f022c 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py @@ -16,7 +16,7 @@ # dt: float # nslots: overridden by the -n/--nslots script parameter # data_path: -# spatial_setup: +# subpop_setup: # geodata: # mobility: # @@ -98,8 +98,8 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with columns {spatial_setup::subpop_names} and {spatial_setup::popnodes} -# * {data_path}/{spatial_setup::mobility} +# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::popnodes} +# * {data_path}/{subpop_setup::mobility} # # If {seeding::method} is PoissonDistributed # * {seeding::lambda_file} @@ -238,7 +238,7 @@ def simulate( config.clear() config.read(user=False) config.set_file(config_file) - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = config["data_path"].get() spatial_base_path = pathlib.Path(spatial_path_prefix + spatial_base_path) @@ -249,7 +249,7 @@ def simulate( if not nslots: nslots = config["nslots"].as_number() - spatial_setup = subpopulation_structure.SubpopulationStructure( + subpop_setup = subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get() @@ -264,7 +264,7 @@ def simulate( s = setup.Setup( setup_name=config["name"].get() + "/" + str(npi_scenario) + "/", - spatial_setup=spatial_setup, + subpop_setup=subpop_setup, nslots=nslots, npi_scenario=npi_scenario, npi_config_seir=config["interventions"]["settings"][npi_scenario], diff --git a/flepimop/gempyor_pkg/tests/seir/test_compartments.py b/flepimop/gempyor_pkg/tests/seir/test_compartments.py index 4a2f86d61..f8af0e046 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_compartments.py +++ b/flepimop/gempyor_pkg/tests/seir/test_compartments.py @@ -75,7 +75,7 @@ def test_Setup_has_compartments_component(): s = setup.Setup( setup_name="test_values", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -96,7 +96,7 @@ def test_Setup_has_compartments_component(): s = setup.Setup( setup_name="test_values", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], diff --git a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py index f6880b71a..312a1d0c1 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py @@ -29,7 +29,7 @@ def test_constant_population(): s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], diff --git a/flepimop/gempyor_pkg/tests/seir/test_parameters.py b/flepimop/gempyor_pkg/tests/seir/test_parameters.py index c10ce34bd..c8de02fcb 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_parameters.py +++ b/flepimop/gempyor_pkg/tests/seir/test_parameters.py @@ -36,7 +36,7 @@ def test_parameters_from_config_plus_read_write(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -103,7 +103,7 @@ def test_parameters_quick_draw_old(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -175,7 +175,7 @@ def test_parameters_from_timeserie_file(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], diff --git a/flepimop/gempyor_pkg/tests/seir/test_seir.py b/flepimop/gempyor_pkg/tests/seir/test_seir.py index c3bf7c1c8..92f23e967 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seir.py @@ -30,7 +30,7 @@ def test_check_values(): s = setup.Setup( setup_name="test_values", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -86,7 +86,7 @@ def test_constant_population_legacy_integration(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -162,7 +162,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -248,7 +248,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -317,7 +317,7 @@ def test_steps_SEIR_no_spread(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="None", npi_config_seir=config["interventions"]["settings"]["None"], @@ -401,11 +401,11 @@ def test_continuation_resume(): prefix = "" stoch_traj_flag = True - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=subpopulation_structure.SubpopulationStructure( + subpop_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -451,11 +451,11 @@ def test_continuation_resume(): prefix = "" stoch_traj_flag = True - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=subpopulation_structure.SubpopulationStructure( + subpop_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -519,11 +519,11 @@ def test_inference_resume(): prefix = "" stoch_traj_flag = True - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=subpopulation_structure.SubpopulationStructure( + subpop_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -564,11 +564,11 @@ def test_inference_resume(): prefix = "" stoch_traj_flag = True - spatial_config = config["spatial_setup"] + spatial_config = config["subpop_setup"] spatial_base_path = pathlib.Path(config["data_path"].get()) s = setup.Setup( setup_name=config["name"].get() + "_" + str(npi_scenario), - spatial_setup=subpopulation_structure.SubpopulationStructure( + subpop_setup=subpopulation_structure.SubpopulationStructure( setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), @@ -629,7 +629,7 @@ def test_parallel_compartments_with_vacc(): prefix = "" s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="Scenario_vacc", npi_config_seir=config["interventions"]["settings"]["Scenario_vacc"], @@ -724,7 +724,7 @@ def test_parallel_compartments_no_vacc(): s = setup.Setup( setup_name="test_seir", - spatial_setup=ss, + subpop_setup=ss, nslots=1, npi_scenario="Scenario_novacc", npi_config_seir=config["interventions"]["settings"]["Scenario_novacc"], diff --git a/postprocessing/postprocess_auto.py b/postprocessing/postprocess_auto.py index 39c2c7afa..f755e8e02 100644 --- a/postprocessing/postprocess_auto.py +++ b/postprocessing/postprocess_auto.py @@ -182,7 +182,7 @@ def generate_pdf(config_path, run_id, job_name, fs_results_path, slack_token, sl npi_scenario="inference", # NPIs scenario to use outcome_scenario="med", # Outcome scenario to use stoch_traj_flag=False, - spatial_path_prefix="./", # prefix where to find the folder indicated in spatial_setup$ + spatial_path_prefix="./", # prefix where to find the folder indicated in subpop_setup$ ) run_info.folder_path = f"{fs_results_path}/model_output" From 8317d8083cdf0ed8d90f94e8dd861fb725b682b5 Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 14 Sep 2023 17:37:31 +0200 Subject: [PATCH 5/8] test configs --- .../R_packages/config.writer/tests/testthat/sample_config.yml | 2 +- flepimop/gempyor_pkg/tests/npi/config_npi.yml | 2 +- .../gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config_load.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config_load_subclasses.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config_mc_selection.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config_npi.yml | 2 +- .../gempyor_pkg/tests/outcomes/config_npi_custom_pnames.yml | 2 +- flepimop/gempyor_pkg/tests/outcomes/config_subclasses.yml | 2 +- flepimop/gempyor_pkg/tests/seir/data/config.yml | 2 +- .../tests/seir/data/config_compartmental_model_format.yml | 2 +- .../data/config_compartmental_model_format_with_covariates.yml | 2 +- .../tests/seir/data/config_compartmental_model_full.yml | 2 +- .../gempyor_pkg/tests/seir/data/config_continuation_resume.yml | 2 +- .../gempyor_pkg/tests/seir/data/config_inference_resume.yml | 2 +- flepimop/gempyor_pkg/tests/seir/data/config_parallel.yml | 2 +- flepimop/gempyor_pkg/tests/seir/data/config_resume.yml | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml b/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml index 5cc7782d3..5885d32d4 100644 --- a/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml +++ b/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml @@ -6,7 +6,7 @@ data_path: data nslots: 300 dt: 0.25 -spatial_setup: +subpop_setup: census_year: 2019 modeled_states: - AL diff --git a/flepimop/gempyor_pkg/tests/npi/config_npi.yml b/flepimop/gempyor_pkg/tests/npi/config_npi.yml index 20909f1e9..fc810811a 100644 --- a/flepimop/gempyor_pkg/tests/npi/config_npi.yml +++ b/flepimop/gempyor_pkg/tests/npi/config_npi.yml @@ -13,7 +13,7 @@ compartments: variant_type: ["WILD", "ALPHA", "DELTA", "OMICRON"] age_strata: ["age0to17", "age18to64", "age65to100"] -spatial_setup: +subpop_setup: census_year: 2019 modeled_states: - AK diff --git a/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml b/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml index cceac5597..3505f3361 100644 --- a/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml +++ b/flepimop/gempyor_pkg/tests/npi/config_test_spatial_group_npi.yml @@ -13,7 +13,7 @@ compartments: variant_type: ["WILD", "ALPHA", "DELTA", "OMICRON"] age_strata: ["age0to17", "age18to64", "age65to100"] -spatial_setup: +subpop_setup: geodata: geodata_2019_statelevel.csv mobility: mobility_2011-2015_statelevel.csv include_in_report: include_in_report diff --git a/flepimop/gempyor_pkg/tests/outcomes/config.yml b/flepimop/gempyor_pkg/tests/outcomes/config.yml index 72cf3b3a9..406ddbb91 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_load.yml b/flepimop/gempyor_pkg/tests/outcomes/config_load.yml index cdd0d15fb..a5b27b6aa 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_load.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_load.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_load_subclasses.yml b/flepimop/gempyor_pkg/tests/outcomes/config_load_subclasses.yml index 5b72523e0..949a4184d 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_load_subclasses.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_load_subclasses.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_mc_selection.yml b/flepimop/gempyor_pkg/tests/outcomes/config_mc_selection.yml index 5a8d5b949..d853c8d64 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_mc_selection.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_mc_selection.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_npi.yml b/flepimop/gempyor_pkg/tests/outcomes/config_npi.yml index c6bfcc830..7cbd16e15 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_npi.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_npi.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_npi_custom_pnames.yml b/flepimop/gempyor_pkg/tests/outcomes/config_npi_custom_pnames.yml index ffbae9ca3..c0dd84adb 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_npi_custom_pnames.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_npi_custom_pnames.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/outcomes/config_subclasses.yml b/flepimop/gempyor_pkg/tests/outcomes/config_subclasses.yml index 81abe0ba0..ff4e340b2 100644 --- a/flepimop/gempyor_pkg/tests/outcomes/config_subclasses.yml +++ b/flepimop/gempyor_pkg/tests/outcomes/config_subclasses.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.csv census_year: 2018 diff --git a/flepimop/gempyor_pkg/tests/seir/data/config.yml b/flepimop/gempyor_pkg/tests/seir/data/config.yml index 6c4cb47fd..95c223eed 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config.yml @@ -6,7 +6,7 @@ data_path: data nslots: 15 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format.yml b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format.yml index 932fa382a..cd27179db 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format.yml @@ -5,7 +5,7 @@ end_date: 2020-02-15 data_path: data nslots: 15 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format_with_covariates.yml b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format_with_covariates.yml index 9afd97a54..79363980b 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format_with_covariates.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_format_with_covariates.yml @@ -5,7 +5,7 @@ end_date: 2020-02-15 data_path: data nslots: 15 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_full.yml b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_full.yml index b884d8a54..d350230d2 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_full.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_compartmental_model_full.yml @@ -5,7 +5,7 @@ end_date: 2020-05-31 data_path: data nslots: 15 -spatial_setup: +subpop_setup: geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_continuation_resume.yml b/flepimop/gempyor_pkg/tests/seir/data/config_continuation_resume.yml index 3dea2721c..883e899a6 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_continuation_resume.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_continuation_resume.yml @@ -5,7 +5,7 @@ end_date: 2020-05-31 data_path: data nslots: 15 -spatial_setup: +subpop_setup: base_path: data geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_inference_resume.yml b/flepimop/gempyor_pkg/tests/seir/data/config_inference_resume.yml index e11cdf53e..29ef00af5 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_inference_resume.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_inference_resume.yml @@ -5,7 +5,7 @@ end_date: 2020-05-31 data_path: data nslots: 15 -spatial_setup: +subpop_setup: base_path: data geodata: geodata.csv mobility: mobility.txt diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_parallel.yml b/flepimop/gempyor_pkg/tests/seir/data/config_parallel.yml index c496f2cba..260acd78d 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_parallel.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_parallel.yml @@ -5,7 +5,7 @@ end_date: 2020-05-15 data_path: data nslots: 1 -spatial_setup: +subpop_setup: base_path: data geodata: geodata.csv mobility: mobility.csv diff --git a/flepimop/gempyor_pkg/tests/seir/data/config_resume.yml b/flepimop/gempyor_pkg/tests/seir/data/config_resume.yml index ed111ed0e..67d05a713 100644 --- a/flepimop/gempyor_pkg/tests/seir/data/config_resume.yml +++ b/flepimop/gempyor_pkg/tests/seir/data/config_resume.yml @@ -6,7 +6,7 @@ data_path: data nslots: 15 -spatial_setup: +subpop_setup: base_path: data geodata: geodata.csv mobility: mobility.txt From 1174f921bbdf4c32bde3a71716a438af9d8c748a Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 14 Sep 2023 17:40:23 +0200 Subject: [PATCH 6/8] more spatial to subpop --- datasetup/build_US_setup.R | 34 +++--- datasetup/build_covid_data.R | 14 +-- datasetup/build_flu_data.R | 6 +- datasetup/build_nonUS_setup.R | 8 +- .../R_packages/config.writer/R/yaml_utils.R | 34 +++--- .../flepicommon/R/config_test_new.R | 20 ++-- .../inference/R/inference_to_forecast.R | 2 +- .../inference/archive/InferenceTest.R | 8 +- flepimop/main_scripts/create_seeding.R | 20 ++-- flepimop/main_scripts/create_seeding_added.R | 20 ++-- flepimop/main_scripts/inference_slot.R | 6 +- .../main_scripts/seir_init_immuneladder.R | 4 +- postprocessing/postprocess_snapshot.R | 102 +++++++++--------- postprocessing/processing_diagnostics.R | 2 +- postprocessing/processing_diagnostics_AWS.R | 2 +- postprocessing/processing_diagnostics_SLURM.R | 2 +- .../run_sim_processing_FluSightExample.R | 2 +- postprocessing/run_sim_processing_SLURM.R | 2 +- postprocessing/run_sim_processing_TEMPLATE.R | 2 +- .../seir_init_immuneladder_r17phase3.R | 4 +- .../seir_init_immuneladder_r17phase3_preOm.R | 4 +- ...nit_immuneladder_r17phase3_preOm_noDelta.R | 4 +- 22 files changed, 151 insertions(+), 151 deletions(-) diff --git a/datasetup/build_US_setup.R b/datasetup/build_US_setup.R index d15befb14..2943a3b2a 100644 --- a/datasetup/build_US_setup.R +++ b/datasetup/build_US_setup.R @@ -8,7 +8,7 @@ # # ```yaml # data_path: -# spatial_setup: +# subpop_setup: # modeled_states: e.g. MD, CA, NY # mobility: optional; default is 'mobility.csv' # geodata: optional; default is 'geodata.csv' @@ -23,8 +23,8 @@ # # ## Output Data # -# * {data_path}/{spatial_setup::mobility} -# * {data_path}/{spatial_setup::geodata} +# * {data_path}/{subpop_setup::mobility} +# * {data_path}/{subpop_setup::geodata} # ## @cond @@ -52,11 +52,11 @@ if (length(config) == 0) { } outdir <- config$data_path -filterUSPS <- config$spatial_setup$modeled_states +filterUSPS <- config$subpop_setup$modeled_states dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config -state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) +state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # commute_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","commute_data.gz.parquet")) @@ -80,7 +80,7 @@ tidycensus::census_api_key(key = census_key) census_data <- tidycensus::get_acs(geography="county", state=filterUSPS, - variables="B01003_001", year=config$spatial_setup$census_year, + variables="B01003_001", year=config$subpop_setup$census_year, keep_geo_vars=TRUE, geometry=FALSE, show_call=TRUE) census_data <- census_data %>% dplyr::rename(population=estimate, subpop=GEOID) %>% @@ -137,12 +137,12 @@ if (state_level){ census_data <- census_data %>% dplyr::arrange(population) -if (!is.null(config$spatial_setup$popnodes)) { - names(census_data)[names(census_data) == "population"] <- config$spatial_setup$popnodes +if (!is.null(config$subpop_setup$popnodes)) { + names(census_data)[names(census_data) == "population"] <- config$subpop_setup$popnodes } -if (length(config$spatial_setup$geodata) > 0) { - geodata_file <- config$spatial_setup$geodata +if (length(config$subpop_setup$geodata) > 0) { + geodata_file <- config$subpop_setup$geodata } else { geodata_file <- 'geodata.csv' } @@ -155,13 +155,13 @@ print(paste("Wrote geodata file:", file.path(outdir, geodata_file))) # MOBILITY DATA (COMMUTER DATA) ------------------------------------------------------------ -if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){ +if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){ - warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility), "(specified in the config) currently exists. Please check again.")) + warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility), "(specified in the config) currently exists. Please check again.")) -} else if(state_level & file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){ +} else if(state_level & file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){ - warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility))) + warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility))) } else{ @@ -176,8 +176,8 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup if(opt$w){ mobility_file <- 'mobility.txt' - } else if (length(config$spatial_setup$mobility) > 0) { - mobility_file <- config$spatial_setup$mobility + } else if (length(config$subpop_setup$mobility) > 0) { + mobility_file <- config$subpop_setup$mobility } else { mobility_file <- 'mobility.csv' } @@ -210,7 +210,7 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup write.csv(file = file.path(outdir, mobility_file), rc, row.names=FALSE) } else { - stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's spatial_setup::mobility.") + stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's subpop_setup::mobility.") } print(paste("Wrote mobility file:", file.path(outdir, mobility_file))) diff --git a/datasetup/build_covid_data.R b/datasetup/build_covid_data.R index d44d8b1bd..d21a75001 100644 --- a/datasetup/build_covid_data.R +++ b/datasetup/build_covid_data.R @@ -31,11 +31,11 @@ if (exists("config$inference$gt_source")) { } outdir <- config$data_path -filterUSPS <- config$spatial_setup$modeled_states +filterUSPS <- config$subpop_setup$modeled_states dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config -state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) +state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) @@ -218,7 +218,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ max(fluview_data$Update) - census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata)) + census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata)) fluview_data <- fluview_data %>% dplyr::inner_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>% dplyr::select(Update, source, FIPS, incidD) @@ -235,7 +235,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>% # mutate(gt_source = "nchs") # fluview_data <- fluview_data %>% - # filter(source %in% config$spatial_setup$modeled_states) + # filter(source %in% config$subpop_setup$modeled_states) # Update >= config$start_date, # Update <= config$end_date_groundtruth) gt_data <- append(gt_data, list(fluview_data)) @@ -283,7 +283,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ # # max(fluview_data$Update) # -# census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata)) +# census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata)) # fluview_data <- fluview_data %>% # left_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>% # dplyr::select(Update, source, FIPS, incidD) @@ -300,7 +300,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ # fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>% # # mutate(gt_source = "nchs") # # fluview_data <- fluview_data %>% -# # filter(source %in% config$spatial_setup$modeled_states) +# # filter(source %in% config$subpop_setup$modeled_states) # # Update >= config$start_date, # # Update <= config$end_date_groundtruth) # gt_data <- append(gt_data, list(fluview_data)) @@ -372,7 +372,7 @@ us_data <- us_data %>% filter(Update >= lubridate::as_date(config$start_date) & Update <= lubridate::as_date(end_date_)) # Filter to states we care about -locs <- config$spatial_setup$modeled_states +locs <- config$subpop_setup$modeled_states us_data <- us_data %>% filter(source %in% locs) %>% filter(!is.na(source)) %>% diff --git a/datasetup/build_flu_data.R b/datasetup/build_flu_data.R index f44ea0568..2a2529288 100644 --- a/datasetup/build_flu_data.R +++ b/datasetup/build_flu_data.R @@ -32,11 +32,11 @@ if (length(config) == 0) { } outdir <- config$data_path -filterUSPS <- config$spatial_setup$modeled_states +filterUSPS <- config$subpop_setup$modeled_states dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config -state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) +state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) dir.create(outdir, showWarnings = FALSE, recursive = TRUE) @@ -59,7 +59,7 @@ source("https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/d # Pull daily hospitalizations for model run us_data <- load_flu_hosp_data(temporal_resolution = 'daily', na.rm = TRUE) -locs <- read_csv(file.path(config$data_path, config$spatial_setup$geodata)) +locs <- read_csv(file.path(config$data_path, config$subpop_setup$geodata)) # fix string pad issue on left side us_data <- us_data %>% diff --git a/datasetup/build_nonUS_setup.R b/datasetup/build_nonUS_setup.R index 60926450d..4ba52c8b2 100644 --- a/datasetup/build_nonUS_setup.R +++ b/datasetup/build_nonUS_setup.R @@ -8,7 +8,7 @@ # # ```yaml # data_path: -# spatial_setup: +# subpop_setup: # modeled_states: e.g. ZMB, BGD, CAN # mobility: optional; default is 'mobility.csv' # geodata: optional; default is 'geodata.csv' @@ -19,8 +19,8 @@ # # ## Output Data # -# * {data_path}/{spatial_setup::mobility} -# * {data_path}/{spatial_setup::geodata} +# * {data_path}/{subpop_setup::mobility} +# * {data_path}/{subpop_setup::geodata} # ## @cond @@ -42,7 +42,7 @@ if (length(config) == 0) { } outdir <- config$data_path -filterADMIN0 <- config$spatial_setup$modeled_states +filterADMIN0 <- config$subpop_setup$modeled_states dir.create(outdir, showWarnings = FALSE, recursive = TRUE) diff --git a/flepimop/R_packages/config.writer/R/yaml_utils.R b/flepimop/R_packages/config.writer/R/yaml_utils.R index 5fcd06ab9..4c2f3edfe 100644 --- a/flepimop/R_packages/config.writer/R/yaml_utils.R +++ b/flepimop/R_packages/config.writer/R/yaml_utils.R @@ -88,12 +88,12 @@ collapse_intervention<- function(dat){ dplyr::group_by(dplyr::across(-period)) %>% dplyr::summarize(period = paste0(period, collapse="\n ")) - if (!all(is.na(mtr$spatial_groups)) & !all(is.null(mtr$spatial_groups))) { + if (!all(is.na(mtr$subpop_groups)) & !all(is.null(mtr$subpop_groups))) { mtr <- mtr %>% dplyr::group_by(dplyr::across(-subpop)) %>% dplyr::summarize(subpop = paste0(subpop, collapse='", "'), - spatial_groups = paste0(spatial_groups, collapse='", "')) %>% + subpop_groups = paste0(subpop_groups, collapse='", "')) %>% dplyr::mutate(period = paste0(" ", period)) } else { @@ -104,7 +104,7 @@ collapse_intervention<- function(dat){ } reduce <- dat %>% - dplyr::select(USPS, subpop, contains("spatial_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>% + dplyr::select(USPS, subpop, contains("subpop_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>% dplyr::filter(template %in% c("SinglePeriodModifier", "ModifierModifier")) %>% dplyr::mutate(end_date=paste0("period_end_date: ", end_date), start_date=paste0("period_start_date: ", start_date)) %>% @@ -150,9 +150,9 @@ yaml_mtr_template <- function(dat){ " groups:\n", ' - subpop: "all"\n' )) - if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){ + if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){ cat(paste0( - ' spatial_groups: "all"\n')) + ' subpop_groups: "all"\n')) } for(j in 1:nrow(dat)){ @@ -174,9 +174,9 @@ yaml_mtr_template <- function(dat){ cat(paste0( ' - subpop: ["', dat$subpop[j], '"]\n')) - if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){ + if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){ cat(paste0( - ' spatial_groups: ["', dat$spatial_groups[j], '"]\n')) + ' subpop_groups: ["', dat$subpop_groups[j], '"]\n')) } cat(paste0( ' periods:\n', @@ -376,12 +376,12 @@ yaml_reduce_template<- function(dat){ } else { paste0(' subpop: ["', dat$subpop, '"]\n') }, - if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){ - if(all(dat$spatial_groups == "all")){ - ' spatial_groups: "all"\n' + if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){ + if(all(dat$subpop_groups == "all")){ + ' subpop_groups: "all"\n' } else { - paste0(' spatial_groups: \n', - paste(sapply(X=dat$spatial_groups, function(x = X) paste0(' - ["', paste(x, collapse = '", "'), '"]\n')), collapse = "")) + paste0(' subpop_groups: \n', + paste(sapply(X=dat$subpop_groups, function(x = X) paste0(' - ["', paste(x, collapse = '", "'), '"]\n')), collapse = "")) } }, dat$period, @@ -527,7 +527,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){ #' Print Header Section -#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file. +#' @description Prints the global options and the subpop setup section of the configuration files. These typically sit at the top of the configuration file. #' #' @param sim_name name of simulation, typically named after the region/location you are modeling #' @param setup_name # SMH, FCH @@ -540,7 +540,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){ #' @param nslots number of simulations to run #' @param model_output_dirname #' @param start_date_groundtruth -#' @param setup_name spatial folder name +#' @param setup_name subpop folder name #' #' @return #' @export @@ -582,7 +582,7 @@ print_header <- function ( #' Print Header Section -#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file. +#' @description Prints the global options and the subpop setup section of the configuration files. These typically sit at the top of the configuration file. #' #' @param census_year integer(year) #' @param modeled_states vector of sub-populations (i.e., locations) that will be modeled. This can be different from the subpop IDs. For the US, state abbreviations are often used. This component is only used for filtering the data to the set of populations. @@ -597,7 +597,7 @@ print_header <- function ( #' #' @examples #' -print_spatial_setup <- function ( +print_subpop_setup <- function ( census_year = 2019, modeled_states = NULL, geodata_file = "geodata.csv", @@ -605,7 +605,7 @@ print_spatial_setup <- function ( state_level = TRUE) { cat( - paste0("spatial_setup:\n", + paste0("subpop_setup:\n", " census_year: ", census_year, "\n"), ifelse(!is.null(modeled_states), paste0(" modeled_states:\n", diff --git a/flepimop/R_packages/flepicommon/R/config_test_new.R b/flepimop/R_packages/flepicommon/R/config_test_new.R index e6c71d0ef..26e55eeb1 100644 --- a/flepimop/R_packages/flepicommon/R/config_test_new.R +++ b/flepimop/R_packages/flepicommon/R/config_test_new.R @@ -78,14 +78,14 @@ validation_list$nslots<- function(value,full_config,config_name){ } return(TRUE) } -#### SPATIAL SETUP PART +#### subpop SETUP PART ##Checking if the following values are present or not. ##If they do not have an assigned default value then the execution will be stopped. ##If they have a default then A statement will be printed and test will continue ## No Default: Base Path, Modeled States, Year. subpop ## With Default: Geodata, Mobility, Popnodes, Statelevel -validation_list$spatial_setup <- list() +validation_list$subpop_setup <- list() validation_list$data_path <- function(value, full_config, config_name) { if (is.null(value)) { print("No base path mentioned in the configuration file") @@ -98,7 +98,7 @@ validation_list$data_path <- function(value, full_config, config_name) { return(TRUE) } -validation_list$spatial_setup$modeled_states <- function(value, full_config,config_name) { +validation_list$subpop_setup$modeled_states <- function(value, full_config,config_name) { if(length(value)==0){ print("No state mentioned in the configuration file") return(FALSE) @@ -117,7 +117,7 @@ validation_list$spatial_setup$modeled_states <- function(value, full_config,conf return(TRUE) } -validation_list$spatial_setup$geodata <- function(value, full_config,config_name) { +validation_list$subpop_setup$geodata <- function(value, full_config,config_name) { if (is.null(value)) { print("No geodata path mentioned in the configuration file") return(FALSE) @@ -131,7 +131,7 @@ validation_list$spatial_setup$geodata <- function(value, full_config,config_name return(TRUE) } -validation_list$spatial_setup$mobility <- function(value, full_config,config_name) { +validation_list$subpop_setup$mobility <- function(value, full_config,config_name) { if (is.null(value)) { print("No mobility path mentioned in the configuration file") return(FALSE) @@ -145,7 +145,7 @@ validation_list$spatial_setup$mobility <- function(value, full_config,config_nam return(TRUE) } -validation_list$spatial_setup$census_year <- function(value, full_config,config_name) { +validation_list$subpop_setup$census_year <- function(value, full_config,config_name) { if (is.null(value)) { print("No year mentioned") return(FALSE) @@ -153,7 +153,7 @@ validation_list$spatial_setup$census_year <- function(value, full_config,config_ return(TRUE) } -validation_list$spatial_setup$subpop <- function(value, full_config,config_name) { +validation_list$subpop_setup$subpop <- function(value, full_config,config_name) { if (is.null(value)) { print("No subpops mentioned") #Should display a better error message than subpop. return(FALSE) @@ -161,7 +161,7 @@ validation_list$spatial_setup$subpop <- function(value, full_config,config_name) return(TRUE) } -validation_list$spatial_setup$popnodes <- function(value, full_config,config_name) { +validation_list$subpop_setup$popnodes <- function(value, full_config,config_name) { if (is.null(value)) { print("No Population Nodes mentioned") #Should display a better error message than subpop. return(FALSE) @@ -170,7 +170,7 @@ validation_list$spatial_setup$popnodes <- function(value, full_config,config_nam } #SINCE NOT NECESSARY written to remove warning -validation_list$spatial_setup$include_in_report <- function(value, full_config,config_name) { +validation_list$subpop_setup$include_in_report <- function(value, full_config,config_name) { return(TRUE) } @@ -186,7 +186,7 @@ validation_list$setup_name <- function(value, full_config,config_name) { return(TRUE) } -validation_list$spatial_setup$state_level <- function(value, full_config,config_name) { +validation_list$subpop_setup$state_level <- function(value, full_config,config_name) { if (is.null(value)) { print("No specifications about state level runs mentioned mentioned") return(FALSE) diff --git a/flepimop/R_packages/inference/R/inference_to_forecast.R b/flepimop/R_packages/inference/R/inference_to_forecast.R index 4ede21d29..8de70504d 100644 --- a/flepimop/R_packages/inference/R/inference_to_forecast.R +++ b/flepimop/R_packages/inference/R/inference_to_forecast.R @@ -1,7 +1,7 @@ ##' Functoin that takes the results of an inference run, a date ##' and a set of cumulative numbers and and results from the simulation. ##' -##' @param sim_data data from the simse to use. already aggregated to correct spatial scale +##' @param sim_data data from the simse to use. already aggregated to correct subpop scale ##' @param start_date the date to start from ##' @param cum_dat the cumulative data on start date. should have a column to join to sta on and cumDeaths ##' @param loc_column whihc column defines location diff --git a/flepimop/R_packages/inference/archive/InferenceTest.R b/flepimop/R_packages/inference/archive/InferenceTest.R index 83e383aa6..b505456fc 100644 --- a/flepimop/R_packages/inference/archive/InferenceTest.R +++ b/flepimop/R_packages/inference/archive/InferenceTest.R @@ -30,8 +30,8 @@ single_loc_inference_test <- function(to_fit, cl <- parallel::makeCluster(ncores) registerDoSNOW(cl) - # Column name that stores spatial unique id - obs_subpop <- config$spatial_setup$subpop + # Column name that stores subpop unique id + obs_subpop <- config$subpop_setup$subpop # Set number of simulations iterations_per_slot <- config$inference$iterations_per_slot @@ -273,8 +273,8 @@ multi_loc_inference_test <- function(to_fit, registerDoSNOW(cl) N <- length(S0s) - # Column name that stores spatial unique id - obs_subpop <- config$spatial_setup$subpop + # Column name that stores subpop unique id + obs_subpop <- config$subpop_setup$subpop # Set number of simulations iterations_per_slot <- config$inference$iterations_per_slot diff --git a/flepimop/main_scripts/create_seeding.R b/flepimop/main_scripts/create_seeding.R index b52681044..ce6516c70 100644 --- a/flepimop/main_scripts/create_seeding.R +++ b/flepimop/main_scripts/create_seeding.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # subpop: # @@ -24,7 +24,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # @@ -57,14 +57,14 @@ if (length(config) == 0) { stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag") } -if (is.null(config$spatial_setup$us_model)) { - config$spatial_setup$us_model <- FALSE - if ("modeled_states" %in% names(config$spatial_setup)) { - config$spatial_setup$us_model <- TRUE +if (is.null(config$subpop_setup$us_model)) { + config$subpop_setup$us_model <- FALSE + if ("modeled_states" %in% names(config$subpop_setup)) { + config$subpop_setup$us_model <- TRUE } } -is_US_run <- config$spatial_setup$us_model +is_US_run <- config$subpop_setup$us_model seed_variants <- "variant_filename" %in% names(config$seeding) @@ -161,7 +161,7 @@ if ("date" %in% names(cases_deaths)) { cases_deaths$Update <- cases_deaths$date warning("Changing Update name in seeding. This is a hack") } -obs_subpop <- config$spatial_setup$subpop +obs_subpop <- config$subpop_setup$subpop required_column_names <- NULL check_required_names <- function(df, cols, msg) { @@ -266,13 +266,13 @@ all_times <- lubridate::ymd(config$start_date) + seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date)) geodata <- flepicommon::load_geodata_file( - file.path(config$data_path, config$spatial_setup$geodata), + file.path(config$data_path, config$subpop_setup$geodata), 5, "0", TRUE ) -all_subpop <- geodata[[config$spatial_setup$subpop]] +all_subpop <- geodata[[config$subpop_setup$subpop]] diff --git a/flepimop/main_scripts/create_seeding_added.R b/flepimop/main_scripts/create_seeding_added.R index ccfee8f89..d9ca6403a 100644 --- a/flepimop/main_scripts/create_seeding_added.R +++ b/flepimop/main_scripts/create_seeding_added.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # subpop: # @@ -24,7 +24,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # @@ -55,14 +55,14 @@ if (length(config) == 0) { stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag") } -if (is.null(config$spatial_setup$us_model)) { - config$spatial_setup$us_model <- FALSE - if ("modeled_states" %in% names(config$spatial_setup)) { - config$spatial_setup$us_model <- TRUE +if (is.null(config$subpop_setup$us_model)) { + config$subpop_setup$us_model <- FALSE + if ("modeled_states" %in% names(config$subpop_setup)) { + config$subpop_setup$us_model <- TRUE } } -is_US_run <- config$spatial_setup$us_model +is_US_run <- config$subpop_setup$us_model seed_variants <- "variant_filename" %in% names(config$seeding) @@ -159,7 +159,7 @@ if ("date" %in% names(cases_deaths)) { cases_deaths$Update <- cases_deaths$date warning("Changing Update name in seeding. This is a hack") } -obs_subpop <- config$spatial_setup$subpop +obs_subpop <- config$subpop_setup$subpop required_column_names <- NULL check_required_names <- function(df, cols, msg) { @@ -264,13 +264,13 @@ all_times <- lubridate::ymd(config$start_date) + seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date)) geodata <- flepicommon::load_geodata_file( - file.path(config$data_path, config$spatial_setup$geodata), + file.path(config$data_path, config$subpop_setup$geodata), 5, "0", TRUE ) -all_subpop <- geodata[[config$spatial_setup$subpop]] +all_subpop <- geodata[[config$subpop_setup$subpop]] diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 665c791d8..703422f78 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -104,7 +104,7 @@ if (!is.null(config$initial_conditions)){ #} # Aggregation to state level if in config -state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) +state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) ##Load information on geographic locations from geodata file. @@ -112,12 +112,12 @@ suppressMessages( geodata <- flepicommon::load_geodata_file( paste( config$data_path, - config$spatial_setup$geodata, sep = "/" + config$subpop_setup$geodata, sep = "/" ), subpop_len = opt$subpop_len ) ) -obs_subpop <- config$spatial_setup$subpop +obs_subpop <- config$subpop_setup$subpop ##Load simulations per slot from config if not defined on command line ##command options take precedence diff --git a/flepimop/main_scripts/seir_init_immuneladder.R b/flepimop/main_scripts/seir_init_immuneladder.R index a7c2d3e84..70af508f2 100644 --- a/flepimop/main_scripts/seir_init_immuneladder.R +++ b/flepimop/main_scripts/seir_init_immuneladder.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # subpop: # @@ -24,7 +24,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # diff --git a/postprocessing/postprocess_snapshot.R b/postprocessing/postprocess_snapshot.R index e3b4d8a24..f60ee37df 100644 --- a/postprocessing/postprocess_snapshot.R +++ b/postprocessing/postprocess_snapshot.R @@ -60,7 +60,7 @@ print(opt$select_outputs) config <- flepicommon::load_config(opt$config) # Pull in subpop data -geodata <- setDT(read.csv(file.path(config$data_path, config$spatial_setup$geodata))) +geodata <- setDT(read.csv(file.path(config$data_path, config$subpop_setup$geodata))) ## gt_data MUST exist directly after a run gt_data <- data.table::fread(config$inference$gt_data_path) %>% @@ -77,7 +77,7 @@ pdf.options(useDingbats = TRUE) import_model_outputs <- function(scn_dir, outcome, global_opt, final_opt, lim_hosp = c("date", sapply(1:length(names(config$inference$statistics)), function(i) purrr::flatten(config$inference$statistics[i])$sim_var), - config$spatial_setup$subpop)){ + config$subpop_setup$subpop)){ dir_ <- paste0(scn_dir, "/", outcome, "/", config$name, "/", @@ -145,7 +145,7 @@ print(end_time - start_time) if("hosp" %in% model_outputs){ gg_cols <- 8 - num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$spatial_setup$subpop)])) + num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$subpop_setup$subpop)])) pdf_dims <- data.frame(width = gg_cols*2, length = num_nodes/gg_cols * 2) fname <- paste0("pplot/hosp_mod_outputs_", opt$run_id,".pdf") @@ -154,31 +154,31 @@ if("hosp" %in% model_outputs){ for(i in 1:length(fit_stats)){ statistics <- purrr::flatten(config$inference$statistics[i]) - cols_sim <- c("date", statistics$sim_var, config$spatial_setup$subpop,"slot") - cols_data <- c("date", config$spatial_setup$subpop, statistics$data_var) + cols_sim <- c("date", statistics$sim_var, config$subpop_setup$subpop,"slot") + cols_data <- c("date", config$subpop_setup$subpop, statistics$data_var) ## summarize slots print(outputs_global$hosp %>% .[, ..cols_sim] %>% .[, date := lubridate::as_date(date)] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } %>% - .[, as.list(quantile(get(statistics$sim_var), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$spatial_setup$subpop)] %>% + .[, as.list(quantile(get(statistics$sim_var), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$subpop_setup$subpop)] %>% ggplot() + geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) + geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) + geom_line(aes(x = date, y = V3)) + geom_point(data = gt_data %>% .[, ..cols_data] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } , aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) + - facet_wrap(~get(config$spatial_setup$subpop), scales = 'free', ncol = gg_cols) + + facet_wrap(~get(config$subpop_setup$subpop), scales = 'free', ncol = gg_cols) + labs(x = 'date', y = fit_stats[i], title = statistics$sim_var) + theme_classic() ) @@ -187,7 +187,7 @@ if("hosp" %in% model_outputs){ # print(outputs_global$hosp %>% # ggplot() + # geom_line(aes(lubridate::as_date(date), get(sim_var), group = as.factor(slot)), alpha = 0.1) + - # facet_wrap(~get(config$spatial_setup$subpop), scales = 'free') + + # facet_wrap(~get(config$subpop_setup$subpop), scales = 'free') + # geom_point(data = gt_data %>% # .[, ..cols_data], # aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) + @@ -200,28 +200,28 @@ if("hosp" %in% model_outputs){ print(outputs_global$hosp %>% .[, ..cols_sim] %>% .[, date := lubridate::as_date(date)] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } %>% - .[, csum := cumsum(get(statistics$sim_var)), by = .(get(config$spatial_setup$subpop), slot)] %>% - .[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$spatial_setup$subpop)] %>% + .[, csum := cumsum(get(statistics$sim_var)), by = .(get(config$subpop_setup$subpop), slot)] %>% + .[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$subpop_setup$subpop)] %>% ggplot() + geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) + geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) + geom_line(aes(x = date, y = V3)) + geom_point(data = gt_data %>% .[, ..cols_data] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } %>% - .[, csum := cumsum(replace_na(get(statistics$data_var), 0)) , by = .(get(config$spatial_setup$subpop))] + .[, csum := cumsum(replace_na(get(statistics$data_var), 0)) , by = .(get(config$subpop_setup$subpop))] , aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) + - facet_wrap(~get(config$spatial_setup$subpop), scales = 'free', ncol = gg_cols) + + facet_wrap(~get(config$subpop_setup$subpop), scales = 'free', ncol = gg_cols) + labs(x = 'date', y = fit_stats[i], title = paste0("cumulative ", statistics$sim_var)) + theme_classic() ) @@ -238,31 +238,31 @@ if("hosp" %in% model_outputs){ for(i in 1:length(fit_stats)){ statistics <- purrr::flatten(config$inference$statistics[i]) - cols_sim <- c("date", statistics$sim_var, config$spatial_setup$subpop,"slot") - cols_data <- c("date", config$spatial_setup$subpop, statistics$data_var) + cols_sim <- c("date", statistics$sim_var, config$subpop_setup$subpop,"slot") + cols_data <- c("date", config$subpop_setup$subpop, statistics$data_var) if("llik" %in% model_outputs){ llik_rank <- copy(outputs_global$llik) %>% - .[, .SD[order(ll)], eval(config$spatial_setup$subpop)] - high_low_llik <- rbindlist(list(data.table(llik_rank, key = eval(config$spatial_setup$subpop)) %>% - .[, head(.SD,5), by = eval(config$spatial_setup$subpop)] %>% + .[, .SD[order(ll)], eval(config$subpop_setup$subpop)] + high_low_llik <- rbindlist(list(data.table(llik_rank, key = eval(config$subpop_setup$subpop)) %>% + .[, head(.SD,5), by = eval(config$subpop_setup$subpop)] %>% .[, llik_bin := "top"], - data.table(llik_rank, key = eval(config$spatial_setup$subpop)) %>% - .[, tail(.SD,5), by = eval(config$spatial_setup$subpop)]%>% + data.table(llik_rank, key = eval(config$subpop_setup$subpop)) %>% + .[, tail(.SD,5), by = eval(config$subpop_setup$subpop)]%>% .[, llik_bin := "bottom"]) ) high_low_hosp_llik <- copy(outputs_global$hosp) %>% - .[high_low_llik, on = c("slot", eval(config$spatial_setup$subpop))] + .[high_low_llik, on = c("slot", eval(config$subpop_setup$subpop))] - hosp_llik_plots <- lapply(unique(high_low_hosp_llik %>% .[, get(config$spatial_setup$subpop)]), + hosp_llik_plots <- lapply(unique(high_low_hosp_llik %>% .[, get(config$subpop_setup$subpop)]), function(e){ high_low_hosp_llik %>% .[, date := lubridate::as_date(date)] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - .[get(config$spatial_setup$subpop) == e] %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + .[get(config$subpop_setup$subpop) == e] %>% + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } %>% ggplot() + geom_line(aes(lubridate::as_date(date), get(statistics$data_var), @@ -271,14 +271,14 @@ if("hosp" %in% model_outputs){ scale_color_viridis_c(option = "D", name = "log\nlikelihood") + geom_point(data = gt_data %>% .[, ..cols_data] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - .[get(config$spatial_setup$subpop) == e] %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + .[get(config$subpop_setup$subpop) == e] %>% + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } , aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) + - facet_wrap(~get(config$spatial_setup$subpop), scales = 'free', ncol = gg_cols) + + facet_wrap(~get(config$subpop_setup$subpop), scales = 'free', ncol = gg_cols) + labs(x = 'date', y = fit_stats[i]) + #, title = paste0("top 5, bottom 5 lliks, ", statistics$sim_var)) + theme_classic() + guides(linetype = 'none') @@ -299,27 +299,27 @@ if("hosp" %in% model_outputs){ if("hnpi" %in% model_outputs){ gg_cols <- 4 - num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$spatial_setup$subpop)])) + num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$subpop_setup$subpop)])) pdf_dims <- data.frame(width = gg_cols*3, length = num_nodes/gg_cols * 2) fname <- paste0("pplot/hnpi_mod_outputs_", opt$run_id,".pdf") pdf(fname, width = pdf_dims$width, height = pdf_dims$length) - hnpi_plots <- lapply(sort(unique(outputs_global$hnpi %>% .[, get(config$spatial_setup$subpop)])), + hnpi_plots <- lapply(sort(unique(outputs_global$hnpi %>% .[, get(config$subpop_setup$subpop)])), function(i){ outputs_global$hnpi %>% - .[outputs_global$llik, on = c(config$spatial_setup$subpop, "slot")] %>% - { if(config$spatial_setup$subpop == 'subpop'){ + .[outputs_global$llik, on = c(config$subpop_setup$subpop, "slot")] %>% + { if(config$subpop_setup$subpop == 'subpop'){ .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} } %>% - .[get(config$spatial_setup$subpop) == i] %>% - { if(config$spatial_setup$subpop == 'subpop'){ .[, subpop := USPS]} + .[get(config$subpop_setup$subpop) == i] %>% + { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} } %>% ggplot(aes(npi_name,reduction)) + geom_violin() + geom_jitter(aes(group = npi_name, color = ll), size = 0.6, height = 0, width = 0.2, alpha = 1) + - facet_wrap(~get(config$spatial_setup$subpop), scales = 'free') + + facet_wrap(~get(config$subpop_setup$subpop), scales = 'free') + scale_color_viridis_c(option = "B", name = "log\nlikelihood") + theme_classic() } @@ -358,7 +358,7 @@ if("seed" %in% model_outputs){ ## TO DO: MODIFIED FOR WHEN LOTS MORE SEEDING COM tmp_ <- paste("+", destination_columns, collapse = "") facet_formula <- paste("~", substr(tmp_, 2, nchar(tmp_))) - seed_plots <- lapply(sort(unique(setDT(geodata) %>% .[, get(config$spatial_setup$subpop)])), + seed_plots <- lapply(sort(unique(setDT(geodata) %>% .[, get(config$subpop_setup$subpop)])), function(i){ outputs_global$seed %>% .[subpop == i] %>% @@ -400,24 +400,24 @@ if("seir" %in% model_outputs){ if("snpi" %in% model_outputs){ gg_cols <- 4 - num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$spatial_setup$subpop)])) + num_nodes <- length(unique(outputs_global$hosp %>% .[,get(config$subpop_setup$subpop)])) pdf_dims <- data.frame(width = gg_cols*4, length = num_nodes/gg_cols * 3) fname <- paste0("pplot/snpi_mod_outputs_", opt$run_id,".pdf") pdf(fname, width = pdf_dims$width, height = pdf_dims$length) - node_names <- unique(sort(outputs_global$snpi %>% .[ , get(config$spatial_setup$subpop)])) + node_names <- unique(sort(outputs_global$snpi %>% .[ , get(config$subpop_setup$subpop)])) node_names <- c(node_names[str_detect(node_names,",")], node_names[!str_detect(node_names,",")]) snpi_plots <- lapply(node_names, function(i){ if(!grepl(',', i)){ - i_lab <- ifelse(config$spatial_setup$subpop == 'subpop', geodata[subpop == i, USPS], i) + i_lab <- ifelse(config$subpop_setup$subpop == 'subpop', geodata[subpop == i, USPS], i) outputs_global$snpi %>% - .[outputs_global$llik, on = c(config$spatial_setup$subpop, "slot")] %>% - .[get(config$spatial_setup$subpop) == i] %>% + .[outputs_global$llik, on = c(config$subpop_setup$subpop, "slot")] %>% + .[get(config$subpop_setup$subpop) == i] %>% ggplot(aes(npi_name,reduction)) + geom_violin() + geom_jitter(aes(group = npi_name, color = ll), size = 0.5, height = 0, width = 0.2, alpha = 0.5) + @@ -429,11 +429,11 @@ if("snpi" %in% model_outputs){ nodes_ <- unlist(strsplit(i,",")) ll_across_nodes <- outputs_global$llik %>% - .[get(config$spatial_setup$subpop) %in% nodes_] %>% + .[get(config$subpop_setup$subpop) %in% nodes_] %>% .[, .(ll_sum = sum(ll)), by = .(slot)] outputs_global$snpi %>% - .[get(config$spatial_setup$subpop) == i] %>% + .[get(config$subpop_setup$subpop) == i] %>% .[ll_across_nodes, on = c("slot")] %>% ggplot(aes(npi_name,reduction)) + geom_violin() + diff --git a/postprocessing/processing_diagnostics.R b/postprocessing/processing_diagnostics.R index 57ca3aa22..b18f8d651 100644 --- a/postprocessing/processing_diagnostics.R +++ b/postprocessing/processing_diagnostics.R @@ -17,7 +17,7 @@ s3_name <- "idd-inference-runs" # Pull in subpop data geodata_states <- read.csv(paste0("./data/", - config$spatial_setup$geodata)) %>% + config$subpop_setup$geodata)) %>% mutate(subpop = stringr::str_pad(subpop, width = 5, side = "left", pad = "0")) # PULL OUTCOMES FROM S3 --------------------------------------------------- diff --git a/postprocessing/processing_diagnostics_AWS.R b/postprocessing/processing_diagnostics_AWS.R index 0eed22462..3b60663fc 100644 --- a/postprocessing/processing_diagnostics_AWS.R +++ b/postprocessing/processing_diagnostics_AWS.R @@ -17,7 +17,7 @@ s3_name <- "idd-inference-runs" # Pull in subpop data geodata_states <- read.csv(paste0("./data/", - config$spatial_setup$geodata)) %>% + config$subpop_setup$geodata)) %>% mutate(subpop = stringr::str_pad(subpop, width = 5, side = "left", pad = "0")) # PULL OUTCOMES FROM S3 --------------------------------------------------- diff --git a/postprocessing/processing_diagnostics_SLURM.R b/postprocessing/processing_diagnostics_SLURM.R index 505e51d57..4b27e704d 100644 --- a/postprocessing/processing_diagnostics_SLURM.R +++ b/postprocessing/processing_diagnostics_SLURM.R @@ -13,7 +13,7 @@ library(lubridate) # Pull in subpop data geodata_states <- read.csv(paste0("./data/", - config$spatial_setup$geodata)) %>% + config$subpop_setup$geodata)) %>% mutate(subpop = stringr::str_pad(subpop, width = 5, side = "left", pad = "0")) # FUNCTIONS --------------------------------------------------------------- diff --git a/postprocessing/run_sim_processing_FluSightExample.R b/postprocessing/run_sim_processing_FluSightExample.R index cff430101..b634f0c33 100644 --- a/postprocessing/run_sim_processing_FluSightExample.R +++ b/postprocessing/run_sim_processing_FluSightExample.R @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!! -geodata_file_path = file.path(config$data_path, config$spatial_setup$geodata) +geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) diff --git a/postprocessing/run_sim_processing_SLURM.R b/postprocessing/run_sim_processing_SLURM.R index f38e368e9..3d8396338 100644 --- a/postprocessing/run_sim_processing_SLURM.R +++ b/postprocessing/run_sim_processing_SLURM.R @@ -164,7 +164,7 @@ if(tolower(smh_or_fch) == "fch"){ } scenarios <- scenarios[scenario_num] -geodata_file_path = file.path(config$data_path, config$spatial_setup$geodata) +geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) print(disease) diff --git a/postprocessing/run_sim_processing_TEMPLATE.R b/postprocessing/run_sim_processing_TEMPLATE.R index e8f37fdb5..166783a83 100644 --- a/postprocessing/run_sim_processing_TEMPLATE.R +++ b/postprocessing/run_sim_processing_TEMPLATE.R @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!! -geodata_file_path = file.path(config$data_path, config$spatial_setup$geodata) +geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) diff --git a/preprocessing/seir_init_immuneladder_r17phase3.R b/preprocessing/seir_init_immuneladder_r17phase3.R index 857c88882..9055caad7 100644 --- a/preprocessing/seir_init_immuneladder_r17phase3.R +++ b/preprocessing/seir_init_immuneladder_r17phase3.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # # seeding: @@ -23,7 +23,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # diff --git a/preprocessing/seir_init_immuneladder_r17phase3_preOm.R b/preprocessing/seir_init_immuneladder_r17phase3_preOm.R index fac8e5770..b4f6a803b 100644 --- a/preprocessing/seir_init_immuneladder_r17phase3_preOm.R +++ b/preprocessing/seir_init_immuneladder_r17phase3_preOm.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # subpop: # @@ -24,7 +24,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # diff --git a/preprocessing/seir_init_immuneladder_r17phase3_preOm_noDelta.R b/preprocessing/seir_init_immuneladder_r17phase3_preOm_noDelta.R index 9853512b3..527926026 100644 --- a/preprocessing/seir_init_immuneladder_r17phase3_preOm_noDelta.R +++ b/preprocessing/seir_init_immuneladder_r17phase3_preOm_noDelta.R @@ -13,7 +13,7 @@ # end_date: # data_path: -# spatial_setup: +# subpop_setup: # geodata: # # seeding: @@ -23,7 +23,7 @@ # # ## Input Data # -# * {data_path}/{spatial_setup::geodata} is a csv with column {spatial_setup::subpop} that denotes the subpop +# * {data_path}/{subpop_setup::geodata} is a csv with column {subpop_setup::subpop} that denotes the subpop # # ## Output Data # From 65772291a5c49ed20657e3f8fd58274f828a910c Mon Sep 17 00:00:00 2001 From: Joseph Lemaitre Date: Thu, 28 Sep 2023 10:17:03 +0200 Subject: [PATCH 7/8] internal nomenclature changes --- .../docs/integration_benchmark.ipynb | 20 ++++----- .../gempyor_pkg/docs/integration_doc.ipynb | 2 +- flepimop/gempyor_pkg/docs/interface.ipynb | 4 +- .../gempyor_pkg/src/gempyor/NPI/__init__.py | 2 +- .../gempyor_pkg/src/gempyor/compartments.py | 1 - .../gempyor_pkg/src/gempyor/dev/dev_seir.py | 8 ++-- flepimop/gempyor_pkg/src/gempyor/dev/steps.py | 9 ---- flepimop/gempyor_pkg/src/gempyor/interface.py | 6 +-- flepimop/gempyor_pkg/src/gempyor/outcomes.py | 5 ++- .../gempyor_pkg/src/gempyor/parameters.py | 26 +++++------ .../gempyor_pkg/src/gempyor/seeding_ic.py | 25 ++++++----- flepimop/gempyor_pkg/src/gempyor/seir.py | 26 ++++++----- flepimop/gempyor_pkg/src/gempyor/setup.py | 5 +-- flepimop/gempyor_pkg/src/gempyor/simulate.py | 6 +-- .../src/gempyor/simulate_outcome.py | 2 +- .../gempyor_pkg/src/gempyor/simulate_seir.py | 6 +-- flepimop/gempyor_pkg/src/gempyor/steps_rk4.py | 1 - .../src/gempyor/subpopulation_structure.py | 36 ++++++++-------- flepimop/gempyor_pkg/tests/npi/test_npis.py | 11 +++-- .../gempyor_pkg/tests/seir/dev_new_test.py | 2 +- .../gempyor_pkg/tests/seir/interface.ipynb | 4 +- .../tests/seir/test_compartments.py | 2 +- .../gempyor_pkg/tests/seir/test_new_seir.py | 4 +- .../gempyor_pkg/tests/seir/test_parameters.py | 32 +++++++------- flepimop/gempyor_pkg/tests/seir/test_seir.py | 43 +++++++++---------- flepimop/gempyor_pkg/tests/seir/test_setup.py | 16 +++---- 26 files changed, 142 insertions(+), 162 deletions(-) diff --git a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb index e392401cd..22cb46d6c 100644 --- a/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb +++ b/flepimop/gempyor_pkg/docs/integration_benchmark.ipynb @@ -204,7 +204,7 @@ " setup_name=config[\"setup_name\"].get(),\n", " geodata_file=spatial_base_path / spatial_config[\"geodata\"].get(),\n", " mobility_file=spatial_base_path / spatial_config[\"mobility\"].get(),\n", - " popnodes_key=spatial_config[\"popnodes\"].get(),\n", + " subpop_pop_key=spatial_config[\"subpop_pop\"].get(),\n", " subpop_key=spatial_config[\"subpop\"].get(),\n", " ),\n", " nslots=nslots,\n", @@ -313,9 +313,9 @@ "):\n", " mobility_data = mobility_data.astype(\"float64\")\n", " assert type(s.compartments.compartments.shape[0]) == int\n", - " assert type(s.nnodes) == int\n", + " assert type(s.nsubpops) == int\n", " assert s.n_days > 1\n", - " assert parsed_parameters.shape[1:3] == (s.n_days, s.nnodes)\n", + " assert parsed_parameters.shape[1:3] == (s.n_days, s.nsubpops)\n", " assert type(s.dt) == float\n", " # assert (transition_array.shape == (5, 5))\n", " assert type(transition_array[0][0]) == np.int64\n", @@ -323,7 +323,7 @@ " assert type(proportion_array[0]) == np.int64\n", " # assert (proportion_info.shape == (3, 6))\n", " assert type(proportion_info[0][0]) == np.int64\n", - " assert initial_conditions.shape == (s.compartments.compartments.shape[0], s.nnodes)\n", + " assert initial_conditions.shape == (s.compartments.compartments.shape[0], s.nsubpops)\n", " assert type(initial_conditions[0][0]) == np.float64\n", " # Test of empty seeding:\n", " assert len(seeding_data.keys()) == 4\n", @@ -349,14 +349,14 @@ " assert type(mobility_data[0]) == np.float64\n", " assert len(mobility_data) == len(mobility_subpop_indices)\n", " assert type(mobility_subpop_indices[0]) == np.int32\n", - " assert len(mobility_data_indices) == s.nnodes + 1\n", + " assert len(mobility_data_indices) == s.nsubpops + 1\n", " assert type(mobility_data_indices[0]) == np.int32\n", - " assert len(s.popnodes) == s.nnodes\n", - " assert type(s.popnodes[0]) == np.int64\n", + " assert len(s.subpop_pop) == s.nsubpops\n", + " assert type(s.subpop_pop[0]) == np.int64\n", "\n", " fnct_args = (\n", " s.compartments.compartments.shape[0],\n", - " s.nnodes,\n", + " s.nsubpops,\n", " s.n_days,\n", " parsed_parameters,\n", " s.dt,\n", @@ -369,7 +369,7 @@ " mobility_data,\n", " mobility_subpop_indices,\n", " mobility_data_indices,\n", - " s.popnodes,\n", + " s.subpop_pop,\n", " stoch_traj_flag,\n", " ) # TODO make it a dict, it's safer\n", "\n", @@ -457,7 +457,7 @@ "mobility_data = s.mobility.data\n", "\n", "with Timer(\"onerun_SEIR.pdraw\"):\n", - " p_draw = s.parameters.parameters_quick_draw(s.n_days, s.nnodes)\n", + " p_draw = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops)\n", "\n", "with Timer(\"onerun_SEIR.reduce\"):\n", " parameters = s.parameters.parameters_reduce(p_draw, npi)\n", diff --git a/flepimop/gempyor_pkg/docs/integration_doc.ipynb b/flepimop/gempyor_pkg/docs/integration_doc.ipynb index f98873201..c9cec9c83 100644 --- a/flepimop/gempyor_pkg/docs/integration_doc.ipynb +++ b/flepimop/gempyor_pkg/docs/integration_doc.ipynb @@ -115,7 +115,7 @@ "\n", "\n", "p_draw = gempyor_simulator.s.parameters.parameters_quick_draw(\n", - " n_days=gempyor_simulator.s.n_days, nnodes=gempyor_simulator.s.nnodes\n", + " n_days=gempyor_simulator.s.n_days, nsubpops=gempyor_simulator.s.nsubpops\n", ")\n", "\n", "parameters = gempyor_simulator.s.parameters.parameters_reduce(p_draw, npi_seir)\n", diff --git a/flepimop/gempyor_pkg/docs/interface.ipynb b/flepimop/gempyor_pkg/docs/interface.ipynb index bde9af0ec..0e0e1d2c7 100644 --- a/flepimop/gempyor_pkg/docs/interface.ipynb +++ b/flepimop/gempyor_pkg/docs/interface.ipynb @@ -277,11 +277,11 @@ " p_draw = gempyor_simulator.s.parameters.parameters_load(\n", " param_df=gempyor_simulator.s.read_simID(ftype=\"spar\", sim_id=sim_id2load),\n", " n_daysempyor_simulator.s.n_days,\n", - " nnodes=gempyor_simulator.s.nnodes,\n", + " nsubpops=gempyor_simulator.s.nsubpops,\n", " )\n", " else:\n", " p_draw = gempyor_simulator.s.parameters.parameters_quick_draw(\n", - " n_days=gempyor_simulator.s.n_days, nnodes=gempyor_simulator.s.nnodes\n", + " n_days=gempyor_simulator.s.n_days, nsubpops=gempyor_simulator.s.nsubpops\n", " )\n", " # reduce them\n", " parameters = gempyor_simulator.s.parameters.parameters_reduce(p_draw, npi_seir)\n", diff --git a/flepimop/gempyor_pkg/src/gempyor/NPI/__init__.py b/flepimop/gempyor_pkg/src/gempyor/NPI/__init__.py index 86a93e80f..33bddba2c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/NPI/__init__.py +++ b/flepimop/gempyor_pkg/src/gempyor/NPI/__init__.py @@ -13,7 +13,7 @@ def _load_npi_plugins(): "Recurse through the package directory and import classes that derive from NPIBase" - for (_, name, _) in pkgutil.iter_modules([str(Path(__file__).parent)]): + for _, name, _ in pkgutil.iter_modules([str(Path(__file__).parent)]): imported_module = import_module("." + name, package=__name__) for i in dir(imported_module): diff --git a/flepimop/gempyor_pkg/src/gempyor/compartments.py b/flepimop/gempyor_pkg/src/gempyor/compartments.py index bb568a436..04a05ca85 100644 --- a/flepimop/gempyor_pkg/src/gempyor/compartments.py +++ b/flepimop/gempyor_pkg/src/gempyor/compartments.py @@ -14,7 +14,6 @@ class Compartments: # Minimal object to be easily picklable for // runs def __init__(self, seir_config=None, compartments_config=None, compartments_file=None, transitions_file=None): - self.times_set = 0 ## Something like this is needed for check script: diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py index 9f5b9182b..95b4afff9 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/dev_seir.py @@ -24,7 +24,7 @@ setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -60,7 +60,7 @@ npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) -params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) +params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -74,7 +74,7 @@ states = seir.steps_SEIR_nb( s.compartments.compartments.shape[0], - s.nnodes, + s.nsubpops, s.n_days, parsed_parameters, s.dt, @@ -86,7 +86,7 @@ mobility_data, mobility_subpop_indices, mobility_data_indices, - s.popnodes, + s.subpop_pop, True, ) df = seir.states2Df(s, states) diff --git a/flepimop/gempyor_pkg/src/gempyor/dev/steps.py b/flepimop/gempyor_pkg/src/gempyor/dev/steps.py index 002529df5..43066e5ee 100644 --- a/flepimop/gempyor_pkg/src/gempyor/dev/steps.py +++ b/flepimop/gempyor_pkg/src/gempyor/dev/steps.py @@ -38,7 +38,6 @@ def ode_integration( stochastic_p, # 16 integration_method, ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -243,7 +242,6 @@ def rk4_integration1( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -434,7 +432,6 @@ def rk4_integration2( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -627,7 +624,6 @@ def rk4_integration3( population, # 15 stochastic_p, # 16 ): - seeding_subpops_dict = seeding_data["seeding_subpops"] seeding_sources_dict = seeding_data["seeding_sources"] seeding_destinations_dict = seeding_data["seeding_destinations"] @@ -827,7 +823,6 @@ def rk4_integration4( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -1021,7 +1016,6 @@ def rk4_integration5( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -1225,7 +1219,6 @@ def rk4_integration2_smart( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) @@ -1427,7 +1420,6 @@ def rk4_integrate(today, x): # with Timer(f'solver_solve{time_jump}'): # ts, sol = solver.run(ts) with Timer(f"solver_solve{time_jump}"): - sol = odeint( rhs, y0=x_, @@ -1497,7 +1489,6 @@ def rk4_integration_aot( population, # 15 stochastic_p, # 16 ): - states = np.zeros((ndays, ncompartments, nspatial_nodes)) states_daily_incid = np.zeros((ndays, ncompartments, nspatial_nodes)) states_current = np.zeros((ncompartments, nspatial_nodes)) diff --git a/flepimop/gempyor_pkg/src/gempyor/interface.py b/flepimop/gempyor_pkg/src/gempyor/interface.py index 40d1ab295..32686a19b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/interface.py +++ b/flepimop/gempyor_pkg/src/gempyor/interface.py @@ -86,7 +86,7 @@ def __init__( mobility_file=spatial_base_path / spatial_config["mobility"].get() if spatial_config["mobility"].exists() else None, - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ), nslots=nslots, @@ -339,10 +339,10 @@ def get_seir_parameters(self, load_ID=False, sim_id2load=None, bypass_DF=None, b p_draw = self.s.parameters.parameters_load( param_df=param_df, n_days=self.s.n_days, - nnodes=self.s.nnodes, + nsubpops=self.s.nsubpops, ) else: - p_draw = self.s.parameters.parameters_quick_draw(n_days=self.s.n_days, nnodes=self.s.nnodes) + p_draw = self.s.parameters.parameters_quick_draw(n_days=self.s.n_days, nsubpops=self.s.nsubpops) return p_draw def get_seir_parametersDF(self, load_ID=False, sim_id2load=None, bypass_DF=None, bypass_FN=None): diff --git a/flepimop/gempyor_pkg/src/gempyor/outcomes.py b/flepimop/gempyor_pkg/src/gempyor/outcomes.py index 2760b9bbe..16279c620 100644 --- a/flepimop/gempyor_pkg/src/gempyor/outcomes.py +++ b/flepimop/gempyor_pkg/src/gempyor/outcomes.py @@ -230,7 +230,9 @@ def read_parameters_from_config(s: setup.Setup): logging.debug(f"Using 'param_from_file' for relative probability in outcome {class_name}") # Sort it in case the relative probablity file is mispecified rel_probability.subpop = rel_probability.subpop.astype("category") - rel_probability.subpop = rel_probability.subpop.cat.set_categories(s.subpop_struct.subpop_names) + rel_probability.subpop = rel_probability.subpop.cat.set_categories( + s.subpop_struct.subpop_names + ) rel_probability = rel_probability.sort_values(["subpop"]) parameters[class_name]["rel_probability"] = rel_probability["value"].to_numpy() else: @@ -487,7 +489,6 @@ def compute_all_multioutcomes(*, s, sim_id2write, parameters, loaded_values=None def get_filtered_incidI(diffI, dates, subpops, filters): - if list(filters.keys()) == ["incidence"]: vtype = "incidence" elif list(filters.keys()) == ["prevalence"]: diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py index 9632db8f7..f315c65cf 100644 --- a/flepimop/gempyor_pkg/src/gempyor/parameters.py +++ b/flepimop/gempyor_pkg/src/gempyor/parameters.py @@ -110,46 +110,46 @@ def picklable_lamda_sigma(self): def get_pnames2pindex(self) -> dict: return self.pnames2pindex - def parameters_quick_draw(self, n_days: int, nnodes: int) -> ndarray: + def parameters_quick_draw(self, n_days: int, nsubpops: int) -> ndarray: """ Returns all parameter in an array. These are drawn based on the seir::parameters section of the config, passed in as p_config. :param n_days: number of time interval - :param nnodes: number of spatial nodes - :return: array of shape (nparam, n_days, nnodes) with all parameters for all nodes and all time (same value) + :param nsubpops: number of spatial nodes + :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time (same value) """ - param_arr = np.empty((self.npar, n_days, nnodes), dtype="float64") + param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently for idx, pn in enumerate(self.pnames): if "dist" in self.pdata[pn]: - param_arr[idx] = np.full((n_days, nnodes), self.pdata[pn]["dist"]()) + param_arr[idx] = np.full((n_days, nsubpops), self.pdata[pn]["dist"]()) else: param_arr[idx] = self.pdata[pn]["ts"].values return param_arr # we don't store it as a member because this object needs to be small to be pickable - def parameters_load(self, param_df: pd.DataFrame, n_days: int, nnodes: int) -> ndarray: + def parameters_load(self, param_df: pd.DataFrame, n_days: int, nsubpops: int) -> ndarray: """ drop-in equivalent to param_quick_draw() that take a file as written parameter_write() :param fname: :param n_days: - :param nnodes: + :param nsubpops: :param extension: - :return: array of shape (nparam, n_days, nnodes) with all parameters for all nodes and all time. + :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time. """ - param_arr = np.empty((self.npar, n_days, nnodes), dtype="float64") + param_arr = np.empty((self.npar, n_days, nsubpops), dtype="float64") param_arr[:] = np.nan # fill with NaNs so we don't fail silently for idx, pn in enumerate(self.pnames): if pn in param_df["parameter"].values: pval = float(param_df[param_df["parameter"] == pn].value) - param_arr[idx] = np.full((n_days, nnodes), pval) + param_arr[idx] = np.full((n_days, nsubpops), pval) elif "ts" in self.pdata[pn]: param_arr[idx] = self.pdata[pn]["ts"].values else: print(f"PARAM: parameter {pn} NOT found in loadID file. Drawing from config distribution") pval = self.pdata[pn]["dist"]() - param_arr[idx] = np.full((n_days, nnodes), pval) + param_arr[idx] = np.full((n_days, nsubpops), pval) return param_arr @@ -172,9 +172,9 @@ def getParameterDF(self, p_draw: ndarray) -> pd.DataFrame: def parameters_reduce(self, p_draw: ndarray, npi: object) -> ndarray: """ Params reduced according to the NPI provided. - :param p_draw: array of shape (nparam, n_days, nnodes) from p_draw + :param p_draw: array of shape (nparam, n_days, nsubpops) from p_draw :param npi: NPI object with the reduction - :return: array of shape (nparam, n_days, nnodes) with all parameters for all nodes and all time, reduced + :return: array of shape (nparam, n_days, nsubpops) with all parameters for all nodes and all time, reduced """ p_reduced = copy.deepcopy(p_draw) diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py b/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py index 61a650908..a83b0ca35 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py +++ b/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py @@ -100,8 +100,8 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: if method == "Default": ## JK : This could be specified in the config - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nnodes)) - y0[0, :] = setup.popnodes + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) + y0[0, :] = setup.subpop_pop elif method == "SetInitialConditions" or method == "SetInitialConditionsFolderDraw": # TODO Think about - Does not support the new way of doing compartment indexing @@ -112,12 +112,11 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: self.initial_conditions_config["initial_conditions_file"].get(), ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nnodes)) + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): # if pl in list(ic_df["subpop"]): states_pl = ic_df[ic_df["subpop"] == pl] for comp_idx, comp_name in setup.compartments.compartments["name"].items(): - if "mc_name" in states_pl.columns: ic_df_compartment_val = states_pl[states_pl["mc_name"] == comp_name]["amount"] else: @@ -145,15 +144,15 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: y0[comp_idx, pl_idx] = float(ic_df_compartment_val) elif allow_missing_nodes: logger.critical( - f"No initial conditions for for node {pl}, assuming everyone (n={setup.popnodes[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" + f"No initial conditions for for node {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartment ({setup.compartments.compartments['name'].iloc[0]})" ) if "proportional" in self.initial_conditions_config.keys(): if self.initial_conditions_config["proportional"].get(): y0[0, pl_idx] = 1.0 else: - y0[0, pl_idx] = setup.popnodes[pl_idx] + y0[0, pl_idx] = setup.subpop_pop[pl_idx] else: - y0[0, pl_idx] = setup.popnodes[pl_idx] + y0[0, pl_idx] = setup.subpop_pop[pl_idx] else: raise ValueError( f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_nodes=TRUE to bypass this error" @@ -176,7 +175,7 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: raise ValueError( f"There is no entry for initial time ti in the provided initial_conditions::states_file." ) - y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nnodes)) + y0 = np.zeros((setup.compartments.compartments.shape[0], setup.nsubpops)) for comp_idx, comp_name in setup.compartments.compartments["name"].items(): # rely on all the mc's instead of mc_name to avoid errors due to e.g order. @@ -209,12 +208,12 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: y0[comp_idx, pl_idx] = float(ic_df_compartment[pl]) elif allow_missing_nodes: logger.critical( - f"No initial conditions for for node {pl}, assuming everyone (n={setup.popnodes[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" + f"No initial conditions for for node {pl}, assuming everyone (n={setup.subpop_pop[pl_idx]}) in the first metacompartments ({setup.compartments.compartments['name'].iloc[0]})" ) if "proportion" in self.initial_conditions_config.keys(): if self.initial_conditions_config["proportion"].get(): y0[0, pl_idx] = 1.0 - y0[0, pl_idx] = setup.popnodes[pl_idx] + y0[0, pl_idx] = setup.subpop_pop[pl_idx] else: raise ValueError( f"subpop {pl} does not exist in initial_conditions::states_file. You can set allow_missing_nodes=TRUE to bypass this error" @@ -225,7 +224,7 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: # rest if rests: # not empty for comp_idx, pl_idx in rests: - total = setup.popnodes[pl_idx] + total = setup.subpop_pop[pl_idx] if "proportional" in self.initial_conditions_config.keys(): if self.initial_conditions_config["proportional"].get(): total = 1.0 @@ -233,13 +232,13 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray: if "proportional" in self.initial_conditions_config.keys(): if self.initial_conditions_config["proportional"].get(): - y0 = y0 * setup.popnodes[pl_idx] + y0 = y0 * setup.subpop_pop[pl_idx] # check that the inputed values sums to the node_population: error = False for pl_idx, pl in enumerate(setup.subpop_struct.subpop_names): n_y0 = y0[:, pl_idx].sum() - n_pop = setup.popnodes[pl_idx] + n_pop = setup.subpop_pop[pl_idx] if abs(n_y0 - n_pop) > 1: error = True print( diff --git a/flepimop/gempyor_pkg/src/gempyor/seir.py b/flepimop/gempyor_pkg/src/gempyor/seir.py index aa5e210b0..b466bf78f 100644 --- a/flepimop/gempyor_pkg/src/gempyor/seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/seir.py @@ -27,14 +27,14 @@ def build_step_source_arg( mobility_data = s.mobility.data mobility_data = mobility_data.astype("float64") assert type(s.compartments.compartments.shape[0]) == int - assert type(s.nnodes) == int + assert type(s.nsubpops) == int assert s.n_days > 1 - assert parsed_parameters.shape[1:3] == (s.n_days, s.nnodes) + assert parsed_parameters.shape[1:3] == (s.n_days, s.nsubpops) assert type(s.dt) == float assert type(transition_array[0][0]) == np.int64 assert type(proportion_array[0]) == np.int64 assert type(proportion_info[0][0]) == np.int64 - assert initial_conditions.shape == (s.compartments.compartments.shape[0], s.nnodes) + assert initial_conditions.shape == (s.compartments.compartments.shape[0], s.nsubpops) assert type(initial_conditions[0][0]) == np.float64 # Test of empty seeding: assert len(seeding_data.keys()) == 4 @@ -58,17 +58,17 @@ def build_step_source_arg( assert type(mobility_data[0]) == np.float64 assert len(mobility_data) == len(s.mobility.indices) assert type(s.mobility.indices[0]) == np.int32 - assert len(s.mobility.indptr) == s.nnodes + 1 + assert len(s.mobility.indptr) == s.nsubpops + 1 assert type(s.mobility.indptr[0]) == np.int32 - assert len(s.popnodes) == s.nnodes - assert type(s.popnodes[0]) == np.int64 + assert len(s.subpop_pop) == s.nsubpops + assert type(s.subpop_pop[0]) == np.int64 assert s.dt <= 1.0 or s.dt == 2.0 fnct_args = { "ncompartments": s.compartments.compartments.shape[0], - "nspatial_nodes": s.nnodes, + "nspatial_nodes": s.nsubpops, "ndays": s.n_days, "parameters": parsed_parameters, "dt": s.dt, @@ -81,7 +81,7 @@ def build_step_source_arg( "mobility_data": mobility_data, "mobility_row_indices": s.mobility.indices, "mobility_data_indices": s.mobility.indptr, - "population": s.popnodes, + "population": s.subpop_pop, "stochastic_p": s.stoch_traj_flag, } return fnct_args @@ -97,7 +97,6 @@ def steps_SEIR( seeding_data, seeding_amounts, ): - fnct_args = build_step_source_arg( s, parsed_parameters, @@ -218,10 +217,10 @@ def onerun_SEIR( p_draw = s.parameters.parameters_load( param_df=s.read_simID(ftype="spar", sim_id=sim_id2load), n_days=s.n_days, - nnodes=s.nnodes, + nsubpops=s.nsubpops, ) else: - p_draw = s.parameters.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) + p_draw = s.parameters.parameters_quick_draw(n_days=s.n_days, nsubpops=s.nsubpops) # reduce them parameters = s.parameters.parameters_reduce(p_draw, npi) log_debug_parameters(p_draw, "Parameters without interventions") @@ -291,7 +290,7 @@ def states2Df(s, states): ) # prevalence data, we use multi.index dataframe, sparring us the array manipulation we use to do prev_df = pd.DataFrame( - data=states_prev.reshape(s.n_days * s.compartments.get_ncomp(), s.nnodes), + data=states_prev.reshape(s.n_days * s.compartments.get_ncomp(), s.nsubpops), index=ts_index, columns=s.subpop_struct.subpop_names, ).reset_index() @@ -309,7 +308,7 @@ def states2Df(s, states): ) incid_df = pd.DataFrame( - data=states_incid.reshape(s.n_days * s.compartments.get_ncomp(), s.nnodes), + data=states_incid.reshape(s.n_days * s.compartments.get_ncomp(), s.nsubpops), index=ts_index, columns=s.subpop_struct.subpop_names, ).reset_index() @@ -329,7 +328,6 @@ def states2Df(s, states): def postprocess_and_write(sim_id, s, states, p_draw, npi, seeding): - # print(f"before postprocess_and_write for id {s.out_run_id}, {s.out_prefix}, {sim_id + s.first_sim_index - 1}") # aws_disk_diagnosis() diff --git a/flepimop/gempyor_pkg/src/gempyor/setup.py b/flepimop/gempyor_pkg/src/gempyor/setup.py index b8bca6104..86176dc71 100644 --- a/flepimop/gempyor_pkg/src/gempyor/setup.py +++ b/flepimop/gempyor_pkg/src/gempyor/setup.py @@ -51,7 +51,6 @@ def __init__( out_prefix=None, stoch_traj_flag=False, ): - # 1. Important global variables self.setup_name = setup_name self.nslots = nslots @@ -77,8 +76,8 @@ def __init__( self.subpop_struct = subpop_setup self.n_days = (self.tf - self.ti).days + 1 # because we include s.ti and s.tf - self.nnodes = self.subpop_struct.nnodes - self.popnodes = self.subpop_struct.popnodes + self.nsubpops = self.subpop_struct.nsubpops + self.subpop_pop = self.subpop_struct.subpop_pop self.mobility = self.subpop_struct.mobility self.stoch_traj_flag = stoch_traj_flag diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate.py b/flepimop/gempyor_pkg/src/gempyor/simulate.py index 12db2364a..2e209db4d 100644 --- a/flepimop/gempyor_pkg/src/gempyor/simulate.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate.py @@ -98,7 +98,7 @@ # # ## Input Data # -# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::popnodes} +# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::subpop_pop} # * {data_path}/{subpop_setup::mobility} # # If {seeding::method} is PoissonDistributed @@ -295,7 +295,6 @@ def simulate( first_sim_index, stoch_traj_flag, ): - spatial_path_prefix = "" config.clear() config.read(user=False) @@ -323,13 +322,12 @@ def simulate( mobility_file=spatial_base_path / spatial_config["mobility"].get() if spatial_config["mobility"].exists() else None, - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) start = time.monotonic() for npi_scenario in npi_scenarios: - s = setup.Setup( setup_name=config["name"].get() + "/" + str(npi_scenario) + "/", subpop_setup=subpop_setup, diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py index 789d2634d..629c3e407 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate_outcome.py @@ -203,7 +203,7 @@ def simulate( mobility_file=spatial_base_path / spatial_config["mobility"].get() if spatial_config["mobility"].exists() else None, - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) diff --git a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py index 9ad0f022c..3be2b3531 100755 --- a/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py +++ b/flepimop/gempyor_pkg/src/gempyor/simulate_seir.py @@ -98,7 +98,7 @@ # # ## Input Data # -# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::popnodes} +# * {data_path}/{subpop_setup::geodata} is a csv with columns {subpop_setup::subpop_names} and {subpop_setup::subpop_pop} # * {data_path}/{subpop_setup::mobility} # # If {seeding::method} is PoissonDistributed @@ -233,7 +233,6 @@ def simulate( first_sim_index, stoch_traj_flag, ): - spatial_path_prefix = "" config.clear() config.read(user=False) @@ -255,13 +254,12 @@ def simulate( mobility_file=spatial_base_path / spatial_config["mobility"].get() if spatial_config["mobility"].exists() else None, - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) start = time.monotonic() for npi_scenario in npi_scenarios: - s = setup.Setup( setup_name=config["name"].get() + "/" + str(npi_scenario) + "/", subpop_setup=subpop_setup, diff --git a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py index e20b4e930..0e092fb42 100644 --- a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py +++ b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py @@ -93,7 +93,6 @@ def rhs(t, x, today): # source compartment. That's why there is nothing with n_spatial node here. # but (TODO) we should enforce that ? if first_proportion: - only_one_proportion = ( transitions[transition_proportion_start_col][transition_index] + 1 ) == transitions[transition_proportion_stop_col][transition_index] diff --git a/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py b/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py index 083b6111d..2cf5c7c46 100644 --- a/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py +++ b/flepimop/gempyor_pkg/src/gempyor/subpopulation_structure.py @@ -10,22 +10,22 @@ class SubpopulationStructure: - def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, subpop_names_key): + def __init__(self, *, setup_name, geodata_file, mobility_file, subpop_pop_key, subpop_names_key): self.setup_name = setup_name self.data = pd.read_csv( geodata_file, converters={subpop_names_key: lambda x: str(x).strip()}, skipinitialspace=True ) # subpops and populations, strip whitespaces - self.nnodes = len(self.data) # K = # of locations + self.nsubpops = len(self.data) # K = # of locations - # popnodes_key is the name of the column in geodata_file with populations - if popnodes_key not in self.data: + # subpop_pop_key is the name of the column in geodata_file with populations + if subpop_pop_key not in self.data: raise ValueError( - f"popnodes_key: {popnodes_key} does not correspond to a column in geodata: {self.data.columns}" + f"subpop_pop_key: {subpop_pop_key} does not correspond to a column in geodata: {self.data.columns}" ) - self.popnodes = self.data[popnodes_key].to_numpy() # population - if len(np.argwhere(self.popnodes == 0)): + self.subpop_pop = self.data[subpop_pop_key].to_numpy() # population + if len(np.argwhere(self.subpop_pop == 0)): raise ValueError( - f"There are {len(np.argwhere(self.popnodes == 0))} nodes with population zero, this is not supported." + f"There are {len(np.argwhere(self.subpop_pop == 0))} nodes with population zero, this is not supported." ) # subpop_names_key is the name of the column in geodata_file with subpops @@ -43,9 +43,9 @@ def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, sub np.loadtxt(mobility_file), dtype=int ) # K x K matrix of people moving # Validate mobility data - if self.mobility.shape != (self.nnodes, self.nnodes): + if self.mobility.shape != (self.nsubpops, self.nsubpops): raise ValueError( - f"mobility data must have dimensions of length of geodata ({self.nnodes}, {self.nnodes}). Actual: {self.mobility.shape}" + f"mobility data must have dimensions of length of geodata ({self.nsubpops}, {self.nsubpops}). Actual: {self.mobility.shape}" ) elif mobility_file.suffix == ".csv": @@ -60,16 +60,16 @@ def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, sub self.mobility = scipy.sparse.coo_matrix( (mobility_data.amount, (mobility_data.ori_idx, mobility_data.dest_idx)), - shape=(self.nnodes, self.nnodes), + shape=(self.nsubpops, self.nsubpops), dtype=int, ).tocsr() elif mobility_file.suffix == ".npz": self.mobility = scipy.sparse.load_npz(mobility_file).astype(int) # Validate mobility data - if self.mobility.shape != (self.nnodes, self.nnodes): + if self.mobility.shape != (self.nsubpops, self.nsubpops): raise ValueError( - f"mobility data must have dimensions of length of geodata ({self.nnodes}, {self.nnodes}). Actual: {self.mobility.shape}" + f"mobility data must have dimensions of length of geodata ({self.nsubpops}, {self.nsubpops}). Actual: {self.mobility.shape}" ) else: raise ValueError( @@ -77,27 +77,27 @@ def __init__(self, *, setup_name, geodata_file, mobility_file, popnodes_key, sub ) # Make sure mobility values <= the population of src node - tmp = (self.mobility.T - self.popnodes).T + tmp = (self.mobility.T - self.subpop_pop).T tmp[tmp < 0] = 0 if tmp.any(): rows, cols, values = scipy.sparse.find(tmp) errmsg = "" for r, c, v in zip(rows, cols, values): - errmsg += f"\n({r}, {c}) = {self.mobility[r, c]} > population of '{self.subpop_names[r]}' = {self.popnodes[r]}" + errmsg += f"\n({r}, {c}) = {self.mobility[r, c]} > population of '{self.subpop_names[r]}' = {self.subpop_pop[r]}" raise ValueError( f"The following entries in the mobility data exceed the source node populations in geodata:{errmsg}" ) - tmp = self.popnodes - np.squeeze(np.asarray(self.mobility.sum(axis=1))) + tmp = self.subpop_pop - np.squeeze(np.asarray(self.mobility.sum(axis=1))) tmp[tmp > 0] = 0 if tmp.any(): (row,) = np.where(tmp) errmsg = "" for r in row: - errmsg += f"\n sum accross row {r} exceed population of node '{self.subpop_names[r]}' ({self.popnodes[r]}), by {-tmp[r]}" + errmsg += f"\n sum accross row {r} exceed population of node '{self.subpop_names[r]}' ({self.subpop_pop[r]}), by {-tmp[r]}" raise ValueError( f"The following rows in the mobility data exceed the source node populations in geodata:{errmsg}" ) else: logging.critical("No mobility matrix specified -- assuming no one moves") - self.mobility = scipy.sparse.csr_matrix(np.zeros((self.nnodes, self.nnodes)), dtype=int) + self.mobility = scipy.sparse.csr_matrix(np.zeros((self.nsubpops, self.nsubpops)), dtype=int) diff --git a/flepimop/gempyor_pkg/tests/npi/test_npis.py b/flepimop/gempyor_pkg/tests/npi/test_npis.py index 2cdb165e1..f9c514acd 100644 --- a/flepimop/gempyor_pkg/tests/npi/test_npis.py +++ b/flepimop/gempyor_pkg/tests/npi/test_npis.py @@ -120,7 +120,7 @@ def test_spatial_groups(): npi = seir.build_npi_SEIR(inference_simulator.s, load_ID=False, sim_id2load=None, config=config) # all independent: r1 - assert len(npi.getReduction("r1")["2021-01-01"].unique()) == inference_simulator.s.nnodes + assert len(npi.getReduction("r1")["2021-01-01"].unique()) == inference_simulator.s.nsubpops assert npi.getReduction("r1").isna().sum().sum() == 0 # all the same: r2 @@ -128,7 +128,7 @@ def test_spatial_groups(): assert npi.getReduction("r2").isna().sum().sum() == 0 # two groups: r3 - assert len(npi.getReduction("r3")["2020-04-15"].unique()) == inference_simulator.s.nnodes - 2 + assert len(npi.getReduction("r3")["2020-04-15"].unique()) == inference_simulator.s.nsubpops - 2 assert npi.getReduction("r3").isna().sum().sum() == 0 assert len(npi.getReduction("r3").loc[["01000", "02000"], "2020-04-15"].unique()) == 1 assert len(npi.getReduction("r3").loc[["04000", "06000"], "2020-04-15"].unique()) == 1 @@ -154,7 +154,7 @@ def test_spatial_groups(): # all independent: r1 df = npi_df[npi_df["npi_name"] == "all_independent"] - assert len(df) == inference_simulator.s.nnodes + assert len(df) == inference_simulator.s.nsubpops for g in df["subpop"]: assert "," not in g @@ -162,11 +162,11 @@ def test_spatial_groups(): df = npi_df[npi_df["npi_name"] == "all_together"] assert len(df) == 1 assert set(df["subpop"].iloc[0].split(",")) == set(inference_simulator.s.subpop_struct.subpop_names) - assert len(df["subpop"].iloc[0].split(",")) == inference_simulator.s.nnodes + assert len(df["subpop"].iloc[0].split(",")) == inference_simulator.s.nsubpops # two groups: r3 df = npi_df[npi_df["npi_name"] == "two_groups"] - assert len(df) == inference_simulator.s.nnodes - 2 + assert len(df) == inference_simulator.s.nsubpops - 2 for g in ["01000", "02000", "04000", "06000"]: assert g not in df["subpop"] assert len(df[df["subpop"] == "01000,02000"]) == 1 @@ -185,7 +185,6 @@ def test_spatial_groups(): def test_spatial_groups(): - inference_simulator = gempyor.GempyorSimulator( config_path=f"{config_path_prefix}config_test_spatial_group_npi.yml", run_id=105, diff --git a/flepimop/gempyor_pkg/tests/seir/dev_new_test.py b/flepimop/gempyor_pkg/tests/seir/dev_new_test.py index fc724d598..820ad683f 100644 --- a/flepimop/gempyor_pkg/tests/seir/dev_new_test.py +++ b/flepimop/gempyor_pkg/tests/seir/dev_new_test.py @@ -37,7 +37,7 @@ # parameter_config=config["seir"]["parameters"]) p = inference_simulator.s.parameters - p_draw = p.parameters_quick_draw(n_days=inference_simulator.s.n_days, nnodes=inference_simulator.s.nnodes) + p_draw = p.parameters_quick_draw(n_days=inference_simulator.s.n_days, nsubpops=inference_simulator.s.nsubpops) p_df = p.getParameterDF(p_draw)["parameter"] diff --git a/flepimop/gempyor_pkg/tests/seir/interface.ipynb b/flepimop/gempyor_pkg/tests/seir/interface.ipynb index 1ecaf0a17..cde1ad0bd 100644 --- a/flepimop/gempyor_pkg/tests/seir/interface.ipynb +++ b/flepimop/gempyor_pkg/tests/seir/interface.ipynb @@ -269,11 +269,11 @@ " p_draw = gempyor_simulator.s.parameters.parameters_load(\n", " param_df=gempyor_simulator.s.read_simID(ftype=\"spar\", sim_id=sim_id2load),\n", " n_days=gempyor_simulator.s.n_days,\n", - " nnodes=gempyor_simulator.s.nnodes,\n", + " nsubpops=gempyor_simulator.s.nsubpops,\n", " )\n", " else:\n", " p_draw = gempyor_simulator.s.parameters.parameters_quick_draw(\n", - " n_days=gempyor_simulator.s.n_days, nnodes=gempyor_simulator.s.nnodes\n", + " n_days=gempyor_simulator.s.n_days, nsubpops=gempyor_simulator.s.nsubpops\n", " )\n", " # reduce them\n", " parameters = gempyor_simulator.s.parameters.parameters_reduce(p_draw, npi_seir)\n", diff --git a/flepimop/gempyor_pkg/tests/seir/test_compartments.py b/flepimop/gempyor_pkg/tests/seir/test_compartments.py index f8af0e046..d63abe38f 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_compartments.py +++ b/flepimop/gempyor_pkg/tests/seir/test_compartments.py @@ -69,7 +69,7 @@ def test_Setup_has_compartments_component(): setup_name="test_values", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) diff --git a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py index 312a1d0c1..e847a974e 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_new_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_new_seir.py @@ -23,7 +23,7 @@ def test_constant_population(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -49,7 +49,7 @@ def test_constant_population(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - parameters = s.parameters.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) + parameters = s.parameters.parameters_quick_draw(n_days=s.n_days, nsubpops=s.nsubpops) parameter_names = [x for x in s.parameters.pnames] print("RUN_FUN_START") diff --git a/flepimop/gempyor_pkg/tests/seir/test_parameters.py b/flepimop/gempyor_pkg/tests/seir/test_parameters.py index c8de02fcb..0fbaf8bed 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_parameters.py +++ b/flepimop/gempyor_pkg/tests/seir/test_parameters.py @@ -27,7 +27,7 @@ def test_parameters_from_config_plus_read_write(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -61,7 +61,7 @@ def test_parameters_from_config_plus_read_write(): subpop_names=s.subpop_struct.subpop_names, ) n_days = 10 - nnodes = 5 + nsubpops = 5 p = parameters.Parameters( parameter_config=config["seir"]["parameters"], @@ -69,9 +69,9 @@ def test_parameters_from_config_plus_read_write(): tf=s.tf, subpop_names=s.subpop_struct.subpop_names, ) - p_draw = p.parameters_quick_draw(n_days=10, nnodes=5) + p_draw = p.parameters_quick_draw(n_days=10, nsubpops=5) # test shape - assert p_draw.shape == (len(config["seir"]["parameters"].keys()), n_days, nnodes) + assert p_draw.shape == (len(config["seir"]["parameters"].keys()), n_days, nsubpops) write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) @@ -81,7 +81,7 @@ def test_parameters_from_config_plus_read_write(): tf=s.tf, subpop_names=s.subpop_struct.subpop_names, ) - p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes) + p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nsubpops=nsubpops) assert (p_draw == p_load).all() @@ -95,7 +95,7 @@ def test_parameters_quick_draw_old(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) index = 1 @@ -135,7 +135,7 @@ def test_parameters_quick_draw_old(): assert params.intervention_overlap_operation["sum"] == [] assert params.intervention_overlap_operation["prod"] == [pn.lower() for pn in params.pnames] - p_array = params.parameters_quick_draw(n_days=s.n_days, nnodes=s.nnodes) + p_array = params.parameters_quick_draw(n_days=s.n_days, nsubpops=s.nsubpops) print(p_array.shape) alpha = p_array[params.pnames2pindex["alpha"]] @@ -145,17 +145,17 @@ def test_parameters_quick_draw_old(): # susceptibility_reduction = p_array[parameters.pnames2pindex['']] # transmissibility_reduction = p_array[parameters.pnames2pindex['alpha']] - assert alpha.shape == (s.n_days, s.nnodes) + assert alpha.shape == (s.n_days, s.nsubpops) assert (alpha == 0.9).all() - assert R0s.shape == (s.n_days, s.nnodes) + assert R0s.shape == (s.n_days, s.nsubpops) assert len(np.unique(R0s)) == 1 assert ((2 <= R0s) & (R0s <= 3)).all() - assert sigma.shape == (s.n_days, s.nnodes) + assert sigma.shape == (s.n_days, s.nsubpops) assert (sigma == config["seir"]["parameters"]["sigma"]["value"]["value"].as_evaled_expression()).all() - assert gamma.shape == (s.n_days, s.nnodes) + assert gamma.shape == (s.n_days, s.nsubpops) assert len(np.unique(gamma)) == 1 @@ -167,7 +167,7 @@ def test_parameters_from_timeserie_file(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) index = 1 @@ -200,7 +200,7 @@ def test_parameters_from_timeserie_file(): subpop_names=s.subpop_struct.subpop_names, ) n_days = 10 - nnodes = 5 + nsubpops = 5 p = parameters.Parameters( parameter_config=config["seir"]["parameters"], @@ -208,9 +208,9 @@ def test_parameters_from_timeserie_file(): tf=s.tf, subpop_names=s.subpop_struct.subpop_names, ) - p_draw = p.parameters_quick_draw(n_days=10, nnodes=5) + p_draw = p.parameters_quick_draw(n_days=10, nsubpops=5) # test shape - assert p_draw.shape == (len(config["seir"]["parameters"].keys()), n_days, nnodes) + assert p_draw.shape == (len(config["seir"]["parameters"].keys()), n_days, nsubpops) write_df(fname="test_pwrite.parquet", df=p.getParameterDF(p_draw=p_draw)) @@ -220,6 +220,6 @@ def test_parameters_from_timeserie_file(): tf=s.tf, subpop_names=s.subpop_struct.subpop_names, ) - p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nnodes=nnodes) + p_load = rhs.parameters_load(param_df=read_df("test_pwrite.parquet"), n_days=n_days, nsubpops=nsubpops) assert (p_draw == p_load).all() diff --git a/flepimop/gempyor_pkg/tests/seir/test_seir.py b/flepimop/gempyor_pkg/tests/seir/test_seir.py index 92f23e967..9f189ef68 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_seir.py +++ b/flepimop/gempyor_pkg/tests/seir/test_seir.py @@ -24,7 +24,7 @@ def test_check_values(): setup_name="test_values", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -43,8 +43,7 @@ def test_check_values(): ) with warnings.catch_warnings(record=True) as w: - - seeding = np.zeros((s.n_days, s.nnodes)) + seeding = np.zeros((s.n_days, s.nsubpops)) if np.all(seeding == 0): warnings.warn("provided seeding has only value 0", UserWarning) @@ -77,7 +76,7 @@ def test_constant_population_legacy_integration(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -110,7 +109,7 @@ def test_constant_population_legacy_integration(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -132,11 +131,11 @@ def test_constant_population_legacy_integration(): seeding_amounts, ) - completepop = s.popnodes.sum() - origpop = s.popnodes + completepop = s.subpop_pop.sum() + origpop = s.subpop_pop for it in range(s.n_days): totalpop = 0 - for i in range(s.nnodes): + for i in range(s.nsubpops): totalpop += states[0].sum(axis=1)[it, i] assert states[0].sum(axis=1)[it, i] - 1e-3 < origpop[i] < states[0].sum(axis=1)[it, i] + 1e-3 assert completepop - 1e-3 < totalpop < completepop + 1e-3 @@ -153,7 +152,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -185,7 +184,7 @@ def test_steps_SEIR_nb_simple_spread_with_txt_matrices(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -238,7 +237,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.csv", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -271,7 +270,7 @@ def test_steps_SEIR_nb_simple_spread_with_csv_matrices(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -308,7 +307,7 @@ def test_steps_SEIR_no_spread(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -342,7 +341,7 @@ def test_steps_SEIR_no_spread(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -409,7 +408,7 @@ def test_continuation_resume(): setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ), nslots=nslots, @@ -459,7 +458,7 @@ def test_continuation_resume(): setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ), nslots=nslots, @@ -527,7 +526,7 @@ def test_inference_resume(): setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ), nslots=nslots, @@ -572,7 +571,7 @@ def test_inference_resume(): setup_name=config["setup_name"].get(), geodata_file=spatial_base_path / spatial_config["geodata"].get(), mobility_file=spatial_base_path / spatial_config["mobility"].get(), - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ), nslots=nslots, @@ -620,7 +619,7 @@ def test_parallel_compartments_with_vacc(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -653,7 +652,7 @@ def test_parallel_compartments_with_vacc(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( @@ -714,7 +713,7 @@ def test_parallel_compartments_no_vacc(): setup_name="test_seir", geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -748,7 +747,7 @@ def test_parallel_compartments_no_vacc(): npi = NPI.NPIBase.execute(npi_config=s.npi_config_seir, global_config=config, subpops=s.subpop_struct.subpop_names) - params = s.parameters.parameters_quick_draw(s.n_days, s.nnodes) + params = s.parameters.parameters_quick_draw(s.n_days, s.nsubpops) params = s.parameters.parameters_reduce(params, npi) ( diff --git a/flepimop/gempyor_pkg/tests/seir/test_setup.py b/flepimop/gempyor_pkg/tests/seir/test_setup.py index 9ca8d7404..594ca52f2 100644 --- a/flepimop/gempyor_pkg/tests/seir/test_setup.py +++ b/flepimop/gempyor_pkg/tests/seir/test_setup.py @@ -21,18 +21,18 @@ def test_SubpopulationStructure_success(self): setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) - def test_bad_popnodes_key_fail(self): - # Bad popnodes_key error - with pytest.raises(ValueError, match=r".*popnodes_key.*"): + def test_bad_subpop_pop_key_fail(self): + # Bad subpop_pop_key error + with pytest.raises(ValueError, match=r".*subpop_pop_key.*"): subpopulation_structure.SubpopulationStructure( setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_small.txt", - popnodes_key="wrong", + subpop_pop_key="wrong", subpop_names_key="subpop", ) @@ -42,7 +42,7 @@ def test_bad_subpop_names_key_fail(self): setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="wrong", ) @@ -52,7 +52,7 @@ def test_mobility_dimensions_fail(self): setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_small.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) @@ -62,6 +62,6 @@ def test_mobility_too_big_fail(self): setup_name=TEST_SETUP_NAME, geodata_file=f"{DATA_DIR}/geodata.csv", mobility_file=f"{DATA_DIR}/mobility_big.txt", - popnodes_key="population", + subpop_pop_key="population", subpop_names_key="subpop", ) From 35673762e821c37596e0af3da8ec4d927b3fc0da Mon Sep 17 00:00:00 2001 From: saraloo <45245630+saraloo@users.noreply.github.com> Date: Thu, 28 Sep 2023 23:03:28 -0400 Subject: [PATCH 8/8] notebook with model outputs for all run types --- postprocessing/model_output_notebook.Rmd | 704 +++++++++++++++++++++++ 1 file changed, 704 insertions(+) create mode 100644 postprocessing/model_output_notebook.Rmd diff --git a/postprocessing/model_output_notebook.Rmd b/postprocessing/model_output_notebook.Rmd new file mode 100644 index 000000000..4ed788dcb --- /dev/null +++ b/postprocessing/model_output_notebook.Rmd @@ -0,0 +1,704 @@ +--- +title: "Model Output plots" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + html_document: + toc: true + toc_depth: 2 + number_sections: TRUE + keep_tex: FALSE +params: + opt: !r option_list = list(optparse::make_option(c("-c", "--config"), action="store", default=Sys.getenv("CONFIG_PATH", Sys.getenv("CONFIG_PATH")), type='character', help="path to the config file"), optparse::make_option(c("-d", "--data_path"), action="store", default=Sys.getenv("DATA_PATH", Sys.getenv("DATA_PATH")), type='character', help="path to the data repo"), optparse::make_option(c("-u","--run-id"), action="store", dest = "run_id", type='character', help="Unique identifier for this run", default = Sys.getenv("FLEPI_RUN_INDEX",flepicommon::run_id())), optparse::make_option(c("-R", "--results-path"), action="store", dest = "results_path", type='character', help="Path for model output", default = Sys.getenv("FS_RESULTS_PATH", Sys.getenv("FS_RESULTS_PATH")))) +--- + +```{r setup, include=FALSE} +suppressMessages(library(parallel)) +suppressMessages(library(foreach)) +suppressMessages(library(inference)) +suppressMessages(library(tidyverse)) +suppressMessages(library(tidyr)) +suppressMessages(library(doParallel)) +suppressMessages(library(dplyr)) +suppressMessages(library(data.table)) +suppressMessages(library(ggplot2)) +suppressMessages(library(ggforce)) +suppressMessages(library(ggforce)) +suppressMessages(library(gridExtra)) + +parser=optparse::OptionParser(option_list=params$opt) +opt = optparse::parse_args(parser, convert_hyphens_to_underscores = TRUE) + +knitr::opts_chunk$set( + echo = FALSE, + message = FALSE, + warning = FALSE, + cache = TRUE, + cache.lazy = FALSE +) +# knitr::opts_knit$set(root.dir = opt$data_path) + +``` + +```{r data-setup} + + +# FUNCTIONS --------------------------------------------------------------- + +import_model_outputs <- + function(scn_dir, + outcome, + global_opt, + final_opt, + lim_hosp = c("date", + "incidH", + "incidC", + "incidD", + # lim_hosp = c("date", + # sapply(1:length(names(config$inference$statistics)), function(i) purrr::flatten(config$inference$statistics[i])$sim_var), + config$spatial_setup$nodenames)) { + # "subpop")){ + dir_ <- paste0( + scn_dir, + "/", + outcome, + "/", + config$name, + "/", + config$interventions$scenarios, + "/", + config$outcomes$scenarios + ) + subdir_ <- paste0(dir_, "/", list.files(dir_), + "/", + global_opt, + "/", + final_opt) + subdir_list <- list.files(subdir_) + + out_ <- NULL + total <- length(subdir_list) + + print(paste0("Importing ", outcome, " files (n = ", total, "):")) + + for (i in 1:length(subdir_list)) { + if (any(grepl("parquet", subdir_list))) { + dat <- + arrow::read_parquet(paste(subdir_, subdir_list[i], sep = "/")) + } + if (outcome == "hosp") { + dat <- + arrow::read_parquet(paste(subdir_, subdir_list[i], sep = "/")) %>% + select(all_of(lim_hosp)) + } + if (any(grepl("csv", subdir_list))) { + dat <- read.csv(paste(subdir_, subdir_list[i], sep = "/")) + } + if (final_opt == "final") { + dat$slot <- as.numeric(str_sub(subdir_list[i], start = 1, end = 9)) + } + if (final_opt == "intermediate") { + dat$slot <- as.numeric(str_sub(subdir_list[i], start = 1, end = 9)) + dat$block <- + as.numeric(str_sub(subdir_list[i], start = 11, end = 19)) + } + out_ <- rbind(out_, dat) + + } + return(out_) + } + +config <- flepicommon::load_config(opt$config) + +res_dir <- file.path(opt$results_path, config$model_output_dirname) +print(res_dir) + +``` + +```{r read-in-model-output, cache = TRUE} +# Pull in subpop data +geodata <- + setDT(read.csv(file.path( + config$data_path, config$spatial_setup$geodata + ))) +# geodata <- setDT(read.csv(file.path(config$data_path, config$subpop_setup$geodata))) + + +## gt_data MUST exist directly after a run (ONLY IF INFERENCE RUN) +if (!is.null(config$inference)) { + gt_data <- data.table::fread(config$inference$gt_data_path) %>% + .[, subpop := stringr::str_pad(FIPS, + width = 5, + side = "left", + pad = "0")] +} + +if (!is.null(config$inference)) { + inference <- TRUE +} else{ + inference <- FALSE +} + +theme_small <- + theme( + text = element_text(size = 8), + strip.background = element_blank(), + strip.placement = "outside" + ) + +``` +📸 + +Here is a snapshot of your model outputs for run ID `r opt$run_id`, from config `r opt$config`, stored in `r opt$results_path`. + +# Infection model: SEIR model output + +```{r seir, cache = TRUE, fig.dim = c(8, 20), results='hide',fig.keep='all'} +# read in model outputs +seir_outputs_global <- + setDT(import_model_outputs(res_dir, "seir", 'global', 'final')) + +# get different aggregation from list of config compartments? +## assuming there is always infection_stage, aggregate over this, incorporate aggregation of other variables later TO DO +## assume always interested in prevalence + +# if(inference){group_by_cols <- c("mc_infection_stage", "mc_value_type","slot","date")}else{group_by_cols <- c("mc_infection_stage","mc_value_type","date")} +group_by_cols <- + c("mc_infection_stage", "mc_value_type", "slot", "date") # I think if just one slot, gets read in as slot = 1? +subpop_cols <- + colnames(seir_outputs_global)[!str_detect(colnames(seir_outputs_global), "mc")] +subpop_cols <- + subpop_cols[which(!subpop_cols %in% c("date", "slot"))] + +tmp_seir <- seir_outputs_global %>% + .[, lapply(.SD, sum, na.rm = TRUE), by = group_by_cols, .SDcols = subpop_cols] + +# plot an example simulation +print( + tmp_seir %>% .[mc_value_type == "prevalence" & + slot == sample(unique(tmp_seir$slot), 1)] %>% + data.table::melt(., measure.vars = subpop_cols) %>% + ggplot() + + geom_line(aes(date, value, colour = mc_infection_stage)) + + facet_wrap( + ~ variable, + scales = 'free', + ncol = 4, + strip.position = "right" + ) + + theme_classic() + + theme(legend.position = "bottom") + + theme_small +) +``` + + +# Infection model: SNPI model output + +```{r snpi, cache = TRUE, fig.dim = c(8,20), results='hide',fig.keep='all'} +# read in model outputs +snpi_outputs_global <- setDT(import_model_outputs(res_dir, "snpi", 'global', 'final')) +node_names <- unique(snpi_outputs_global %>% .[ , get(config$spatial_setup$nodenames)]) +# node_names <- unique(sort(snpi_outputs_global %>% .[ , "subpop"])) +node_names <- c(node_names[str_detect(node_names,",")], node_names[!str_detect(node_names,",")]) # sort so that multiple subpops are in front + +if(inference){ + llik <- setDT(import_model_outputs(res_dir, "llik", 'global', 'final')) + # snpi_outputs_global <- snpi_outputs_global %>% + # .[llik, on = c("geoid", "slot")] +} + +snpi_plots <- lapply(node_names, + function(i){ + if(!grepl(',', i)){ + snpi_outputs_global %>% + {if(inference) + .[llik, on = c("geoid", "slot")] } %>% + .[geoid == i] %>% + ggplot(aes(npi_name,reduction)) + + geom_violin() + + {if(inference) + geom_jitter(aes(group = npi_name, color = ll), size = 0.5, height = 0, width = 0.2, alpha = 0.5) + } + + {if(!inference) + geom_jitter(aes(group = npi_name), size = 0.5, height = 0, width = 0.2, alpha = 0.5) + } + + theme_bw(base_size = 10) + + theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 6), + text = element_text(size = 8)) + + guides(color = guide_legend(override.aes = list(size = 0.5)))+ + scale_color_viridis_c(option = "B", name = "log\nlikelihood") + + labs(x = "parameter", title = i) + theme_small + + }else{ + if(inference){ + nodes_ <- unlist(strsplit(i,",")) + ll_across_nodes <- + llik %>% + .[geoid %in% nodes_] %>% + .[, .(ll_sum = sum(ll)), by = .(slot)] + } + + snpi_outputs_global %>% + {if(inference) + .[ll_across_nodes, on = c("slot")]} %>% + .[geoid == i] %>% + ggplot(aes(npi_name,reduction)) + + geom_violin() + + {if(inference) + geom_jitter(aes(group = npi_name, color = ll_sum), size = 0.5, height = 0, width = 0.2, alpha = 0.5) + } + + {if(!inference) + geom_jitter(aes(group = npi_name), size = 0.5, height = 0, width = 0.2, alpha = 0.5) + } + + theme_bw(base_size = 10) + + theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 6), + text = element_text(size = 8)) + + guides(color = guide_legend(override.aes = list(size = 0.5)))+ + scale_color_viridis_c(option = "B", name = "log\nlikelihood") + + labs(x = "parameter") + theme_small + } + } +) + +print(do.call("grid.arrange", c(snpi_plots, ncol=4))) + +``` + + +#Outcome model: HOSP model output + + + + + +## Daily hosp {.tabset} +### Single trajectories {.tabset} +```{r hosp_daily_single_slot, results='asis', cache = TRUE, fig.dim = c(8,8)} +## add something so that if it doesn't exist, it prints some 'no output' message + +# get all outcome variables +scns <- config$outcomes$scenarios +list_of_vars_config <- paste0("config$outcomes$settings$", scns) +outcomes <- eval(parse(text = list_of_vars_config)) +outcome_vars <- names(outcomes) + +# for simplicity, get aggregate outcome variables +outcome_vars_ <- outcome_vars[!str_detect(outcome_vars, "_")] + +# read in model outputs +hosp_outputs_global <- setDT(import_model_outputs(res_dir, "hosp", 'global', 'final', + lim_hosp = c("date", config$spatial_setup$nodenames, outcome_vars_))) +# lim_hosp = c("date", "subpop", outcome_vars_))) +# num_nodes <- length(unique(hosp_outputs_global %>% .[,"subpop"])) +num_nodes <- length(unique(hosp_outputs_global %>% .[,get(config$spatial_setup$nodenames)])) + +sim_sample <- sample(unique(hosp_outputs_global$slot),1) + + +cat("\n\n") + +## plot ONE sample trajectory for sanity check (can modify) +for(i in 1:length(outcome_vars_)){ + + cat(paste0("#### ",outcome_vars_[i]," \n")) + + ## Incident + print( + hosp_outputs_global %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, outcome = get(outcome_vars_[i]), slot)] %>% + .[slot == sim_sample] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # data.table::melt(., id.vars = c("date", "slot", "subpop")) %>% + ggplot() + + geom_line(aes(x = date, y = value)) + + # if inference, plot gt along side + {if(inference & outcome_vars_[i] %in% colnames(gt_data)) + if(any(!is.na(gt_data %>% .[, get(outcome_vars_[i])]))) + geom_point(data = gt_data %>% .[, .(date, geoid = subpop, value = get(outcome_vars_[i]))], + aes(lubridate::as_date(date), value), color = 'firebrick', alpha = 0.1) + } + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = outcome_vars_[i], title = "Incidence") + + theme_classic() + theme_small + ) + + ## Cumulative + print( + hosp_outputs_global %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, outcome = get(outcome_vars_[i]), slot)] %>% + .[slot == sim_sample] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # dplyr::arrange(geoid, slot, date) %>% + .[, csum := cumsum(value), by = .(slot, geoid, variable)] %>% + ggplot() + + geom_line(aes(x = date, y = csum)) + + {if(inference & outcome_vars_[i] %in% colnames(gt_data)) + geom_point(data = gt_data %>% .[, .(date, geoid = subpop, value = get(outcome_vars_[i]))] %>% + .[, csum := cumsum(value) , by = .(geoid)], + aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) + } + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = paste0("cumulative ", outcome_vars_[i]), title = "Cumulative") + + theme_classic() + theme_small + ) + + + cat("\n\n") + +} + +``` + +### Quantiles {.tabset} +```{r hosp_daily_quantiles, results='asis', cache = TRUE, fig.dim = c(8,8)} +# ```{r hosp_daily_quantiles, fig.dim = c(8,8), results='hide',fig.keep='all'} + +if(length(unique(hosp_outputs_global$slot)) > 1){ + + cat("\n\n") + + ## plot quantiles (if more than one slot) + for(i in 1:length(outcome_vars_)){ + + cat(paste0("#### ",outcome_vars_[i]," \n")) + ## plot quantiles (if more than one slot) + # for(i in 1:length(outcome_vars_)){ + + # incident + print( + hosp_outputs_global %>% + .[, date := lubridate::as_date(date)] %>% + # .[, as.list(quantile(get(outcome_vars_[i]), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>% + .[, as.list(quantile(get(outcome_vars_[i]), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$spatial_setup$nodenames)] %>% + setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>% + ggplot() + + geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) + + geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) + + geom_line(aes(x = date, y = q0.5)) + + # if inference, plot gt along side + {if(inference & outcome_vars_[i] %in% colnames(gt_data)) + if(any(!is.na(gt_data %>% .[, get(outcome_vars_[i])]))) + geom_point(data = gt_data %>% .[, .(date, geoid = subpop, value = get(outcome_vars_[i]))], + aes(lubridate::as_date(date), value), color = 'firebrick', alpha = 0.1) + } + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = outcome_vars_[i], title = "Incidence") + + theme_classic()+ theme_small + ) + + # cumulative + print( + hosp_outputs_global %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, outcome = get(outcome_vars_[i]), slot)] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # dplyr::arrange(geoid, slot, date) %>% + .[, csum := cumsum(value), by = .(slot, geoid, variable)] %>% + .[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "geoid")] %>% + setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>% + ggplot() + + geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) + + geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) + + geom_line(aes(x = date, y = q0.5)) + + {if(inference & outcome_vars_[i] %in% colnames(gt_data)) + geom_point(data = gt_data %>% .[, .(date, geoid = subpop, value = get(outcome_vars_[i]))] %>% + .[, csum := cumsum(value) , by = .(geoid)], + aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) + } + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = paste0("cumulative ", outcome_vars_[i]), title = "Cumulative") + + theme_classic() + theme_small + ) + + } + cat("\n\n") + +} + +``` + + + + +# Outcome model: HNPI model output + +```{r hnpi, cache = TRUE, fig.dim = c(8,20), results='hide',fig.keep='all'} +# read in model outputs +hnpi_outputs_global <- setDT(import_model_outputs(res_dir, "hnpi", 'global', 'final')) +node_names <- unique(hnpi_outputs_global %>% .[ , get(config$spatial_setup$nodenames)]) +# node_names <- unique(sort(hnpi_outputs_global %>% .[ , "subpop"])) +node_names <- c(node_names[str_detect(node_names,",")], node_names[!str_detect(node_names,",")]) # sort so that multiple subpops are in front + +if(inference){ + llik <- setDT(import_model_outputs(res_dir, "llik", 'global', 'final')) +} + + +hnpi_plots <- lapply(node_names, + function(i){ + hnpi_outputs_global %>% + .[llik, on = c("geoid", "slot")] %>% + .[geoid == i] %>% + ggplot(aes(npi_name,reduction)) + + geom_violin() + + {if(inference) + geom_jitter(aes(group = npi_name, colour = ll), size = 0.6, height = 0, width = 0.2, alpha = 1) + } + + {if(!inference) + geom_jitter(aes(group = npi_name), size = 0.6, height = 0, width = 0.2, alpha = 1) + } + + facet_wrap(~geoid, scales = 'free') + + guides(color = guide_legend(override.aes = list(size = 0.5))) + + scale_color_viridis_c(option = "B", name = "log\nlikelihood") + + theme_classic()+ theme_small + } +) +print(do.call("grid.arrange", c(hnpi_plots, ncol=4))) + +``` + +# Inference: analyses +## Likelihood +```{r llik_acceptances} + +``` + + +## Inference specific outcomes: aggregated {.tabset} +### Single trajectories (aggregated by fitting) {.tabset} +```{r hosp_trajectories_inference_aggregate, fig.dim = c(8,20), results='hide',fig.keep='all'} +if(inference){ + # get all outcome variables + scns <- config$outcomes$scenarios + list_of_vars_config <- paste0("config$outcomes$settings$", scns) + outcomes <- eval(parse(text = list_of_vars_config)) + outcome_vars <- names(outcomes) + fit_stats <- names(config$inference$statistics) + # stat_list <- config$inference$statistics + + cat("\n\n") + for(i in 1:length(fit_stats)){ + + cat(paste0("#### ",fit_stats[i]," \n")) + + statistics <- purrr::flatten(config$inference$statistics[i]) + cols_sim <- c("date", statistics$sim_var, "geoid","slot") + cols_data <- c("date", "subpop", statistics$data_var) + + # aggregate based on what is in the config + df_data <- lapply(node_names, function(y) { + lapply(unique(hosp_outputs_global$slot), function(x) + purrr::flatten_df(inference::getStats( + hosp_outputs_global %>% .[geoid == y & slot == x], + "date", + "sim_var", + stat_list = config$inference$statistics, + start_date = config$start_date_groundtruth, + end_date = config$end_date_groundtruth + )) %>% dplyr::mutate(geoid = y, slot = x)) %>% dplyr::bind_rows() + }) %>% dplyr::bind_rows() + + df_gt <- lapply(node_names, function(x) purrr::flatten_df( + inference::getStats( + gt_data %>% .[subpop == x], + "date", + "data_var", + stat_list = config$inference$statistics, + start_date = config$start_date_groundtruth, + end_date = config$end_date_groundtruth + )) %>% dplyr::mutate(geoid = x)) %>% dplyr::bind_rows() + + # + # df_data <- lapply(node_names, function(x) purrr::flatten_df( + # inference::getStats( + # hosp_outputs_global %>% .[geoid == x], + # "date", + # "data_var", + # stat_list = config$inference$statistics, + # start_date = config$start_date_groundtruth, + # end_date = config$end_date_groundtruth + # )) %>% dplyr::mutate(geoid = x)) %>% dplyr::bind_rows() + + ## Incident + print( + df_data %>% + setDT() %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, sim_var, slot)] %>% + .[slot == sim_sample] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # data.table::melt(., id.vars = c("date", "slot", "subpop")) %>% + ggplot() + + geom_line(aes(x = date, y = value)) + + # if inference, plot gt along side + geom_point(data = df_gt, + aes(lubridate::as_date(date), data_var), color = 'firebrick', alpha = 0.1) + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = statistics$name, title = "Incidence") + + theme_classic() + theme_small + ) + + ## Cumulative + print( + df_data %>% + setDT() %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, sim_var, slot)] %>% + .[slot == sim_sample] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # dplyr::arrange(geoid, slot, date) %>% + .[, csum := cumsum(value), by = .(slot, geoid, variable)] %>% + ggplot() + + geom_line(aes(x = date, y = csum)) + + geom_point(data = df_gt %>% setDT() %>% + .[, csum := cumsum(data_var) , by = .(geoid)], + aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = paste0("cumulative ", statistics$name), title = "Cumulative") + + theme_classic() + theme_small + ) + + } + cat("\n\n") + +} + +``` + +### Quantiles(aggregated by fitting) {.tabset} +```{r hosp_aggregate_quantiles, fig.dim = c(8,20), results='hide',fig.keep='all'} + +if(length(unique(hosp_outputs_global$slot)) > 1 & inference){ + + cat("\n\n") + for(i in 1:length(fit_stats)){ + + cat(paste0("#### ",fit_stats[i]," \n")) + statistics <- purrr::flatten(config$inference$statistics[i]) + + # Incident + print( + df_data %>% + setDT() %>% + .[, date := lubridate::as_date(date)] %>% + .[, as.list(quantile(sim_var, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "geoid")] %>% + setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>% + ggplot() + + geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) + + geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) + + geom_line(aes(x = date, y = q0.5)) + + # if inference, plot gt along side + geom_point(data = df_gt, + aes(lubridate::as_date(date), data_var), color = 'firebrick', alpha = 0.1) + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = statistics$name) + + theme_classic() + theme_small + ) + + ## Cumulative + print( + df_data %>% + setDT() %>% + .[, date := lubridate::as_date(date)] %>% + .[, .(date, geoid, sim_var, slot)] %>% + data.table::melt(., id.vars = c("date", "slot", "geoid")) %>% + # dplyr::arrange(geoid, slot, date) %>% + .[, csum := cumsum(value), by = .(slot, geoid, variable)] %>% + .[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$subpop_setup$subpop)] %>% + setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>% + ggplot() + + geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) + + geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) + + geom_line(aes(x = date, y = q0.5)) + + geom_point(data = df_gt %>% setDT() %>% + .[, csum := cumsum(data_var) , by = .(geoid)], + aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) + + # facet_wrap(~subpop, scales = 'free') + + facet_wrap(~get(config$spatial_setup$nodenames), scales = 'free') + + labs(x = 'date', y = paste0("cumulative ", statistics$name)) + + theme_classic() + theme_small + ) + + } + cat("\n\n") + +} + +``` + + +## Hosp by likelihood + +Trajectories of the 5 and bottom 5 log likelihoods for each subpopulation. + +```{r hosp_trajectories_by_likelihood, fig.dim = c(8,20), results='hide',fig.keep='all'} + +if(inference){ + + for(i in 1:length(fit_stats)){ + statistics <- purrr::flatten(config$inference$statistics[i]) + cols_sim <- c("date", statistics$sim_var, config$subpop_setup$subpop,"slot") + cols_data <- c("date", config$subpop_setup$subpop, statistics$data_var) + if(exists("llik")){ + llik_rank <- llik %>% + .[, .SD[order(ll)], geoid] + high_low_llik <- rbindlist(list(data.table(llik_rank, key = eval(config$spatial_setup$nodenames)) %>% + .[, head(.SD,5), by = eval(config$spatial_setup$nodenames)] %>% + .[, llik_bin := "top"], + data.table(llik_rank, key = eval(config$spatial_setup$nodenames)) %>% + .[, tail(.SD,5), by = eval(config$spatial_setup$nodenames)]%>% + .[, llik_bin := "bottom"]) + ) + + high_low_hosp_llik <- hosp_outputs_global %>% + .[high_low_llik, on = c("slot", eval(config$spatial_setup$nodenames))] + + hosp_llik_plots <- lapply(unique(high_low_hosp_llik %>% .[, geoid]), + function(e){ + high_low_hosp_llik %>% + .[, date := lubridate::as_date(date)] %>% + # { if(config$subpop_setup$subpop == 'subpop'){ + # .[geodata %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")], on = .(subpop)]} + # } %>% + .[geoid == e] %>% + # { if(config$subpop_setup$subpop == 'subpop'){ .[, subpop := USPS]} + # } %>% + ggplot() + + geom_line(aes(lubridate::as_date(date), get(statistics$data_var), + group = slot, color = ll, linetype = llik_bin)) + + scale_linetype_manual(values = c(1, 2), name = "likelihood\nbin") + + scale_color_viridis_c(option = "D", name = "log\nlikelihood") + + {if(inference & outcome_vars_[i] %in% colnames(gt_data)) + geom_point(data = gt_data %>% .[, .(date, geoid = subpop, value = get(outcome_vars_[i]))], + aes(lubridate::as_date(date), value), color = 'firebrick', alpha = 0.1) + } + + facet_wrap(~geoid, scales = 'free') + + guides(color = guide_legend(override.aes = list(size = 0.5)), + linetype = 'none') + + labs(x = 'date', y = fit_stats[i]) + #, title = paste0("top 5, bottom 5 lliks, ", statistics$sim_var)) + + theme_classic() + theme_small + } + ) + + print(do.call("grid.arrange", c(hosp_llik_plots, ncol=4))) + + } + } +} + + +``` + + + + +