fix merge error

Merge branch 'breaking-improvments' of https://github.com/HopkinsIDD/flepiMoP into breaking-improvments # Conflicts: # flepimop/R_packages/config.writer/R/yaml_utils.R
HopkinsIDD · Sep 29, 2023 · 927b4c0 · 927b4c0
2 parents 8ea82a7 + 3567376
commit 927b4c0
Show file tree

Hide file tree

Showing 73 changed files with 1,134 additions and 393 deletions.
diff --git a/batch/inference_job_launcher.py b/batch/inference_job_launcher.py
@@ -425,7 +425,7 @@ def autodetect_params(config, data_path, *, num_jobs=None, sims_per_job=None, nu
             print(f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]")
             print(f"Setting sims per job to {sims_per_job} [via {iterations_per_slot} iterations_per_slot in config]")
         else:
-            geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["spatial_setup"]["geodata"]
+            geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["subpop_setup"]["geodata"]
             with open(geodata_fname) as geodata_fp:
                 num_subpops = sum(1 for line in geodata_fp)
 

diff --git a/datasetup/build_US_setup.R b/datasetup/build_US_setup.R
@@ -8,7 +8,7 @@
 #
 # ```yaml
 # data_path: <path to directory>
-# spatial_setup:
+# subpop_setup:
 #   modeled_states: <list of state postal codes> e.g. MD, CA, NY
 #   mobility: <path to file relative to data_path> optional; default is 'mobility.csv'
 #   geodata: <path to file relative to data_path> optional; default is 'geodata.csv'
@@ -23,8 +23,8 @@
 #
 # ## Output Data
 #
-# * {data_path}/{spatial_setup::mobility}
-# * {data_path}/{spatial_setup::geodata}
+# * {data_path}/{subpop_setup::mobility}
+# * {data_path}/{subpop_setup::geodata}
 #
 
 ## @cond
@@ -52,11 +52,11 @@ if (length(config) == 0) {
 }
 
 outdir <- config$data_path
-filterUSPS <- config$spatial_setup$modeled_states
+filterUSPS <- config$subpop_setup$modeled_states
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
 # Aggregation to state level if in config
-state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
+state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)
 
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 # commute_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","commute_data.gz.parquet"))
@@ -80,7 +80,7 @@ tidycensus::census_api_key(key = census_key)
 
 
 census_data <- tidycensus::get_acs(geography="county", state=filterUSPS,
-                                   variables="B01003_001", year=config$spatial_setup$census_year,
+                                   variables="B01003_001", year=config$subpop_setup$census_year,
                                    keep_geo_vars=TRUE, geometry=FALSE, show_call=TRUE)
 census_data <- census_data %>%
   dplyr::rename(population=estimate, subpop=GEOID) %>%
@@ -137,12 +137,12 @@ if (state_level){
 census_data <- census_data %>%
   dplyr::arrange(population)
 
-if (!is.null(config$spatial_setup$popnodes)) {
-  names(census_data)[names(census_data) == "population"] <- config$spatial_setup$popnodes
+if (!is.null(config$subpop_setup$popnodes)) {
+  names(census_data)[names(census_data) == "population"] <- config$subpop_setup$popnodes
 }
 
-if (length(config$spatial_setup$geodata) > 0) {
-  geodata_file <- config$spatial_setup$geodata
+if (length(config$subpop_setup$geodata) > 0) {
+  geodata_file <- config$subpop_setup$geodata
 } else {
   geodata_file <- 'geodata.csv'
 }
@@ -155,13 +155,13 @@ print(paste("Wrote geodata file:", file.path(outdir, geodata_file)))
 # MOBILITY DATA (COMMUTER DATA) ------------------------------------------------------------
 
 
-if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){
+if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){
 
-  warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility), "(specified in the config) currently exists. Please check again."))
+  warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility), "(specified in the config) currently exists. Please check again."))
 
-} else if(state_level & file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){
+} else if(state_level & file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){
 
-  warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility)))
+  warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility)))
 
 } else{
 
@@ -176,8 +176,8 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup
 
   if(opt$w){
     mobility_file <- 'mobility.txt'
-  } else if (length(config$spatial_setup$mobility) > 0) {
-    mobility_file <- config$spatial_setup$mobility
+  } else if (length(config$subpop_setup$mobility) > 0) {
+    mobility_file <- config$subpop_setup$mobility
   } else {
     mobility_file <- 'mobility.csv'
   }
@@ -210,7 +210,7 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup
       write.csv(file = file.path(outdir, mobility_file), rc, row.names=FALSE)
 
     } else {
-      stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's spatial_setup::mobility.")
+      stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's subpop_setup::mobility.")
     }
 
     print(paste("Wrote mobility file:", file.path(outdir, mobility_file)))

diff --git a/datasetup/build_covid_data.R b/datasetup/build_covid_data.R
@@ -31,11 +31,11 @@ if (exists("config$inference$gt_source")) {
 }
 
 outdir <- config$data_path
-filterUSPS <- config$spatial_setup$modeled_states
+filterUSPS <- config$subpop_setup$modeled_states
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
 # Aggregation to state level if in config
-state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
+state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)
 
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
@@ -218,7 +218,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
 
     max(fluview_data$Update)
 
-    census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata))
+    census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
     fluview_data <- fluview_data %>%
         dplyr::inner_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
         dplyr::select(Update, source, FIPS, incidD)
@@ -235,7 +235,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
     fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>%
     # mutate(gt_source = "nchs")
     # fluview_data <- fluview_data %>%
-    # filter(source %in% config$spatial_setup$modeled_states)
+    # filter(source %in% config$subpop_setup$modeled_states)
     # Update >= config$start_date,
     # Update <= config$end_date_groundtruth)
     gt_data <- append(gt_data, list(fluview_data))
@@ -283,7 +283,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
 #
 #     max(fluview_data$Update)
 #
-#     census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata))
+#     census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
 #     fluview_data <- fluview_data %>%
 #         left_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
 #         dplyr::select(Update, source, FIPS, incidD)
@@ -300,7 +300,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
 #     fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>%
 #     # mutate(gt_source = "nchs")
 #     # fluview_data <- fluview_data %>%
-#     # filter(source %in% config$spatial_setup$modeled_states)
+#     # filter(source %in% config$subpop_setup$modeled_states)
 #     # Update >= config$start_date,
 #     # Update <= config$end_date_groundtruth)
 #     gt_data <- append(gt_data, list(fluview_data))
@@ -372,7 +372,7 @@ us_data <- us_data %>%
     filter(Update >= lubridate::as_date(config$start_date) & Update <= lubridate::as_date(end_date_))
 
 # Filter to states we care about
-locs <- config$spatial_setup$modeled_states
+locs <- config$subpop_setup$modeled_states
 us_data <- us_data %>%
     filter(source %in% locs) %>%
     filter(!is.na(source)) %>%

diff --git a/datasetup/build_flu_data.R b/datasetup/build_flu_data.R
@@ -32,11 +32,11 @@ if (length(config) == 0) {
 }
 
 outdir <- config$data_path
-filterUSPS <- config$spatial_setup$modeled_states
+filterUSPS <- config$subpop_setup$modeled_states
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
 # Aggregation to state level if in config
-state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
+state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)
 
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 
@@ -59,7 +59,7 @@ source("https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/d
 
 # Pull daily hospitalizations for model run
 us_data <- load_flu_hosp_data(temporal_resolution = 'daily', na.rm = TRUE)
-locs <- read_csv(file.path(config$data_path, config$spatial_setup$geodata))
+locs <- read_csv(file.path(config$data_path, config$subpop_setup$geodata))
 
 # fix string pad issue on left side
 us_data <- us_data %>%

diff --git a/datasetup/build_nonUS_setup.R b/datasetup/build_nonUS_setup.R
@@ -8,7 +8,7 @@
 #
 # ```yaml
 # data_path: <path to directory>
-# spatial_setup:
+# subpop_setup:
 #   modeled_states: <list of country ISO3 codes> e.g. ZMB, BGD, CAN
 #   mobility: <path to file relative to data_path> optional; default is 'mobility.csv'
 #   geodata: <path to file relative to data_path> optional; default is 'geodata.csv'
@@ -19,8 +19,8 @@
 #
 # ## Output Data
 #
-# * {data_path}/{spatial_setup::mobility}
-# * {data_path}/{spatial_setup::geodata}
+# * {data_path}/{subpop_setup::mobility}
+# * {data_path}/{subpop_setup::geodata}
 #
 
 ## @cond
@@ -42,7 +42,7 @@ if (length(config) == 0) {
 }
 
 outdir <- config$data_path
-filterADMIN0 <- config$spatial_setup$modeled_states
+filterADMIN0 <- config$subpop_setup$modeled_states
 
 dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
 

diff --git a/flepimop/R_packages/config.writer/R/yaml_utils.R b/flepimop/R_packages/config.writer/R/yaml_utils.R
@@ -89,10 +89,11 @@ collapse_intervention<- function(dat){
         dplyr::summarize(period = paste0(period, collapse="\n            "))
 
     if (exists("mtr$spatial_groups") && (!all(is.na(mtr$spatial_groups)) & !all(is.null(mtr$spatial_groups)))) {
+
         mtr <- mtr %>%
             dplyr::group_by(dplyr::across(-subpop)) %>%
             dplyr::summarize(subpop = paste0(subpop, collapse='", "'),
-                             spatial_groups = paste0(spatial_groups, collapse='", "')) %>%
+                             subpop_groups = paste0(subpop_groups, collapse='", "')) %>%
             dplyr::mutate(period = paste0("            ", period))
 
     } else {
@@ -103,7 +104,7 @@ collapse_intervention<- function(dat){
     }
 
     reduce <- dat %>%
-        dplyr::select(USPS, subpop, contains("spatial_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>%
+        dplyr::select(USPS, subpop, contains("subpop_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>%
         dplyr::filter(template %in% c("SinglePeriodModifier", "ModifierModifier")) %>%
         dplyr::mutate(end_date=paste0("period_end_date: ", end_date),
                       start_date=paste0("period_start_date: ", start_date)) %>%
@@ -149,9 +150,9 @@ yaml_mtr_template <- function(dat){
             "      groups:\n",
             '        - subpop: "all"\n'
         ))
-        if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
+        if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
             cat(paste0(
-                '          spatial_groups: "all"\n'))
+                '          subpop_groups: "all"\n'))
         }
 
         for(j in 1:nrow(dat)){
@@ -173,9 +174,9 @@ yaml_mtr_template <- function(dat){
             cat(paste0(
                 '        - subpop: ["', dat$subpop[j], '"]\n'))
 
-            if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
+            if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
                 cat(paste0(
-                    '          spatial_groups: ["', dat$spatial_groups[j], '"]\n'))
+                    '          subpop_groups: ["', dat$subpop_groups[j], '"]\n'))
             }
             cat(paste0(
                 '          periods:\n',
@@ -375,12 +376,12 @@ yaml_reduce_template<- function(dat){
         } else {
             paste0('      subpop: ["', dat$subpop, '"]\n')
         },
-        if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
-            if(all(dat$spatial_groups == "all")){
-                '      spatial_groups: "all"\n'
+        if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
+            if(all(dat$subpop_groups == "all")){
+                '      subpop_groups: "all"\n'
             } else {
-                paste0('      spatial_groups: \n',
-                       paste(sapply(X=dat$spatial_groups, function(x = X) paste0('        - ["', paste(x, collapse = '", "'), '"]\n')), collapse = ""))
+                paste0('      subpop_groups: \n',
+                       paste(sapply(X=dat$subpop_groups, function(x = X) paste0('        - ["', paste(x, collapse = '", "'), '"]\n')), collapse = ""))
             }
         },
         dat$period,
@@ -526,7 +527,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){
 
 
 #' Print Header Section
-#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file.
+#' @description Prints the global options and the subpop setup section of the configuration files. These typically sit at the top of the configuration file.
 #'
 #' @param sim_name name of simulation, typically named after the region/location you are modeling
 #' @param setup_name # SMH, FCH
@@ -539,7 +540,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){
 #' @param nslots number of simulations to run
 #' @param model_output_dirname
 #' @param start_date_groundtruth
-#' @param setup_name spatial folder name
+#' @param setup_name subpop folder name
 #'
 #' @return
 #' @export
@@ -581,7 +582,7 @@ print_header <- function (
 
 
 #' Print Header Section
-#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file.
+#' @description Prints the subpop setup section (`subpop_setup`) of the configuration files. This section typically sits near the top of the configuration file.
 #'
 #' @param census_year integer(year)
 #' @param modeled_states vector of sub-populations (i.e., locations) that will be modeled. This can be different from the subpop IDs. For the US, state abbreviations are often used. This component is only used for filtering the data to the set of populations.
@@ -596,15 +597,15 @@ print_header <- function (
 #'
 #' @examples
 #'
-print_spatial_setup <- function (
+print_subpop_setup <- function (
         census_year = 2019,
         modeled_states = NULL,
         geodata_file = "geodata.csv",
         mobility_file = "mobility.csv",
         state_level = TRUE) {
 
     cat(
-        paste0("spatial_setup:\n",
+        paste0("subpop_setup:\n",
                "  census_year: ", census_year, "\n"),
         ifelse(!is.null(modeled_states),
                 paste0("  modeled_states:\n",

diff --git a/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml b/flepimop/R_packages/config.writer/tests/testthat/sample_config.yml
@@ -6,7 +6,7 @@ data_path: data
 nslots: 300
 dt: 0.25
 
-spatial_setup:
+subpop_setup:
   census_year: 2019
   modeled_states:
     - AL