Skip to content

Commit

Permalink
fix merge error
Browse files Browse the repository at this point in the history
Merge branch 'breaking-improvments' of https://github.com/HopkinsIDD/flepiMoP into breaking-improvments

# Conflicts:
#	flepimop/R_packages/config.writer/R/yaml_utils.R
  • Loading branch information
shauntruelove committed Sep 29, 2023
2 parents 8ea82a7 + 3567376 commit 927b4c0
Show file tree
Hide file tree
Showing 73 changed files with 1,134 additions and 393 deletions.
2 changes: 1 addition & 1 deletion batch/inference_job_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def autodetect_params(config, data_path, *, num_jobs=None, sims_per_job=None, nu
print(f"Setting number of blocks to {num_blocks} [via num_blocks (-k) argument]")
print(f"Setting sims per job to {sims_per_job} [via {iterations_per_slot} iterations_per_slot in config]")
else:
geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["spatial_setup"]["geodata"]
geodata_fname = pathlib.Path(data_path, config["data_path"]) / config["subpop_setup"]["geodata"]
with open(geodata_fname) as geodata_fp:
num_subpops = sum(1 for line in geodata_fp)

Expand Down
34 changes: 17 additions & 17 deletions datasetup/build_US_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# ```yaml
# data_path: <path to directory>
# spatial_setup:
# subpop_setup:
# modeled_states: <list of state postal codes> e.g. MD, CA, NY
# mobility: <path to file relative to data_path> optional; default is 'mobility.csv'
# geodata: <path to file relative to data_path> optional; default is 'geodata.csv'
Expand All @@ -23,8 +23,8 @@
#
# ## Output Data
#
# * {data_path}/{spatial_setup::mobility}
# * {data_path}/{spatial_setup::geodata}
# * {data_path}/{subpop_setup::mobility}
# * {data_path}/{subpop_setup::geodata}
#

## @cond
Expand Down Expand Up @@ -52,11 +52,11 @@ if (length(config) == 0) {
}

outdir <- config$data_path
filterUSPS <- config$spatial_setup$modeled_states
filterUSPS <- config$subpop_setup$modeled_states
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
# commute_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","commute_data.gz.parquet"))
Expand All @@ -80,7 +80,7 @@ tidycensus::census_api_key(key = census_key)


census_data <- tidycensus::get_acs(geography="county", state=filterUSPS,
variables="B01003_001", year=config$spatial_setup$census_year,
variables="B01003_001", year=config$subpop_setup$census_year,
keep_geo_vars=TRUE, geometry=FALSE, show_call=TRUE)
census_data <- census_data %>%
dplyr::rename(population=estimate, subpop=GEOID) %>%
Expand Down Expand Up @@ -137,12 +137,12 @@ if (state_level){
census_data <- census_data %>%
dplyr::arrange(population)

if (!is.null(config$spatial_setup$popnodes)) {
names(census_data)[names(census_data) == "population"] <- config$spatial_setup$popnodes
if (!is.null(config$subpop_setup$popnodes)) {
names(census_data)[names(census_data) == "population"] <- config$subpop_setup$popnodes
}

if (length(config$spatial_setup$geodata) > 0) {
geodata_file <- config$spatial_setup$geodata
if (length(config$subpop_setup$geodata) > 0) {
geodata_file <- config$subpop_setup$geodata
} else {
geodata_file <- 'geodata.csv'
}
Expand All @@ -155,13 +155,13 @@ print(paste("Wrote geodata file:", file.path(outdir, geodata_file)))
# MOBILITY DATA (COMMUTER DATA) ------------------------------------------------------------


if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){
if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){

warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility), "(specified in the config) currently exists. Please check again."))
warning(paste("State-level mobility files must be created manually because `build_US_setup.R` does not generate a state-level mobility file automatically. No valid mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility), "(specified in the config) currently exists. Please check again."))

} else if(state_level & file.exists(paste0(config$data_path, "/", config$spatial_setup$mobility))){
} else if(state_level & file.exists(paste0(config$data_path, "/", config$subpop_setup$mobility))){

warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$spatial_setup$mobility)))
warning(paste("Using existing state-level mobility file named", paste0(config$data_path, "/", config$subpop_setup$mobility)))

} else{

Expand All @@ -176,8 +176,8 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup

if(opt$w){
mobility_file <- 'mobility.txt'
} else if (length(config$spatial_setup$mobility) > 0) {
mobility_file <- config$spatial_setup$mobility
} else if (length(config$subpop_setup$mobility) > 0) {
mobility_file <- config$subpop_setup$mobility
} else {
mobility_file <- 'mobility.csv'
}
Expand Down Expand Up @@ -210,7 +210,7 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$spatial_setup
write.csv(file = file.path(outdir, mobility_file), rc, row.names=FALSE)

} else {
stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's spatial_setup::mobility.")
stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's subpop_setup::mobility.")
}

print(paste("Wrote mobility file:", file.path(outdir, mobility_file)))
Expand Down
14 changes: 7 additions & 7 deletions datasetup/build_covid_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ if (exists("config$inference$gt_source")) {
}

outdir <- config$data_path
filterUSPS <- config$spatial_setup$modeled_states
filterUSPS <- config$subpop_setup$modeled_states
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

Expand Down Expand Up @@ -218,7 +218,7 @@ if (any(grepl("fluview", opt$gt_data_source))){

max(fluview_data$Update)

census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata))
census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
fluview_data <- fluview_data %>%
dplyr::inner_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
dplyr::select(Update, source, FIPS, incidD)
Expand All @@ -235,7 +235,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>%
# mutate(gt_source = "nchs")
# fluview_data <- fluview_data %>%
# filter(source %in% config$spatial_setup$modeled_states)
# filter(source %in% config$subpop_setup$modeled_states)
# Update >= config$start_date,
# Update <= config$end_date_groundtruth)
gt_data <- append(gt_data, list(fluview_data))
Expand Down Expand Up @@ -283,7 +283,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
#
# max(fluview_data$Update)
#
# census_data <- read_csv(file = file.path(config$data_path, config$spatial_setup$geodata))
# census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
# fluview_data <- fluview_data %>%
# left_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
# dplyr::select(Update, source, FIPS, incidD)
Expand All @@ -300,7 +300,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
# fluview_data <- make_daily_data(data = fluview_data, current_timescale = "week") #%>%
# # mutate(gt_source = "nchs")
# # fluview_data <- fluview_data %>%
# # filter(source %in% config$spatial_setup$modeled_states)
# # filter(source %in% config$subpop_setup$modeled_states)
# # Update >= config$start_date,
# # Update <= config$end_date_groundtruth)
# gt_data <- append(gt_data, list(fluview_data))
Expand Down Expand Up @@ -372,7 +372,7 @@ us_data <- us_data %>%
filter(Update >= lubridate::as_date(config$start_date) & Update <= lubridate::as_date(end_date_))

# Filter to states we care about
locs <- config$spatial_setup$modeled_states
locs <- config$subpop_setup$modeled_states
us_data <- us_data %>%
filter(source %in% locs) %>%
filter(!is.na(source)) %>%
Expand Down
6 changes: 3 additions & 3 deletions datasetup/build_flu_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ if (length(config) == 0) {
}

outdir <- config$data_path
filterUSPS <- config$spatial_setup$modeled_states
filterUSPS <- config$subpop_setup$modeled_states
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE)
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

Expand All @@ -59,7 +59,7 @@ source("https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/d

# Pull daily hospitalizations for model run
us_data <- load_flu_hosp_data(temporal_resolution = 'daily', na.rm = TRUE)
locs <- read_csv(file.path(config$data_path, config$spatial_setup$geodata))
locs <- read_csv(file.path(config$data_path, config$subpop_setup$geodata))

# fix string pad issue on left side
us_data <- us_data %>%
Expand Down
8 changes: 4 additions & 4 deletions datasetup/build_nonUS_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# ```yaml
# data_path: <path to directory>
# spatial_setup:
# subpop_setup:
# modeled_states: <list of country ISO3 codes> e.g. ZMB, BGD, CAN
# mobility: <path to file relative to data_path> optional; default is 'mobility.csv'
# geodata: <path to file relative to data_path> optional; default is 'geodata.csv'
Expand All @@ -19,8 +19,8 @@
#
# ## Output Data
#
# * {data_path}/{spatial_setup::mobility}
# * {data_path}/{spatial_setup::geodata}
# * {data_path}/{subpop_setup::mobility}
# * {data_path}/{subpop_setup::geodata}
#

## @cond
Expand All @@ -42,7 +42,7 @@ if (length(config) == 0) {
}

outdir <- config$data_path
filterADMIN0 <- config$spatial_setup$modeled_states
filterADMIN0 <- config$subpop_setup$modeled_states

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

Expand Down
33 changes: 17 additions & 16 deletions flepimop/R_packages/config.writer/R/yaml_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ collapse_intervention<- function(dat){
dplyr::summarize(period = paste0(period, collapse="\n "))

if (exists("mtr$spatial_groups") && (!all(is.na(mtr$spatial_groups)) & !all(is.null(mtr$spatial_groups)))) {

mtr <- mtr %>%
dplyr::group_by(dplyr::across(-subpop)) %>%
dplyr::summarize(subpop = paste0(subpop, collapse='", "'),
spatial_groups = paste0(spatial_groups, collapse='", "')) %>%
subpop_groups = paste0(subpop_groups, collapse='", "')) %>%
dplyr::mutate(period = paste0(" ", period))

} else {
Expand All @@ -103,7 +104,7 @@ collapse_intervention<- function(dat){
}

reduce <- dat %>%
dplyr::select(USPS, subpop, contains("spatial_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>%
dplyr::select(USPS, subpop, contains("subpop_groups"), start_date, end_date, name, template, type, category, parameter, baseline_scenario, starts_with("value_"), starts_with("pert_")) %>%
dplyr::filter(template %in% c("SinglePeriodModifier", "ModifierModifier")) %>%
dplyr::mutate(end_date=paste0("period_end_date: ", end_date),
start_date=paste0("period_start_date: ", start_date)) %>%
Expand Down Expand Up @@ -149,9 +150,9 @@ yaml_mtr_template <- function(dat){
" groups:\n",
' - subpop: "all"\n'
))
if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
cat(paste0(
' spatial_groups: "all"\n'))
' subpop_groups: "all"\n'))
}

for(j in 1:nrow(dat)){
Expand All @@ -173,9 +174,9 @@ yaml_mtr_template <- function(dat){
cat(paste0(
' - subpop: ["', dat$subpop[j], '"]\n'))

if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
cat(paste0(
' spatial_groups: ["', dat$spatial_groups[j], '"]\n'))
' subpop_groups: ["', dat$subpop_groups[j], '"]\n'))
}
cat(paste0(
' periods:\n',
Expand Down Expand Up @@ -375,12 +376,12 @@ yaml_reduce_template<- function(dat){
} else {
paste0(' subpop: ["', dat$subpop, '"]\n')
},
if(!all(is.na(dat$spatial_groups)) & !all(is.null(dat$spatial_groups))){
if(all(dat$spatial_groups == "all")){
' spatial_groups: "all"\n'
if(!all(is.na(dat$subpop_groups)) & !all(is.null(dat$subpop_groups))){
if(all(dat$subpop_groups == "all")){
' subpop_groups: "all"\n'
} else {
paste0(' spatial_groups: \n',
paste(sapply(X=dat$spatial_groups, function(x = X) paste0(' - ["', paste(x, collapse = '", "'), '"]\n')), collapse = ""))
paste0(' subpop_groups: \n',
paste(sapply(X=dat$subpop_groups, function(x = X) paste0(' - ["', paste(x, collapse = '", "'), '"]\n')), collapse = ""))
}
},
dat$period,
Expand Down Expand Up @@ -526,7 +527,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){


#' Print Header Section
#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file.
#' @description Prints the global options and the subpop setup section of the configuration files. These typically sit at the top of the configuration file.
#'
#' @param sim_name name of simulation, typically named after the region/location you are modeling
#' @param setup_name # SMH, FCH
Expand All @@ -539,7 +540,7 @@ yaml_stack2 <- function (dat, scenario = "Inference", stack = TRUE){
#' @param nslots number of simulations to run
#' @param model_output_dirname
#' @param start_date_groundtruth
#' @param setup_name spatial folder name
#' @param setup_name subpop folder name
#'
#' @return
#' @export
Expand Down Expand Up @@ -581,7 +582,7 @@ print_header <- function (


#' Print Header Section
#' @description Prints the global options and the spatial setup section of the configuration files. These typically sit at the top of the configuration file.
#' @description Prints the global options and the subpop setup section of the configuration files. These typically sit at the top of the configuration file.
#'
#' @param census_year integer(year)
#' @param modeled_states vector of sub-populations (i.e., locations) that will be modeled. This can be different from the subpop IDs. For the US, state abbreviations are often used. This component is only used for filtering the data to the set of populations.
Expand All @@ -596,15 +597,15 @@ print_header <- function (
#'
#' @examples
#'
print_spatial_setup <- function (
print_subpop_setup <- function (
census_year = 2019,
modeled_states = NULL,
geodata_file = "geodata.csv",
mobility_file = "mobility.csv",
state_level = TRUE) {

cat(
paste0("spatial_setup:\n",
paste0("subpop_setup:\n",
" census_year: ", census_year, "\n"),
ifelse(!is.null(modeled_states),
paste0(" modeled_states:\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ data_path: data
nslots: 300
dt: 0.25

spatial_setup:
subpop_setup:
census_year: 2019
modeled_states:
- AL
Expand Down
Loading

0 comments on commit 927b4c0

Please sign in to comment.