diff --git a/datasetup/build_US_setup.R b/datasetup/build_US_setup.R index 9cde61b96..825e2ea05 100644 --- a/datasetup/build_US_setup.R +++ b/datasetup/build_US_setup.R @@ -51,13 +51,10 @@ if (length(config) == 0) { stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag") } -outdir <- config$data_path -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # commute_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","commute_data.gz.parquet")) # census_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","census_tracts_2010.gz.parquet")) @@ -156,8 +153,8 @@ if (length(config$subpop_setup$geodata) > 0) { # manually remove PR census_data <- census_data %>% filter(USPS != "PR") -write.csv(file = file.path(outdir, geodata_file), census_data, row.names=FALSE) -print(paste("Wrote geodata file:", file.path(outdir, geodata_file))) +write.csv(file = file.path(geodata_file), census_data, row.names=FALSE) +print(paste("Wrote geodata file:", file.path(geodata_file))) @@ -209,7 +206,7 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$ print(census_data$subpop) stop("There was a problem generating the mobility matrix") } - write.table(file = file.path(outdir, mobility_file), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ") + write.table(file = file.path(mobility_file), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ") } else if(endsWith(mobility_file, '.csv')) { @@ -217,13 +214,13 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$ names(rc) <- c("ori","dest","amount") rc <- rc[rc$ori != rc$dest,] - write.csv(file = file.path(outdir, 
mobility_file), rc, row.names=FALSE) + write.csv(file = file.path(mobility_file), rc, row.names=FALSE) } else { stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's subpop_setup::mobility.") } - print(paste("Wrote mobility file:", file.path(outdir, mobility_file))) + print(paste("Wrote mobility file:", file.path(mobility_file))) } diff --git a/datasetup/build_covid_data.R b/datasetup/build_covid_data.R index b31d526c5..1d22eac18 100644 --- a/datasetup/build_covid_data.R +++ b/datasetup/build_covid_data.R @@ -30,17 +30,14 @@ if (exists("config$inference$gt_source")) { opt$gt_data_source <- config$inference$gt_source } -outdir <- config$data_path # filterUSPS <- config$subpop_setup$modeled_states filterUSPS <- c("WY","VT","DC","AK","ND","SD","DE","MT","RI","ME","NH","HI","ID","WV","NE","NM", "KS","NV","MS","AR","UT","IA","CT","OK","OR","KY","LA","AL","SC","MN","CO","WI", "MD","MO","IN","TN","MA","AZ","WA","VA","NJ","MI","NC","GA","OH","IL","PA","NY","FL","TX","CA") -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # source data functions source(file.path(opt$path, "datasetup/data_setup_source.R")) @@ -221,7 +218,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ max(fluview_data$Update) - census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata)) + census_data <- read_csv(file = file.path(config$subpop_setup$geodata)) fluview_data <- fluview_data %>% dplyr::inner_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>% dplyr::select(Update, source, FIPS, incidD) @@ -286,7 +283,7 @@ if (any(grepl("fluview", opt$gt_data_source))){ # # max(fluview_data$Update) # -# census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata)) +# census_data <- 
read_csv(file = file.path(config$subpop_setup$geodata)) # fluview_data <- fluview_data %>% # left_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>% # dplyr::select(Update, source, FIPS, incidD) diff --git a/datasetup/build_flu_data.R b/datasetup/build_flu_data.R index 3bda72103..0ba058da9 100644 --- a/datasetup/build_flu_data.R +++ b/datasetup/build_flu_data.R @@ -31,14 +31,11 @@ if (length(config) == 0) { stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag") } -outdir <- config$data_path filterUSPS <- config$subpop_setup$modeled_states -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Aggregation to state level if in config state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE) -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) @@ -59,7 +56,7 @@ source("https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/d # Pull daily hospitalizations for model run us_data <- load_flu_hosp_data(temporal_resolution = 'daily', na.rm = TRUE) -locs <- read_csv(file.path(config$data_path, config$subpop_setup$geodata)) +locs <- read_csv(file.path(config$subpop_setup$geodata)) # fix string pad issue on left side us_data <- us_data %>% @@ -100,7 +97,7 @@ adjust_for_variant <- !is.null(variant_props_file) # if (adjust_for_variant){ # # # Variant Data (need to automate this data pull still) -# #variant_data <- read_csv(file.path(config$data_path, "variant/WHO_NREVSS_Clinical_Labs.csv"), skip = 1) +# #variant_data <- read_csv(file.path("variant/WHO_NREVSS_Clinical_Labs.csv"), skip = 1) # variant_data <- cdcfluview::who_nrevss(region="state", years = 2022)$clinical_labs # # # location data diff --git a/datasetup/build_nonUS_setup.R b/datasetup/build_nonUS_setup.R index 4ba52c8b2..7af1ccf7b 100644 --- a/datasetup/build_nonUS_setup.R +++ b/datasetup/build_nonUS_setup.R @@ -41,16 +41,14 @@ if (length(config) == 0) { stop("no 
configuration found -- please set CONFIG_PATH environment variable or use the -c command flag") } -outdir <- config$data_path filterADMIN0 <- config$subpop_setup$modeled_states -dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Read in needed data -commute_data <- readr::read_csv(file.path(config$data_path, "geodata", opt$mobility)) %>% +commute_data <- readr::read_csv(file.path("geodata", opt$mobility)) %>% mutate(OGEOID = as.character(OGEOID), DGEOID = as.character(DGEOID)) -census_data <- readr::read_csv(file.path(config$data_path, "geodata", opt$population)) %>% +census_data <- readr::read_csv(file.path("geodata", opt$population)) %>% mutate(GEOID = as.character(GEOID)) # Filter if needed @@ -97,22 +95,22 @@ if(opt$w){ if(!isTRUE(all(rc$OGEOID == census_data$GEOID))){ stop("There was a problem generating the mobility matrix") } - write.table(file = file.path(outdir,'mobility.txt'), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ") + write.table(file = file.path('mobility.txt'), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ") } else { names(rc) <- c("ori","dest","amount") rc <- rc[rc$ori != rc$dest,] - write.csv(file = file.path(outdir,'mobility.csv'), rc, row.names=FALSE) + write.csv(file = file.path('mobility.csv'), rc, row.names=FALSE) } # Save population geodata names(census_data) <- c("subpop","admin2","admin0","pop") -write.csv(file = file.path(outdir,'geodata.csv'), census_data,row.names=FALSE) +write.csv(file = file.path('geodata.csv'), census_data,row.names=FALSE) print("Census Data Check (up to 6 rows)") print(head(census_data)) print("Commute Data Check (up to 6 rows)") print(head(commute_data)) -print(paste0("mobility.csv/.txt and geodata.csv saved to: ", outdir)) +print(paste0("mobility.csv/.txt and geodata.csv saved to: ", getwd())) diff --git a/flepimop/R_packages/flepicommon/R/config_test_new.R b/flepimop/R_packages/flepicommon/R/config_test_new.R index 617c29be7..bee197a4e 100644 --- 
a/flepimop/R_packages/flepicommon/R/config_test_new.R +++ b/flepimop/R_packages/flepicommon/R/config_test_new.R @@ -122,7 +122,7 @@ validation_list$subpop_setup$geodata <- function(value, full_config,config_name) print("No geodata path mentioned in the configuration file") return(FALSE) }else{ - path=paste(full_config$data_path,'/',value,sep='') + path=value # data_path prefix was dropped: use the configured geodata path as given if (!file.exists(path)) { print(paste("The mentioned geodata file :", value, "could not be found.")) return(FALSE) @@ -136,7 +136,7 @@ validation_list$subpop_setup$mobility <- function(value, full_config,config_name print("No mobility path mentioned in the configuration file") return(FALSE) }else{ - path=paste(full_config$data_path,'/',value,sep='') + path=value # data_path prefix was dropped: use the configured mobility path as given if (!file.exists(path)) { print(paste("The mentioned mobility file :", value, "could not be found.")) return(FALSE) diff --git a/flepimop/main_scripts/create_seeding.R b/flepimop/main_scripts/create_seeding.R index 2f49309c0..6027924c2 100644 --- a/flepimop/main_scripts/create_seeding.R +++ b/flepimop/main_scripts/create_seeding.R @@ -302,7 +302,7 @@ all_times <- lubridate::ymd(config$start_date) + seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date)) geodata <- flepicommon::load_geodata_file( - file.path(config$data_path, config$subpop_setup$geodata), + file.path(config$subpop_setup$geodata), 5, "0", TRUE diff --git a/flepimop/main_scripts/create_seeding_added.R b/flepimop/main_scripts/create_seeding_added.R index d9ca6403a..0ae4b1462 100644 --- a/flepimop/main_scripts/create_seeding_added.R +++ b/flepimop/main_scripts/create_seeding_added.R @@ -264,7 +264,7 @@ all_times <- lubridate::ymd(config$start_date) + seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date)) geodata <- flepicommon::load_geodata_file( - file.path(config$data_path, config$subpop_setup$geodata), + file.path(config$subpop_setup$geodata), 
5, "0", TRUE diff --git a/flepimop/main_scripts/inference_slot.R b/flepimop/main_scripts/inference_slot.R index 47cd218d4..fb5513d7b 100644 --- a/flepimop/main_scripts/inference_slot.R +++ b/flepimop/main_scripts/inference_slot.R @@ -141,7 +141,6 @@ state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop suppressMessages( geodata <- flepicommon::load_geodata_file( paste( - config$data_path, config$subpop_setup$geodata, sep = "/" ), subpop_len = ifelse(config$name == "USA", opt$subpop_len, 0), @@ -154,10 +153,6 @@ obs_subpop <- "subpop" ##Define data directory and create if it does not exist gt_data_path <- config$inference$gt_data_path -data_dir <- dirname(config$data_path) -if (!dir.exists(data_dir)){ - suppressWarnings(dir.create(data_dir, recursive = TRUE)) -} ## backwards compatibility with configs that don't have inference$gt_source parameter will use the previous default data source (USA Facts) if (is.null(config$inference$gt_source)){ diff --git a/postprocessing/postprocess_snapshot.R b/postprocessing/postprocess_snapshot.R index 008366600..ff509844a 100644 --- a/postprocessing/postprocess_snapshot.R +++ b/postprocessing/postprocess_snapshot.R @@ -60,7 +60,7 @@ print(opt$select_outputs) config <- flepicommon::load_config(opt$config) # Pull in subpop data -geodata <- setDT(read.csv(file.path(config$data_path, 
config$subpop_setup$geodata))) %>% +geodata <- setDT(read.csv(file.path(config$subpop_setup$geodata))) %>% .[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")] subpops <- unique(geodata$subpop) diff --git a/postprocessing/run_sim_processing_FluSightExample.R b/postprocessing/run_sim_processing_FluSightExample.R index 84676ad25..67a5a44e3 100644 --- a/postprocessing/run_sim_processing_FluSightExample.R +++ b/postprocessing/run_sim_processing_FluSightExample.R @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!! -geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) +geodata_file_path = file.path(config$subpop_setup$geodata) # SUBMISSION & PROCESSING SPECIFICS ---------------------------------------------------- diff --git a/postprocessing/run_sim_processing_SLURM.R b/postprocessing/run_sim_processing_SLURM.R index 085cecc47..b67b14dd9 100644 --- a/postprocessing/run_sim_processing_SLURM.R +++ b/postprocessing/run_sim_processing_SLURM.R @@ -164,7 +164,7 @@ if(tolower(smh_or_fch) == "fch"){ } scenarios <- scenarios[scenario_num] -geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) +geodata_file_path = file.path(config$subpop_setup$geodata) print(disease) diff --git a/postprocessing/run_sim_processing_TEMPLATE.R b/postprocessing/run_sim_processing_TEMPLATE.R index ba15dfc9b..4571b14c7 100644 --- a/postprocessing/run_sim_processing_TEMPLATE.R +++ b/postprocessing/run_sim_processing_TEMPLATE.R @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!! 
-geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata) +geodata_file_path = file.path(config$subpop_setup$geodata)