Skip to content

Commit

Permalink
Removing config$data_path
Browse files (browse the repository at this point in the history)
First attempt at removing instances of `config$data_path` from R files.
  • Loading branch information
emprzy committed Apr 24, 2024
1 parent c15fe83 commit ecb9cb9
Show file tree
Hide file tree
Showing 12 changed files with 23 additions and 39 deletions.
13 changes: 5 additions & 8 deletions datasetup/build_US_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,10 @@ if (length(config) == 0) {
stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag")
}

outdir <- config$data_path
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
# commute_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","commute_data.gz.parquet"))
# census_data <- arrow::read_parquet(file.path(opt$p,"datasetup", "usdata","united-states-commutes","census_tracts_2010.gz.parquet"))

Expand Down Expand Up @@ -156,8 +153,8 @@ if (length(config$subpop_setup$geodata) > 0) {
# manually remove PR
census_data <- census_data %>% filter(USPS != "PR")

write.csv(file = file.path(outdir, geodata_file), census_data, row.names=FALSE)
print(paste("Wrote geodata file:", file.path(outdir, geodata_file)))
write.csv(file = file.path(geodata_file), census_data, row.names=FALSE)
print(paste("Wrote geodata file:", file.path(geodata_file)))



Expand Down Expand Up @@ -209,21 +206,21 @@ if(state_level & !file.exists(paste0(config$data_path, "/", config$subpop_setup$
print(census_data$subpop)
stop("There was a problem generating the mobility matrix")
}
write.table(file = file.path(outdir, mobility_file), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ")
write.table(file = file.path(mobility_file), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ")

} else if(endsWith(mobility_file, '.csv')) {

rc <- commute_data
names(rc) <- c("ori","dest","amount")

rc <- rc[rc$ori != rc$dest,]
write.csv(file = file.path(outdir, mobility_file), rc, row.names=FALSE)
write.csv(file = file.path(mobility_file), rc, row.names=FALSE)

} else {
stop("Only .txt and .csv extensions supported for mobility matrix. Please check config's subpop_setup::mobility.")
}

print(paste("Wrote mobility file:", file.path(outdir, mobility_file)))
print(paste("Wrote mobility file:", file.path(mobility_file)))
}


Expand Down
7 changes: 2 additions & 5 deletions datasetup/build_covid_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,14 @@ if (exists("config$inference$gt_source")) {
opt$gt_data_source <- config$inference$gt_source
}

outdir <- config$data_path
# filterUSPS <- config$subpop_setup$modeled_states
filterUSPS <- c("WY","VT","DC","AK","ND","SD","DE","MT","RI","ME","NH","HI","ID","WV","NE","NM",
"KS","NV","MS","AR","UT","IA","CT","OK","OR","KY","LA","AL","SC","MN","CO","WI",
"MD","MO","IN","TN","MA","AZ","WA","VA","NJ","MI","NC","GA","OH","IL","PA","NY","FL","TX","CA")
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# source data functions
source(file.path(opt$path, "datasetup/data_setup_source.R"))
Expand Down Expand Up @@ -221,7 +218,7 @@ if (any(grepl("fluview", opt$gt_data_source))){

max(fluview_data$Update)

census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
census_data <- read_csv(file = file.path(config$subpop_setup$geodata))
fluview_data <- fluview_data %>%
dplyr::inner_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
dplyr::select(Update, source, FIPS, incidD)
Expand Down Expand Up @@ -286,7 +283,7 @@ if (any(grepl("fluview", opt$gt_data_source))){
#
# max(fluview_data$Update)
#
# census_data <- read_csv(file = file.path(config$data_path, config$subpop_setup$geodata))
# census_data <- read_csv(file = file.path(config$subpop_setup$geodata))
# fluview_data <- fluview_data %>%
# left_join(census_data %>% dplyr::select(source = USPS, FIPS = subpop)) %>%
# dplyr::select(Update, source, FIPS, incidD)
Expand Down
7 changes: 2 additions & 5 deletions datasetup/build_flu_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,11 @@ if (length(config) == 0) {
stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag")
}

outdir <- config$data_path
filterUSPS <- config$subpop_setup$modeled_states
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Aggregation to state level if in config
state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop_setup$state_level, TRUE, FALSE)

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)



Expand All @@ -59,7 +56,7 @@ source("https://raw.githubusercontent.com/cdcepi/Flusight-forecast-data/master/d

# Pull daily hospitalizations for model run
us_data <- load_flu_hosp_data(temporal_resolution = 'daily', na.rm = TRUE)
locs <- read_csv(file.path(config$data_path, config$subpop_setup$geodata))
locs <- read_csv(file.path(config$subpop_setup$geodata))

# fix string pad issue on left side
us_data <- us_data %>%
Expand Down Expand Up @@ -100,7 +97,7 @@ adjust_for_variant <- !is.null(variant_props_file)
# if (adjust_for_variant){
#
# # Variant Data (need to automate this data pull still)
# #variant_data <- read_csv(file.path(config$data_path, "variant/WHO_NREVSS_Clinical_Labs.csv"), skip = 1)
# #variant_data <- read_csv(file.path("variant/WHO_NREVSS_Clinical_Labs.csv"), skip = 1)
# variant_data <- cdcfluview::who_nrevss(region="state", years = 2022)$clinical_labs
#
# # location data
Expand Down
14 changes: 6 additions & 8 deletions datasetup/build_nonUS_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,14 @@ if (length(config) == 0) {
stop("no configuration found -- please set CONFIG_PATH environment variable or use the -c command flag")
}

outdir <- config$data_path
filterADMIN0 <- config$subpop_setup$modeled_states

dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# Read in needed data
commute_data <- readr::read_csv(file.path(config$data_path, "geodata", opt$mobility)) %>%
commute_data <- readr::read_csv(file.path("geodata", opt$mobility)) %>%
mutate(OGEOID = as.character(OGEOID),
DGEOID = as.character(DGEOID))
census_data <- readr::read_csv(file.path(config$data_path, "geodata", opt$population)) %>%
census_data <- readr::read_csv(file.path("geodata", opt$population)) %>%
mutate(GEOID = as.character(GEOID))

# Filter if needed
Expand Down Expand Up @@ -97,22 +95,22 @@ if(opt$w){
if(!isTRUE(all(rc$OGEOID == census_data$GEOID))){
stop("There was a problem generating the mobility matrix")
}
write.table(file = file.path(outdir,'mobility.txt'), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ")
write.table(file = file.path('mobility.txt'), as.matrix(rc[,-1]), row.names=FALSE, col.names = FALSE, sep = " ")
} else {
names(rc) <- c("ori","dest","amount")
rc <- rc[rc$ori != rc$dest,]
write.csv(file = file.path(outdir,'mobility.csv'), rc, row.names=FALSE)
write.csv(file = file.path('mobility.csv'), rc, row.names=FALSE)
}

# Save population geodata
names(census_data) <- c("subpop","admin2","admin0","pop")
write.csv(file = file.path(outdir,'geodata.csv'), census_data,row.names=FALSE)
write.csv(file = file.path('geodata.csv'), census_data,row.names=FALSE)

print("Census Data Check (up to 6 rows)")
print(head(census_data))
print("Commute Data Check (up to 6 rows)")
print(head(commute_data))

print(paste0("mobility.csv/.txt and geodata.csv saved to: ", outdir))
#print(paste0("mobility.csv/.txt and geodata.csv saved to: ", outdir))


4 changes: 2 additions & 2 deletions flepimop/R_packages/flepicommon/R/config_test_new.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ validation_list$subpop_setup$geodata <- function(value, full_config,config_name)
print("No geodata path mentioned in the configuration file")
return(FALSE)
}else{
path=paste(full_config$data_path,'/',value,sep='')
path=paste(full_config$subpop_setup$geodata,'/',value,sep='')
if (!file.exists(path)) {
print(paste("The mentioned geodata file :", value, "could not be found."))
return(FALSE)
Expand All @@ -136,7 +136,7 @@ validation_list$subpop_setup$mobility <- function(value, full_config,config_name
print("No mobility path mentioned in the configuration file")
return(FALSE)
}else{
path=paste(full_config$data_path,'/',value,sep='')
path=paste(full_config$subpop_setup$mobility,'/',value,sep='')
if (!file.exists(path)) {
print(paste("The mentioned mobility file :", value, "could not be found."))
return(FALSE)
Expand Down
2 changes: 1 addition & 1 deletion flepimop/main_scripts/create_seeding.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ all_times <- lubridate::ymd(config$start_date) +
seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date))

geodata <- flepicommon::load_geodata_file(
file.path(config$data_path, config$subpop_setup$geodata),
file.path(config$subpop_setup$geodata),
5,
"0",
TRUE
Expand Down
2 changes: 1 addition & 1 deletion flepimop/main_scripts/create_seeding_added.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ all_times <- lubridate::ymd(config$start_date) +
seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date))

geodata <- flepicommon::load_geodata_file(
file.path(config$data_path, config$subpop_setup$geodata),
file.path(config$subpop_setup$geodata),
5,
"0",
TRUE
Expand Down
5 changes: 0 additions & 5 deletions flepimop/main_scripts/inference_slot.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ state_level <- ifelse(!is.null(config$subpop_setup$state_level) && config$subpop
suppressMessages(
geodata <- flepicommon::load_geodata_file(
paste(
config$data_path,
config$subpop_setup$geodata, sep = "/"
),
subpop_len = ifelse(config$name == "USA", opt$subpop_len, 0),
Expand All @@ -152,10 +151,6 @@ obs_subpop <- "subpop"

##Define data directory and create if it does not exist
gt_data_path <- config$inference$gt_data_path
data_dir <- dirname(config$data_path)
if (!dir.exists(data_dir)){
suppressWarnings(dir.create(data_dir, recursive = TRUE))
}

## backwards compatibility with configs that don't have inference$gt_source parameter will use the previous default data source (USA Facts)
if (is.null(config$inference$gt_source)){
Expand Down
2 changes: 1 addition & 1 deletion postprocessing/postprocess_snapshot.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ print(opt$select_outputs)
config <- flepicommon::load_config(opt$config)

# Pull in subpop data
geodata <- setDT(read.csv(file.path(config$data_path, config$subpop_setup$geodata))) %>%
geodata <- setDT(read.csv(file.path(config$subpop_setup$geodata))) %>%
.[, subpop := stringr::str_pad(subpop, width = 5, side = "left", pad = "0")]

subpops <- unique(geodata$subpop)
Expand Down
2 changes: 1 addition & 1 deletion postprocessing/run_sim_processing_FluSightExample.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr
override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!!


geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata)
geodata_file_path = file.path(config$subpop_setup$geodata)


# SUBMISSION & PROCESSING SPECIFICS ----------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion postprocessing/run_sim_processing_SLURM.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ if(tolower(smh_or_fch) == "fch"){
}
scenarios <- scenarios[scenario_num]

geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata)
geodata_file_path = file.path(config$subpop_setup$geodata)

print(disease)

Expand Down
2 changes: 1 addition & 1 deletion postprocessing/run_sim_processing_TEMPLATE.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ scenario_s3_buckets <- scenario_s3_buckets[scenario_num] # automatically pull fr
override_pull_from_s3 <- override_pull_from_s3[scenario_num] # !!!! VERY IMPORTANT - LEAVE FALSE UNLESS YOU ARE REWRITING THE CURRENT S3 DATA !!!!


geodata_file_path = file.path(config$data_path, config$subpop_setup$geodata)
geodata_file_path = file.path(config$subpop_setup$geodata)



Expand Down

0 comments on commit ecb9cb9

Please sign in to comment.