Skip to content

Commit

Permalink
change time to date in the R code, for groundtruth and python read files
Browse files Browse the repository at this point in the history
  • Loading branch information
jcblemai committed Apr 16, 2024
1 parent 30c2561 commit 7c08809
Show file tree
Hide file tree
Showing 15 changed files with 112 additions and 116 deletions.
6 changes: 3 additions & 3 deletions flepimop/R_packages/flepicommon/R/DataUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ read_file_of_type <- function(extension,...){
if(extension == 'csv'){
return(function(x){suppressWarnings(readr::read_csv(x,,col_types = cols(
.default = col_double(),
time=col_date(),
darw=col_date(),
uid=col_character(),
comp=col_character(),
subpop=col_character()
Expand All @@ -101,8 +101,8 @@ read_file_of_type <- function(extension,...){
if(extension == 'parquet'){
return(function(x){
tmp <- arrow::read_parquet(x)
if("POSIXct" %in% class(tmp$time)){
tmp$time <- lubridate::as_date(tz="GMT",tmp$time)
if("POSIXct" %in% class(tmp$date)){
tmp$date <- lubridate::as_date(tz="GMT",tmp$date)
}
tmp
})
Expand Down
8 changes: 4 additions & 4 deletions flepimop/R_packages/flepiconfig/R/create_config_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ daily_mean_reduction <- function(dat,
) %>%
dplyr::select(USPS, subpop, start_date, end_date, mean)

timeline <- tidyr::crossing(time = seq(from=min(dat$start_date), to=max(dat$end_date), by = 1),
timeline <- tidyr::crossing(date = seq(from=min(dat$start_date), to=max(dat$end_date), by = 1),
subpop = unique(dat$subpop))

if(any(stringr::str_detect(dat$subpop, '", "'))){
Expand Down Expand Up @@ -1175,12 +1175,12 @@ daily_mean_reduction <- function(dat,
dplyr::select(subpop, start_date, end_date, mean) %>%
dplyr::bind_rows(dat %>% dplyr::filter(subpop!="all") %>% dplyr::ungroup() %>% dplyr::select(-USPS)) %>%
dplyr::left_join(timeline) %>%
dplyr::filter(time >= start_date & time <= end_date) %>%
dplyr::group_by(subpop, time) %>%
dplyr::filter(date >= start_date & date <= end_date) %>%
dplyr::group_by(subpop, date) %>%
dplyr::summarize(mean = prod(1-mean))

if(plot){
dat<- ggplot2::ggplot(data= dat, ggplot2::aes(x=time, y=mean))+
dat<- ggplot2::ggplot(data= dat, ggplot2::aes(x=date, y=mean))+
ggplot2::geom_line()+
ggplot2::facet_wrap(~subpop)+
ggplot2::theme_bw()+
Expand Down
6 changes: 3 additions & 3 deletions flepimop/R_packages/inference/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,9 @@ calc_prior_likadj <- function(params,
##'
compute_cumulative_counts <- function(sim_hosp) {
res <- sim_hosp %>%
gather(var, value, -time, -subpop) %>%
gather(var, value, -date, -subpop) %>%
group_by(subpop, var) %>%
arrange(time) %>%
arrange(date) %>%
mutate(cumul = cumsum(value)) %>%
ungroup() %>%
pivot_wider(names_from = "var", values_from = c("value", "cumul")) %>%
Expand All @@ -324,7 +324,7 @@ compute_cumulative_counts <- function(sim_hosp) {
##'
compute_totals <- function(sim_hosp) {
sim_hosp %>%
group_by(time) %>%
group_by(date) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
mutate(subpop = "all") %>%
select(all_of(colnames(sim_hosp))) %>%
Expand Down
4 changes: 2 additions & 2 deletions flepimop/R_packages/inference/R/inference_slot_runner_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ aggregate_and_calc_loc_likelihoods <- function(
dplyr::filter(
modeled_outcome,
!!rlang::sym(obs_subpop) == location,
time %in% unique(obs$date[obs$subpop == location])
date %in% unique(obs$date[obs$subpop == location])
) %>%
## Reformat into form the algorithm is looking for
inference::getStats(
"time",
"date",
"sim_var",
stat_list = targets_config,
start_date = start_date,
Expand Down
20 changes: 10 additions & 10 deletions flepimop/R_packages/inference/R/inference_to_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cum_death_forecast <- function (sim_data,
require(dplyr)

rc <- sim_data %>%
filter(time>start_date)%>%
filter(date>start_date)%>%
inner_join(cum_dat)%>%
group_by(sim_num, !!sym(loc_column))%>%
mutate(cum_deaths_corr = cumsum(incidD)+cumDeaths)%>%
Expand All @@ -39,7 +39,7 @@ cum_death_forecast <- function (sim_data,
##' @param weights if not NA, the weights for the mean
##' @param loc_col the name of the location column
##'
##' @return a forecast with columns time (end day), quantile steps_ahead and deaths
##' @return a forecast with columns date (end day), quantile steps_ahead and deaths
##'
##' @export
create_cum_death_forecast <- function(sim_data,
Expand All @@ -51,15 +51,15 @@ create_cum_death_forecast <- function(sim_data,
loc_column="USPS") {

##Sanity checks
if(forecast_date>max(obs_data$time)+1) {stop("forecast date must be within one day after the range of observed times")}
if(forecast_date+1<min(sim_data$time)) {stop("no simulation support for first forecast date")}
if(forecast_date>max(obs_data$date)+1) {stop("forecast date must be within one day after the range of observed times")}
if(forecast_date+1<min(sim_data$date)) {stop("no simulation support for first forecast date")}

if(max(obs_data$time)==forecast_date){
if(max(obs_data$date)==forecast_date){
## USA Facts Data updates mid-day so forecasts run after noon will have a forecast date that overlaps with the obs_data
##convert data to a cumdeath forecast.
print(glue::glue("Accumulate deaths through {forecast_date}, typically for USA Facts aggregation after noon."))
start_deaths <- obs_data%>%
filter(time==forecast_date)%>%
filter(date==forecast_date)%>%
select(!!sym(loc_column),cumDeaths)

forecast_sims <- cum_death_forecast(sim_data,
Expand All @@ -70,7 +70,7 @@ create_cum_death_forecast <- function(sim_data,
## CSSE data updates at midnight so forecasts will not typically have a forecast date one day after the end of the obs_data
print(glue::glue("Accumulate deaths through {forecast_date-1}, typically for CSSE aggregation."))
start_deaths <- obs_data%>%
filter(time==forecast_date-1)%>%
filter(date==forecast_date-1)%>%
select(!!sym(loc_column),cumDeaths)

forecast_sims <- cum_death_forecast(sim_data,
Expand All @@ -88,7 +88,7 @@ create_cum_death_forecast <- function(sim_data,
}

rc <- forecast_sims%>%
group_by(time, !!sym(loc_column))%>%
group_by(date, !!sym(loc_column))%>%
summarize(x=list(enframe(c(quantile(cum_deaths_corr, probs=c(0.01, 0.025,
seq(0.05, 0.95, by = 0.05), 0.975, 0.99)),
mean=mean(cum_deaths_corr)),
Expand All @@ -99,11 +99,11 @@ create_cum_death_forecast <- function(sim_data,
##Append on the the other deaths.
rc<-dplyr::bind_rows(rc,
obs_data%>%
select(time, !!sym(loc_column), cumDeaths)%>%
select(date, !!sym(loc_column), cumDeaths)%>%
mutate(quantile="data"))

rc<- rc%>%
mutate(steps_ahead=as.numeric(time-forecast_date))
mutate(steps_ahead=as.numeric(date-forecast_date))

return(rc)

Expand Down
Loading

0 comments on commit 7c08809

Please sign in to comment.