Skip to content

Commit

Permalink
more fixes for aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
saraloo committed Jul 26, 2024
1 parent befd4bf commit d507ac1
Showing 1 changed file with 168 additions and 54 deletions.
222 changes: 168 additions & 54 deletions postprocessing/postprocess_snapshot.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,20 +175,64 @@ if("hosp" %in% model_outputs){
statistics <- purrr::flatten(config$inference$statistics[i])
cols_sim <- c("date", statistics$sim_var, "subpop","slot")
cols_data <- c("date", "subpop", statistics$data_var)

# aggregate based on what is in the config
df_sim <- lapply(subpop_names, function(y) {
lapply(unique(hosp_outputs_global$slot), function(x)
purrr::flatten_df(inference::getStats(
hosp_outputs_global_tmp %>% .[subpop == y & slot == x] ,
"date",
"sim_var",
stat_list = config$inference$statistics[i],
start_date = config$start_date_groundtruth,
end_date = config$end_date_groundtruth
)) %>% dplyr::mutate(subpop = y, slot = x)) %>% dplyr::bind_rows()
}) %>% dplyr::bind_rows()

df_data <- lapply(subpop_names, function(x) {
purrr::flatten_df(
inference::getStats(
gt_data %>% .[subpop == x,..cols_data],
"date",
"data_var",
stat_list = config$inference$statistics[i],
start_date = config$start_date_groundtruth,
end_date = config$end_date_groundtruth
)) %>% dplyr::mutate(subpop = x) %>%
mutate(data_var = as.numeric(data_var)) }) %>% dplyr::bind_rows()


## summarize slots
print(outputs_global$hosp %>%
.[, ..cols_sim] %>%
# print(outputs_global$hosp %>%
# .[, ..cols_sim] %>%
# .[, date := lubridate::as_date(date)] %>%
# .[, as.list(quantile(get(statistics$sim_var), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>%
# ggplot() +
# geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) +
# geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) +
# geom_line(aes(x = date, y = V3)) +
# geom_point(data = gt_data %>%
# .[, ..cols_data],
# aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) +
# facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
# labs(x = 'date', y = fit_stats[i], title = statistics$sim_var) +
# theme_classic()
# )
print(
df_sim %>%
setDT() %>%
.[, date := lubridate::as_date(date)] %>%
.[, as.list(quantile(get(statistics$sim_var), c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>%
.[, as.list(quantile(sim_var, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>%
setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>%
ggplot() +
geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) +
geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) +
geom_line(aes(x = date, y = V3)) +
geom_point(data = gt_data %>%
.[, ..cols_data],
aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) +
facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
labs(x = 'date', y = fit_stats[i], title = statistics$sim_var) +
geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) +
geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) +
geom_line(aes(x = date, y = q0.5)) +
# if inference, plot gt along side
geom_point(data = df_data,
aes(lubridate::as_date(date), data_var), color = 'firebrick', alpha = 0.2, size=1) +
facet_wrap(~subpop, scales = 'free') +
labs(x = 'date', y = fit_stats[i]) +
theme_classic()
)

Expand All @@ -206,23 +250,44 @@ if("hosp" %in% model_outputs){
# )

## plot cumulatives
print(outputs_global$hosp %>%
.[, ..cols_sim] %>%
.[, date := lubridate::as_date(date)] %>%
.[, csum := cumsum(get(statistics$sim_var)), by = .(subpop, slot)] %>%
.[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>%
ggplot() +
geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) +
geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) +
geom_line(aes(x = date, y = V3)) +
geom_point(data = gt_data %>%
.[, ..cols_data] %>%
.[, csum := cumsum(replace_na(get(statistics$data_var), 0)) , by = .(subpop)]
,
aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) +
facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
labs(x = 'date', y = fit_stats[i], title = paste0("cumulative ", statistics$sim_var)) +
theme_classic()
print(
df_sim %>%
setDT() %>%
.[, date := lubridate::as_date(date)] %>%
.[, .(date, subpop, sim_var, slot)] %>%
data.table::melt(., id.vars = c("date", "slot", "subpop")) %>%
# dplyr::arrange(subpop, slot, date) %>%
.[, csum := cumsum(value), by = .(slot, subpop, variable)] %>%
.[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", config$subpop_setup$subpop)] %>%
setnames(., paste0("V", 1:5), paste0("q", c(.05,.25,.5,.75,.95))) %>%
ggplot() +
geom_ribbon(aes(x = date, ymin = q0.05, ymax = q0.95), alpha = 0.1) +
geom_ribbon(aes(x = date, ymin = q0.25, ymax = q0.75), alpha = 0.1) +
geom_line(aes(x = date, y = q0.5)) +
geom_point(data = df_data %>% setDT() %>%
.[, csum := cumsum(data_var) , by = .(subpop)],
aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.2, size=1) +
facet_wrap(~subpop, scales = 'free') +
# facet_wrap(~get(subpop), scales = 'free') +
labs(x = 'date', y = paste0("cumulative ", fit_stats[i])) +
theme_classic()
# outputs_global$hosp %>%
# .[, ..cols_sim] %>%
# .[, date := lubridate::as_date(date)] %>%
# .[, csum := cumsum(get(statistics$sim_var)), by = .(subpop, slot)] %>%
# .[, as.list(quantile(csum, c(.05, .25, .5, .75, .95), na.rm = TRUE, names = FALSE)), by = c("date", "subpop")] %>%
# ggplot() +
# geom_ribbon(aes(x = date, ymin = V1, ymax = V5), alpha = 0.1) +
# geom_ribbon(aes(x = date, ymin = V2, ymax = V4), alpha = 0.1) +
# geom_line(aes(x = date, y = V3)) +
# geom_point(data = gt_data %>%
# .[, ..cols_data] %>%
# .[, csum := cumsum(replace_na(get(statistics$data_var), 0)) , by = .(subpop)]
# ,
# aes(lubridate::as_date(date), csum), color = 'firebrick', alpha = 0.1) +
# facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
# labs(x = 'date', y = fit_stats[i], title = paste0("cumulative ", statistics$sim_var)) +
# theme_classic()
)

}
Expand All @@ -231,6 +296,18 @@ if("hosp" %in% model_outputs){

## hosp by highest and lowest llik

if("llik" %in% model_outputs){
llik_rank <- copy(outputs_global$llik) %>%
.[, .SD[order(ll)], subpop]
high_low_llik <- rbindlist(list(data.table(llik_rank, key = "subpop") %>%
.[, head(.SD,5), by = subpop] %>%
.[, llik_bin := "top"],
data.table(llik_rank, key = "subpop") %>%
.[, tail(.SD,5), by = subpop]%>%
.[, llik_bin := "bottom"])
)
}

fname <- paste0("pplot/hosp_by_llik_mod_outputs_", opt$run_id,".pdf")
# pdf_dims <- data.frame(width = gg_cols*2, length = num_nodes/gg_cols * 2)
# pdf(fname, width = pdf_dims$width, height = pdf_dims$length)
Expand All @@ -240,38 +317,75 @@ if("hosp" %in% model_outputs){
statistics <- purrr::flatten(config$inference$statistics[i])
cols_sim <- c("date", statistics$sim_var, "subpop","slot")
cols_data <- c("date", "subpop", statistics$data_var)
hosp_outputs_global_tmp <- hosp_outputs_global[,..cols_sim]

if("llik" %in% model_outputs){
llik_rank <- copy(outputs_global$llik) %>%
.[, .SD[order(ll)], subpop]
high_low_llik <- rbindlist(list(data.table(llik_rank, key = "subpop") %>%
.[, head(.SD,5), by = subpop] %>%
.[, llik_bin := "top"],
data.table(llik_rank, key = "subpop") %>%
.[, tail(.SD,5), by = subpop]%>%
.[, llik_bin := "bottom"])
)
# high_low_hosp_llik <- copy(outputs_global$hosp) %>%
# .[high_low_llik, on = c("slot", "subpop"), allow.cartesian = TRUE]

high_low_hosp_llik <- copy(outputs_global$hosp) %>%
.[high_low_llik, on = c("slot", "subpop"), allow.cartesian = TRUE]
# aggregate simulation output and data by time based on what is in the config
df_sim <- lapply(subpop_names, function(y) {
lapply(unique(hosp_outputs_global$slot), function(x)
purrr::flatten_df(inference::getStats(
hosp_outputs_global_tmp %>% .[subpop == y & slot == x] ,
"date",
"sim_var",
stat_list = config$inference$statistics[i],
start_date = config$start_date_groundtruth,
end_date = config$end_date_groundtruth
)) %>% dplyr::mutate(subpop = y, slot = x)) %>% dplyr::bind_rows()
}) %>% dplyr::bind_rows() %>% setDT()

df_data <- lapply(subpop_names, function(x) {
purrr::flatten_df(
inference::getStats(
gt_data %>% .[subpop == x,..cols_data],
"date",
"data_var",
stat_list = config$inference$statistics[i],
start_date = config$start_date_groundtruth,
end_date = config$end_date_groundtruth
)) %>% dplyr::mutate(subpop = x) %>%
dplyr::mutate(data_var = as.numeric(data_var)) %>%
dplyr::mutate(date = lubridate::as_date(date)) }) %>%
dplyr::bind_rows() %>% setDT()

# add likelihood ranking to simulation output
high_low_hosp_llik <- df_sim %>%
.[high_low_llik, on = c("slot", "subpop"), allow.cartesian=TRUE] %>% # right join by "on" variables
.[subpop != "Total"]

hosp_llik_plots <- lapply(unique(high_low_hosp_llik %>% .[, subpop]),
function(e){
high_low_hosp_llik %>%
.[, date := lubridate::as_date(date)] %>%
.[subpop == e] %>%
ggplot() +
geom_line(aes(lubridate::as_date(date), get(statistics$data_var),
group = slot, color = ll))+#, linetype = llik_bin)) +
# scale_linetype_manual(values = c(1, 2), name = "likelihood\nbin") +
.[, date := lubridate::as_date(date)] %>%
ggplot() +
geom_line(aes(x = date, y = sim_var, group = slot, color = ll)) +
scale_linetype_manual(values = c(1, 2), name = "likelihood\nbin") +
scale_color_viridis_c(option = "D", name = "log\nlikelihood") +
geom_point(data = gt_data %>%
.[, ..cols_data] %>%
.[subpop == e] ,
aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) +
facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
labs(x = 'date', y = fit_stats[i]) + #, title = paste0("top 5, bottom 5 lliks, ", statistics$sim_var)) +
theme_classic() +
guides(linetype = 'none')
geom_point(data = df_data,
aes(lubridate::as_date(date), data_var), color = 'firebrick', alpha = 0.2, size=1) +
facet_wrap(~subpop, scales = 'free') +
labs(x = 'date', y = fit_stats[i]) +
theme_classic() +
theme(legend.key.size = unit(0.2, "cm"))
# high_low_hosp_llik %>%
# .[, date := lubridate::as_date(date)] %>%
# .[subpop == e] %>%
# ggplot() +
# geom_line(aes(lubridate::as_date(date), get(statistics$data_var),
# group = slot, color = ll))+#, linetype = llik_bin)) +
# # scale_linetype_manual(values = c(1, 2), name = "likelihood\nbin") +
# scale_color_viridis_c(option = "D", name = "log\nlikelihood") +
# geom_point(data = gt_data %>%
# .[, ..cols_data] %>%
# .[subpop == e] ,
# aes(lubridate::as_date(date), get(statistics$data_var)), color = 'firebrick', alpha = 0.1) +
# facet_wrap(~subpop, scales = 'free', ncol = gg_cols) +
# labs(x = 'date', y = fit_stats[i]) + #, title = paste0("top 5, bottom 5 lliks, ", statistics$sim_var)) +
# theme_classic() +
# guides(linetype = 'none')
}
)

Expand All @@ -290,8 +404,8 @@ if("hnpi" %in% model_outputs){

gg_cols <- 4
num_nodes <- length(unique(outputs_global$hnpi %>% .[,subpop]))
pdf_dims <- data.frame(width = gg_cols*3, length = num_nodes/gg_cols * 2)
#pdf_dims <- data.frame(width = 20, length = 5)
# pdf_dims <- data.frame(width = gg_cols*3, length = num_nodes/gg_cols * 2)
pdf_dims <- data.frame(width = 20, length = 10)

fname <- paste0("pplot/hnpi_mod_outputs_", opt$run_id,".pdf")
pdf(fname, width = pdf_dims$width, height = pdf_dims$length)
Expand Down

0 comments on commit d507ac1

Please sign in to comment.