diff --git a/code/make_round.R b/code/make_round.R index dd2e191..26a01e7 100644 --- a/code/make_round.R +++ b/code/make_round.R @@ -11,8 +11,8 @@ source("code/utils.R") config <- hubUtils::read_config(getwd(), config = "tasks") # Validate hub config -#a <- hubAdmin::validate_config() -#hubAdmin::view_config_val_errors(a) +a <- hubAdmin::validate_config() +hubAdmin::view_config_val_errors(a) ## RSV ROUND 1 ------------- #rsv1 <- config$rounds[[4]] @@ -40,81 +40,81 @@ config <- hubUtils::read_config(getwd(), config = "tasks") # ## EQUITY ROUNDS ------------ ## Phase I -#lapply(config$rounds[5], function(x) { -# req_df <- make_df_sample(x$model_tasks, -# max_sample = max_sample(x)) -# default_pairing <- def_grp(x) -# max_sample <- max_sample(x) -# lapply(unique(req_df$team_model), function(model_id) { -# print(paste0("Generate example data for: ", model_id)) -# df <- dplyr::filter(req_df, team_model == model_id) -# df <- dplyr::select(df, -team_model) -# if (model_id %in% c("team1-modela", "team2-modelb")) { -# df2 <- dplyr::filter(df, target == "inc death") -# df2$target <- "inc case" -# df <- rbind(df, df2) -# rm(df2) -# } -# print("-- Generate value") -# if (!model_id %in% c("team1-modela", "team2-modelb")) { -# df <- update_df_val_sample(df) -# } else { -# df <- update_df_val_sample(df, quantile = TRUE, -# quant_group = c("origin_date", "scenario_id", -# "location", "target", "horizon", -# "race_ethnicity"), -# cumul_group = c("origin_date", "scenario_id", -# "location", "target", -# "output_type", -# "output_type_id", -# "race_ethnicity")) -# df <- dplyr::filter(df, grepl("inc", target)) -# } -# print("-- Generate sample ID") -# df <- dplyr::mutate(df, value = round(value, 1), -# run_grouping = ifelse(output_type == "sample", 1, NA), -# stochastic_run = ifelse(output_type == "sample", 1, NA), -# output_type_id = ifelse(output_type == "sample", NA, -# output_type_id)) -# if (model_id %in% c("team1-modela")) { -# df <- prep_sample_information(df, "stochastic_run", default_pairing, -# rep = max_sample) -# } else if (model_id %in% c("team2-modelb")) { -# df <- prep_sample_information(df, "run_grouping", default_pairing, -# rep = max_sample) -# } else if (model_id %in% c("team3-modelc")) { -# df <- -# prep_sample_information(df, "run_grouping", default_pairing, -# rep = max_sample) %>% -# prep_sample_information("stochastic_run", default_pairing, -# rep = max_sample) -# } else if (model_id %in% c("team4-modeld")) { -# df <- -# prep_sample_information(df, "run_grouping", default_pairing, -# rep = max_sample, same_rep = TRUE) %>% -# prep_sample_information("stochastic_run", default_pairing, -# rep = max_sample) -# } else { -# df <- -# prep_sample_information(df, "run_grouping", c(default_pairing, -# "scenario_id"), -# rep = max_sample, same_rep = TRUE) %>% -# prep_sample_information("stochastic_run", default_pairing, -# rep = max_sample) -# } -# print("-- Write output") -# file_name <- paste0("data-processed/", model_id, "/", x$round_id, "-", -# model_id, ".gz.parquet") -# if (model_id %in% c("team1-modela")) { -# df$value <- as.integer(df$value) -# } else { -# df$value <- round(df$value, 1) -# } -# arrow::write_parquet(df, file_name, compression = "gzip", -# compression_level = 9) -# }) -#}) -# +lapply(config$rounds[5], function(x) { + req_df <- make_df_sample(x$model_tasks, + max_sample = max_sample(x)) + default_pairing <- def_grp(x) + max_sample <- max_sample(x) + lapply(unique(req_df$team_model), function(model_id) { + print(paste0("Generate example data for: ", model_id)) + df <- dplyr::filter(req_df, team_model == model_id) + df <- dplyr::select(df, -team_model) + if (model_id %in% c("team1-modela", "team2-modelb")) { + df2 <- dplyr::filter(df, target == "inc death") + df2$target <- "inc case" + df <- rbind(df, df2) + rm(df2) + } + print("-- Generate value") + if (!model_id %in% c("team1-modela", "team2-modelb")) { + df <- update_df_val_sample(df) + } else { + df <- update_df_val_sample(df, quantile = TRUE, + quant_group = c("origin_date", "scenario_id", + "location", "target", "horizon", + "race_ethnicity"), + cumul_group = c("origin_date", "scenario_id", + "location", "target", + "output_type", + "output_type_id", + "race_ethnicity")) + df <- dplyr::filter(df, grepl("inc", target)) + } + print("-- Generate sample ID") + df <- dplyr::mutate(df, value = round(value, 1), + run_grouping = ifelse(output_type == "sample", 1, NA), + stochastic_run = ifelse(output_type == "sample", 1, NA), + output_type_id = ifelse(output_type == "sample", NA, + output_type_id)) + if (model_id %in% c("team1-modela")) { + df <- prep_sample_information(df, "stochastic_run", default_pairing, + rep = max_sample) + } else if (model_id %in% c("team2-modelb")) { + df <- prep_sample_information(df, "run_grouping", default_pairing, + rep = max_sample) + } else if (model_id %in% c("team3-modelc")) { + df <- + prep_sample_information(df, "run_grouping", default_pairing, + rep = max_sample) %>% + prep_sample_information("stochastic_run", default_pairing, + rep = max_sample) + } else if (model_id %in% c("team4-modeld")) { + df <- + prep_sample_information(df, "run_grouping", default_pairing, + rep = max_sample, same_rep = TRUE) %>% + prep_sample_information("stochastic_run", default_pairing, + rep = max_sample) + } else { + df <- + prep_sample_information(df, "run_grouping", c(default_pairing, + "scenario_id"), + rep = max_sample, same_rep = TRUE) %>% + prep_sample_information("stochastic_run", default_pairing, + rep = max_sample) + } + print("-- Write output") + file_name <- paste0("data-processed/", model_id, "/", x$round_id, "-", + model_id, ".gz.parquet") + if (model_id %in% c("team1-modela")) { + df$value <- as.integer(df$value) + } else { + df$value <- round(df$value, 1) + } + arrow::write_parquet(df, file_name, compression = "gzip", + compression_level = 9) + }) +}) + # # COVID Round 18 ------ diff --git a/hub-config/tasks.json b/hub-config/tasks.json index fd485e1..92bb348 100644 --- a/hub-config/tasks.json +++ b/hub-config/tasks.json @@ -1695,6 +1695,114 @@ "time_unit": "week" } ] + }, + { + "task_ids": { + "origin_date": { + "required": ["2024-04-28"], + "optional": null + }, + "scenario_id": { + "required": ["A-2024-03-01", "B-2024-03-01", "C-2024-03-01", "D-2024-03-01", "E-2024-03-01", "F-2024-03-01"], + "optional": null + }, + "location": { + "required": null, + "optional": ["US", "01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "44", "45", "46", "47", "48", "49", "50", "51", "53", "54", "55", "56", "60", "66", "69", "72", "74", "78"] + }, + "target": { + "required": null, + "optional": ["peak time hosp"] + }, + "horizon": { + "required": null, + "optional": null + }, + "age_group": { + "required": ["0-130"], + "optional": null + } + }, + "output_type": { + "cdf":{ + "output_type_id":{ + "required":["EW202418", "EW202419", "EW202420", "EW202421", "EW202422", "EW202423", "EW202424", "EW202425", "EW202426", "EW202427", "EW202428", "EW202429", "EW202430", "EW202431", "EW202432", "EW202433", "EW202434", "EW202435", "EW202436", "EW202437", "EW202438", "EW202439", "EW202440", "EW202441", "EW202442", "EW202443", "EW202444", "EW202445", "EW202446", "EW202447", "EW202448", "EW202449", "EW202450", "EW202451", "EW202452", "EW202501", "EW202502", "EW202503", "EW202504", "EW202505", "EW202506", "EW202507", "EW202508", "EW202509", "EW202510", "EW202511", "EW202512", "EW202513", "EW202514", "EW202515", "EW202516", "EW202517"], + "optional":null + }, + "value":{ + "type":"double", + "minimum":0, + "maximum":1 + } + } + }, + "target_metadata": [ + { + "target_id": "peak time hosp", + "target_name": "Peak timing of hospitalization", + "description":"Cumulative probability of the incident hospitalization peak", + "target_units":"population", + "target_keys":{ + "target":["peak time hosp"] + }, + "target_type": "discrete", + "is_step_ahead": true, + "time_unit": "week" + } + ] + }, + { + "task_ids": { + "origin_date": { + "required": ["2024-04-28"], + "optional": null + }, + "scenario_id": { + "required": ["A-2024-03-01", "B-2024-03-01", "C-2024-03-01", "D-2024-03-01", "E-2024-03-01", "F-2024-03-01"], + "optional": null + }, + "location": { + "required": null, + "optional": ["US", "01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "13", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "44", "45", "46", "47", "48", "49", "50", "51", "53", "54", "55", "56", "60", "66", "69", "72", "74", "78"] + }, + "target": { + "required": null, + "optional": ["peak size hosp"] + }, + "horizon": { + "required": null, + "optional": null + }, + "age_group": { + "required": ["0-130"], + "optional": null + } + }, + "output_type": { + "quantile":{ + "output_type_id":{ + "required":[0.01,0.025,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.975,0.99], + "optional":[0,1] + }, + "value":{ + "type":"double", + "minimum":0 + } + } + }, + "target_metadata": [ + { + "target_id": "peak size hosp", + "target_name": "Peak size of hospitalization", + "description":"Magnitude of the peak of weekly incident hospitalizations", + "target_units":"population", + "target_keys":{ + "target":["peak size hosp"] + }, + "target_type": "discrete", + "is_step_ahead": false + } + ] } ], "submissions_due": {