From 8b5da6bc696311c012edb0f88f4f264b9e2285ec Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 12 Nov 2024 15:36:21 +0200 Subject: [PATCH 1/2] Add support for v4 NULL point estimate output_type_ids. Resolves #156. Related to #147 --- R/expand_model_out_grid.R | 122 ++++++++++++++------ tests/testthat/test-expand_model_out_grid.R | 14 +++ 2 files changed, 102 insertions(+), 34 deletions(-) diff --git a/R/expand_model_out_grid.R b/R/expand_model_out_grid.R index fac3c9c5..c7d24454 100644 --- a/R/expand_model_out_grid.R +++ b/R/expand_model_out_grid.R @@ -194,25 +194,10 @@ expand_model_out_grid <- function(config_tasks, # retired config_tid <- hubUtils::get_config_tid(config_tasks = config_tasks) - output_type_l <- purrr::map( - round_config[["model_tasks"]], - function(.x) { - out <- .x[["output_type"]] - if (is.null(output_types)) { - out - } else { - mt_output_types <- output_types[output_types %in% names(out)] - out[mt_output_types] - } - } - ) %>% - purrr::map( - ~ extract_mt_output_type_ids(.x, config_tid) - ) %>% + output_type_l <- subset_rnd_output_types(round_config, output_types) %>% + extract_rnd_output_type_ids(config_tid) %>% process_grid_inputs(required_vals_only = required_vals_only) %>% - purrr::map(function(.x) { - purrr::compact(.x) - }) + purrr::map(~purrr::compact(.x)) # Expand output grid individually for each modeling task and output type. grid <- purrr::map2( @@ -241,6 +226,29 @@ expand_model_out_grid <- function(config_tasks, ) } +# Subset output types according to `output_types` from all model_task objects in +# a round. If `output_types` is `NULL`, all output types for each model task are +# returned. +subset_rnd_output_types <- function(round_config, output_types) { + purrr::map( + round_config[["model_tasks"]], + ~ subset_mt_output_types(.x, output_types) + ) +} + +# Subset model_task object output types according to `output_types`. +# If `output_types` is `NULL`, all output types are returned. +subset_mt_output_types <- function(model_task, output_types) { + out <- model_task[["output_type"]] + if (is.null(output_types)) { + out + } else { + mt_output_types <- output_types[output_types %in% names(out)] + out[mt_output_types] + } +} + + # Extracts/collapses individual task ID values depending on whether all or just required # values are needed. process_grid_inputs <- function(x, required_vals_only = FALSE) { @@ -292,7 +300,7 @@ expand_output_type_grid <- function(task_id_values, fix_round_id <- function(x, round_id, round_config, round_ids) { if (round_config[["round_id_from_variable"]] && !is.null(round_id)) { round_id <- rlang::arg_match(round_id, - values = round_ids + values = round_ids ) round_id_var <- round_config[["round_id"]] purrr::map( @@ -399,11 +407,11 @@ null_taskids_to_na <- function(model_task) { model_task, ~ all(purrr::map_lgl(.x, is.null)) ) purrr::modify_if(model_task, - .p = to_na, - ~ list( - required = NA, - optional = NULL - ) + .p = to_na, + ~ list( + required = NA, + optional = NULL + ) ) } @@ -538,25 +546,71 @@ get_sample_n <- function(x, config_tid) { length() } + +# Extract the output_type_id values for each model_task object in a round. +# Input should be the output of subset_rnd_output_types. +# config_tid is the name of the output_type_id column in the config schema used +# for back-compatibility with schema versions < v2.0.0. Returns a list of +# `required` and `optional` or just `required` vectors of values as appropriate for +# each output type in each model task in the round. +extract_rnd_output_type_ids <- function(x, config_tid) { + purrr::map(x, ~ extract_mt_output_type_ids(.x, config_tid)) +} +# Extract the output_type_id values from a model_task object. +# config_tid is the name of the output_type_id column in the config schema used +# for back-compatibility with schema versions < v2.0.0. Returns a list of +# `required` and `optional` or just `required` vectors of values as appropriate for +# each output type in the model task. extract_mt_output_type_ids <- function(x, config_tid) { purrr::map( x, function(.x) { - if (config_tid %in% names(.x)) { - .x[[config_tid]] - } else if ("output_type_id_params" %in% names(.x)) { - if (.x[["output_type_id_params"]][["is_required"]]) { - list(required = NA, optional = NULL) - } else { - list(required = NULL, optional = NA) - } - } else { - NULL + output_type_ids <- .x[[config_tid]] + if (valid_output_type_ids(output_type_ids)) { + return(output_type_ids) } + + # If dealing with a `NULL` output_type_ids object or a v4 schema version point + # estimate output type, determine first if . + is_required <- isTRUE(.x[["is_required"]]) || + isTRUE(.x[["output_type_id_params"]][["is_required"]]) + + null_output_type_ids(is_required) } ) } +valid_output_type_ids <- function(output_type_ids) { + # If output_type_id values are provided and when dealing with an older + # config version that has "required" and"optional" fields or extract output_type_id values + has_output_type_ids <- !is.null(output_type_ids) + pre_v4 <- isTRUE( + all.equal( + sort(names(output_type_ids)), + sort(c("required", "optional")) + ) + ) + # In post v4 config schema versions, when not NULL, a single `required` element is + # a valid output type id configuration and should be returned as is + required_not_null <- !is.null(output_type_ids[["required"]]) + + # Valid output type id configurations cannot be `NULL` and must either: + # have both `required` and `optional` elements or + # be a single non-NULL `required` element in post v4 schema versions + has_output_type_ids && (pre_v4 || required_not_null) +} + +# Create a list of NULL or NA required and optional output type id values depending +# on whether the output type is required or optional. Allows us to use current +# infrastructure to convert `NULL`s to `NA`s in a back-compatible way. +null_output_type_ids <- function(is_required) { + if (is_required) { + list(required = NA, optional = NULL) + } else { + list(required = NULL, optional = NA) + } +} + validate_output_types <- function(output_types, config_tasks, round_id, call = rlang::caller_call()) { checkmate::assert_character(output_types, null.ok = TRUE) diff --git a/tests/testthat/test-expand_model_out_grid.R b/tests/testthat/test-expand_model_out_grid.R index 9d407ad3..24888b31 100644 --- a/tests/testthat/test-expand_model_out_grid.R +++ b/tests/testthat/test-expand_model_out_grid.R @@ -551,5 +551,19 @@ test_that("(#123) expand_output_type_grid() returns expected outputs with option ) expect_equal(nrow(i_have_eight_rows), 8) expect_equal(ncol(i_have_eight_rows), 3) +}) + +test_that("v4 point estimate output type IDs extracted correctly as NAs", { + hub_path <- system.file("testhubs", "v4", "flusight", package = "hubUtils") + file_name <- "hub-baseline/2023-05-01-hub-baseline.csv" + round_id <- parse_file_name(file_name)$round_id + config_tasks <- suppressWarnings(read_config(hub_path = hub_path)) + expect_true( + expand_model_out_grid( + config_tasks = config_tasks, + round_id = round_id, + output_types = "mean", + )[["output_type_id"]] |> is.na() |> all() + ) }) From bb36c4deeb286159e9525bc2ac7348e97f72cb6c Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 13 Nov 2024 18:13:05 +0200 Subject: [PATCH 2/2] appease lintr --- R/expand_model_out_grid.R | 14 +++++++------- tests/testthat/test-expand_model_out_grid.R | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/expand_model_out_grid.R b/R/expand_model_out_grid.R index c7d24454..d416962d 100644 --- a/R/expand_model_out_grid.R +++ b/R/expand_model_out_grid.R @@ -197,7 +197,7 @@ expand_model_out_grid <- function(config_tasks, output_type_l <- subset_rnd_output_types(round_config, output_types) %>% extract_rnd_output_type_ids(config_tid) %>% process_grid_inputs(required_vals_only = required_vals_only) %>% - purrr::map(~purrr::compact(.x)) + purrr::map(~ purrr::compact(.x)) # Expand output grid individually for each modeling task and output type. grid <- purrr::map2( @@ -300,7 +300,7 @@ expand_output_type_grid <- function(task_id_values, fix_round_id <- function(x, round_id, round_config, round_ids) { if (round_config[["round_id_from_variable"]] && !is.null(round_id)) { round_id <- rlang::arg_match(round_id, - values = round_ids + values = round_ids ) round_id_var <- round_config[["round_id"]] purrr::map( @@ -407,11 +407,11 @@ null_taskids_to_na <- function(model_task) { model_task, ~ all(purrr::map_lgl(.x, is.null)) ) purrr::modify_if(model_task, - .p = to_na, - ~ list( - required = NA, - optional = NULL - ) + .p = to_na, + ~ list( + required = NA, + optional = NULL + ) ) } diff --git a/tests/testthat/test-expand_model_out_grid.R b/tests/testthat/test-expand_model_out_grid.R index 24888b31..6f0452d5 100644 --- a/tests/testthat/test-expand_model_out_grid.R +++ b/tests/testthat/test-expand_model_out_grid.R @@ -564,6 +564,8 @@ test_that("v4 point estimate output type IDs extracted correctly as NAs", { config_tasks = config_tasks, round_id = round_id, output_types = "mean", - )[["output_type_id"]] |> is.na() |> all() + )[["output_type_id"]] |> + is.na() |> + all() ) })