From 8b5da6bc696311c012edb0f88f4f264b9e2285ec Mon Sep 17 00:00:00 2001
From: Anna Krystalli <annakrystalli@googlemail.com>
Date: Tue, 12 Nov 2024 15:36:21 +0200
Subject: [PATCH 1/2] Add support for v4 NULL point estimate output_type_ids.
 Resolves #156. Related to #147

---
 R/expand_model_out_grid.R                   | 122 ++++++++++++++------
 tests/testthat/test-expand_model_out_grid.R |  14 +++
 2 files changed, 102 insertions(+), 34 deletions(-)

diff --git a/R/expand_model_out_grid.R b/R/expand_model_out_grid.R
index fac3c9c5..c7d24454 100644
--- a/R/expand_model_out_grid.R
+++ b/R/expand_model_out_grid.R
@@ -194,25 +194,10 @@ expand_model_out_grid <- function(config_tasks,
   # retired
   config_tid <- hubUtils::get_config_tid(config_tasks = config_tasks)
 
-  output_type_l <- purrr::map(
-    round_config[["model_tasks"]],
-    function(.x) {
-      out <- .x[["output_type"]]
-      if (is.null(output_types)) {
-        out
-      } else {
-        mt_output_types <- output_types[output_types %in% names(out)]
-        out[mt_output_types]
-      }
-    }
-  ) %>%
-    purrr::map(
-      ~ extract_mt_output_type_ids(.x, config_tid)
-    ) %>%
+  output_type_l <- subset_rnd_output_types(round_config, output_types) %>%
+    extract_rnd_output_type_ids(config_tid) %>%
     process_grid_inputs(required_vals_only = required_vals_only) %>%
-    purrr::map(function(.x) {
-      purrr::compact(.x)
-    })
+    purrr::map(~purrr::compact(.x))
 
   # Expand output grid individually for each modeling task and output type.
   grid <- purrr::map2(
@@ -241,6 +226,29 @@ expand_model_out_grid <- function(config_tasks,
   )
 }
 
+# Subset output types according to `output_types` from all model_task objects in
+# a round. If `output_types` is `NULL`, all output types for each model task are
+# returned.
+subset_rnd_output_types <- function(round_config, output_types) {
+  purrr::map(
+    round_config[["model_tasks"]],
+    ~ subset_mt_output_types(.x, output_types)
+  )
+}
+
+# Subset model_task object output types according to `output_types`.
+# If `output_types` is `NULL`, all output types are returned.
+subset_mt_output_types <- function(model_task, output_types) {
+  out <- model_task[["output_type"]]
+  if (is.null(output_types)) {
+    out
+  } else {
+    mt_output_types <- output_types[output_types %in% names(out)]
+    out[mt_output_types]
+  }
+}
+
+
 # Extracts/collapses individual task ID values depending on whether all or just required
 # values are needed.
 process_grid_inputs <- function(x, required_vals_only = FALSE) {
@@ -292,7 +300,7 @@ expand_output_type_grid <- function(task_id_values,
 fix_round_id <- function(x, round_id, round_config, round_ids) {
   if (round_config[["round_id_from_variable"]] && !is.null(round_id)) {
     round_id <- rlang::arg_match(round_id,
-      values = round_ids
+                                 values = round_ids
     )
     round_id_var <- round_config[["round_id"]]
     purrr::map(
@@ -399,11 +407,11 @@ null_taskids_to_na <- function(model_task) {
     model_task, ~ all(purrr::map_lgl(.x, is.null))
   )
   purrr::modify_if(model_task,
-    .p = to_na,
-    ~ list(
-      required = NA,
-      optional = NULL
-    )
+                   .p = to_na,
+                   ~ list(
+                     required = NA,
+                     optional = NULL
+                   )
   )
 }
 
@@ -538,25 +546,71 @@ get_sample_n <- function(x, config_tid) {
     length()
 }
 
+
+# Extract the output_type_id values for each model_task object in a round.
+# Input should be the output of subset_rnd_output_types.
+# config_tid is the name of the output_type_id column in the config schema used
+# for back-compatibility with schema versions < v2.0.0. Returns a list of
+# `required` and `optional` or just `required` vectors of values as appropriate for
+# each output type in each model task in the round.
+extract_rnd_output_type_ids <- function(x, config_tid) {
+  purrr::map(x, ~ extract_mt_output_type_ids(.x, config_tid))
+}
+# Extract the output_type_id values from a model_task object.
+# config_tid is the name of the output_type_id column in the config schema used
+# for back-compatibility with schema versions < v2.0.0. Returns a list of
+# `required` and `optional` or just `required` vectors of values as appropriate for
+# each output type in the model task.
 extract_mt_output_type_ids <- function(x, config_tid) {
   purrr::map(
     x,
     function(.x) {
-      if (config_tid %in% names(.x)) {
-        .x[[config_tid]]
-      } else if ("output_type_id_params" %in% names(.x)) {
-        if (.x[["output_type_id_params"]][["is_required"]]) {
-          list(required = NA, optional = NULL)
-        } else {
-          list(required = NULL, optional = NA)
-        }
-      } else {
-        NULL
+      output_type_ids <- .x[[config_tid]]
+      if (valid_output_type_ids(output_type_ids)) {
+        return(output_type_ids)
       }
+
+      # If dealing with a `NULL` output_type_ids object or a v4 schema version point
+      # estimate output type, determine first if .
+      is_required <- isTRUE(.x[["is_required"]]) ||
+        isTRUE(.x[["output_type_id_params"]][["is_required"]])
+
+      null_output_type_ids(is_required)
     }
   )
 }
 
+valid_output_type_ids <- function(output_type_ids) {
+  # If output_type_id values are provided and when dealing with an older
+  # config version that has "required" and"optional" fields or  extract output_type_id values
+  has_output_type_ids <- !is.null(output_type_ids)
+  pre_v4 <- isTRUE(
+    all.equal(
+      sort(names(output_type_ids)),
+      sort(c("required", "optional"))
+    )
+  )
+  # In post v4 config schema versions, when not NULL, a single `required` element is
+  # a valid output type id configuration and should be returned as is
+  required_not_null <- !is.null(output_type_ids[["required"]])
+
+  # Valid output type id configurations cannot be `NULL` and must either:
+  # have both `required` and `optional` elements or
+  # be a single non-NULL `required` element in post v4 schema versions
+  has_output_type_ids && (pre_v4 || required_not_null)
+}
+
+# Create a list of NULL or NA required and optional output type id values depending
+# on whether the output type is required or optional. Allows us to use current
+# infrastructure to convert `NULL`s to `NA`s in a back-compatible way.
+null_output_type_ids <- function(is_required) {
+  if (is_required) {
+    list(required = NA, optional = NULL)
+  } else {
+    list(required = NULL, optional = NA)
+  }
+}
+
 validate_output_types <- function(output_types, config_tasks, round_id,
                                   call = rlang::caller_call()) {
   checkmate::assert_character(output_types, null.ok = TRUE)
diff --git a/tests/testthat/test-expand_model_out_grid.R b/tests/testthat/test-expand_model_out_grid.R
index 9d407ad3..24888b31 100644
--- a/tests/testthat/test-expand_model_out_grid.R
+++ b/tests/testthat/test-expand_model_out_grid.R
@@ -551,5 +551,19 @@ test_that("(#123) expand_output_type_grid() returns expected outputs with option
   )
   expect_equal(nrow(i_have_eight_rows), 8)
   expect_equal(ncol(i_have_eight_rows), 3)
+})
+
+test_that("v4 point estimate output type IDs extracted correctly as NAs", {
+  hub_path <- system.file("testhubs", "v4", "flusight", package = "hubUtils")
+  file_name <- "hub-baseline/2023-05-01-hub-baseline.csv"
+  round_id <- parse_file_name(file_name)$round_id
+  config_tasks <- suppressWarnings(read_config(hub_path = hub_path))
 
+  expect_true(
+    expand_model_out_grid(
+      config_tasks = config_tasks,
+      round_id = round_id,
+      output_types = "mean",
+    )[["output_type_id"]] |> is.na() |> all()
+  )
 })

From bb36c4deeb286159e9525bc2ac7348e97f72cb6c Mon Sep 17 00:00:00 2001
From: Anna Krystalli <annakrystalli@googlemail.com>
Date: Wed, 13 Nov 2024 18:13:05 +0200
Subject: [PATCH 2/2] appease lintr

---
 R/expand_model_out_grid.R                   | 14 +++++++-------
 tests/testthat/test-expand_model_out_grid.R |  4 +++-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/R/expand_model_out_grid.R b/R/expand_model_out_grid.R
index c7d24454..d416962d 100644
--- a/R/expand_model_out_grid.R
+++ b/R/expand_model_out_grid.R
@@ -197,7 +197,7 @@ expand_model_out_grid <- function(config_tasks,
   output_type_l <- subset_rnd_output_types(round_config, output_types) %>%
     extract_rnd_output_type_ids(config_tid) %>%
     process_grid_inputs(required_vals_only = required_vals_only) %>%
-    purrr::map(~purrr::compact(.x))
+    purrr::map(~ purrr::compact(.x))
 
   # Expand output grid individually for each modeling task and output type.
   grid <- purrr::map2(
@@ -300,7 +300,7 @@ expand_output_type_grid <- function(task_id_values,
 fix_round_id <- function(x, round_id, round_config, round_ids) {
   if (round_config[["round_id_from_variable"]] && !is.null(round_id)) {
     round_id <- rlang::arg_match(round_id,
-                                 values = round_ids
+      values = round_ids
     )
     round_id_var <- round_config[["round_id"]]
     purrr::map(
@@ -407,11 +407,11 @@ null_taskids_to_na <- function(model_task) {
     model_task, ~ all(purrr::map_lgl(.x, is.null))
   )
   purrr::modify_if(model_task,
-                   .p = to_na,
-                   ~ list(
-                     required = NA,
-                     optional = NULL
-                   )
+    .p = to_na,
+    ~ list(
+      required = NA,
+      optional = NULL
+    )
   )
 }
 
diff --git a/tests/testthat/test-expand_model_out_grid.R b/tests/testthat/test-expand_model_out_grid.R
index 24888b31..6f0452d5 100644
--- a/tests/testthat/test-expand_model_out_grid.R
+++ b/tests/testthat/test-expand_model_out_grid.R
@@ -564,6 +564,8 @@ test_that("v4 point estimate output type IDs extracted correctly as NAs", {
       config_tasks = config_tasks,
       round_id = round_id,
       output_types = "mean",
-    )[["output_type_id"]] |> is.na() |> all()
+    )[["output_type_id"]] |>
+      is.na() |>
+      all()
   )
 })