From aa31133eaacaab94cd6934fedef1b0838e422427 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Dec 2024 12:05:17 +0200 Subject: [PATCH] Surface force_output_types in submission_tmpl. Resolves #162 --- R/submission_tmpl.R | 57 ++++++++++++++++++++++++--- man/submission_tmpl.Rd | 30 +++++++++++++- tests/testthat/test-submission_tmpl.R | 57 +++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 7 deletions(-) diff --git a/R/submission_tmpl.R b/R/submission_tmpl.R index 276078ec..4d77e88c 100644 --- a/R/submission_tmpl.R +++ b/R/submission_tmpl.R @@ -20,8 +20,8 @@ #' values only. #' #' @details -#' For task IDs or output_type_ids where all values are optional, by default, columns -#' are included as columns of `NA`s when `required_vals_only = TRUE`. +#' For task IDs where all values are optional, by default, columns +#' are created as columns of `NA`s when `required_vals_only = TRUE`. #' When such columns exist, the function returns a tibble with zero rows, as no #' complete cases of required value combinations exists. #' _(Note that determination of complete cases does excludes valid `NA` @@ -29,6 +29,16 @@ #' To return a template of incomplete required cases, which includes `NA` columns, use #' `complete_cases_only = FALSE`. #' +#' To include output types that are optional in the submission template +#' when `required_vals_only = TRUE` and `complete_cases_only = FALSE`, use +#' `force_output_types = TRUE`. Use this in combination with sub-setting for +#' output types you plan to submit via argument `output_types` to create a +#' submission template customised to your submission plans. 
+#' _Tip: to ensure you create a template with all required output types, it's +#' a good idea to first run the functions without subsetting or forcing output +#' types and examining the unique values in `output_type` to check which output +#' types are required._ +#' #' When sample output types are included in the output, the `output_type_id` #' column contains example sample indexes which are useful for identifying the #' compound task ID structure of multivariate sampling distributions in particular, @@ -116,8 +126,19 @@ #' derived_task_ids = "target_end_date", #' complete_cases_only = FALSE #' ) +#' # Force optional output type, in this case "mean". +#' submission_tmpl( +#' config_tasks = config_tasks, +#' round_id = "2022-12-12", +#' required_vals_only = TRUE, +#' output_types = c("pmf", "quantile", "mean"), +#' force_output_types = TRUE, +#' derived_task_ids = "target_end_date", +#' complete_cases_only = FALSE +#' ) submission_tmpl <- function(hub_con, config_tasks, round_id, required_vals_only = FALSE, + force_output_types = FALSE, complete_cases_only = TRUE, compound_taskid_set = NULL, output_types = NULL, @@ -138,15 +159,42 @@ submission_tmpl <- function(hub_con, config_tasks, round_id, derived_task_ids, config_tasks, round_id ) } - tmpl_df <- expand_model_out_grid(config_tasks, round_id = round_id, required_vals_only = required_vals_only, include_sample_ids = TRUE, compound_taskid_set = compound_taskid_set, output_types = output_types, - derived_task_ids = derived_task_ids + derived_task_ids = derived_task_ids, + force_output_types = force_output_types ) + if (nrow(tmpl_df) == 0L && !complete_cases_only) { + # If all output_types are optional, expand_model_out_grid returns + # a zero row and column data.frame. To attempt to expand required task id + # values when complete_cases_only = FALSE, we use + # force_output_types = TRUE to force the output types to be included. 
We + # then remove output type related columns and create a data.frame of + # required task id values only. + tmpl_df <- expand_model_out_grid( + config_tasks, + round_id = round_id, + required_vals_only = required_vals_only, + include_sample_ids = TRUE, + compound_taskid_set = compound_taskid_set, + output_types = output_types, + derived_task_ids = derived_task_ids, + force_output_types = TRUE + ) + tmpl_df <- tmpl_df[, !names(tmpl_df) %in% hubUtils::std_colnames[c( + "output_type", + "output_type_id", + "value" + )]] |> + unique() + } + if (nrow(tmpl_df) == 0L) { + return(tmpl_df) + } tmpl_cols <- c( hubUtils::get_round_task_id_names( @@ -155,7 +203,6 @@ submission_tmpl <- function(hub_con, config_tasks, round_id, ), hubUtils::std_colnames[names(hubUtils::std_colnames) != "model_id"] ) - # Add NA columns for value and all optional cols na_cols <- tmpl_cols[!tmpl_cols %in% names(tmpl_df)] tmpl_df[, na_cols] <- NA diff --git a/man/submission_tmpl.Rd b/man/submission_tmpl.Rd index 23ae4ea9..f503a566 100644 --- a/man/submission_tmpl.Rd +++ b/man/submission_tmpl.Rd @@ -9,6 +9,7 @@ submission_tmpl( config_tasks, round_id, required_vals_only = FALSE, + force_output_types = FALSE, complete_cases_only = TRUE, compound_taskid_set = NULL, output_types = NULL, @@ -31,6 +32,11 @@ contains only a single round.} \item{required_vals_only}{Logical. Whether to return only combinations of Task ID and related output type ID required values.} +\item{force_output_types}{Logical. Whether to force all output types to be required. +If \code{TRUE}, all output type ID values are treated as required regardless +of the value of the \code{is_required} property. Useful for creating grids of required +values for optional output types.} + \item{complete_cases_only}{Logical. If \code{TRUE} (default) and \code{required_vals_only = TRUE}, only rows with complete cases of combinations of required values are returned. 
If \code{FALSE}, rows with incomplete cases of combinations of required values @@ -62,8 +68,8 @@ values only. Create a model output submission file template } \details{ -For task IDs or output_type_ids where all values are optional, by default, columns -are included as columns of \code{NA}s when \code{required_vals_only = TRUE}. +For task IDs where all values are optional, by default, columns +are created as columns of \code{NA}s when \code{required_vals_only = TRUE}. When such columns exist, the function returns a tibble with zero rows, as no complete cases of required value combinations exists. \emph{(Note that determination of complete cases does excludes valid \code{NA} @@ -71,6 +77,16 @@ complete cases of required value combinations exists. To return a template of incomplete required cases, which includes \code{NA} columns, use \code{complete_cases_only = FALSE}. +To include output types that are optional in the submission template +when \code{required_vals_only = TRUE} and \code{complete_cases_only = FALSE}, use +\code{force_output_types = TRUE}. Use this in combination with sub-setting for +output types you plan to submit via argument \code{output_types} to create a +submission template customised to your submission plans. +\emph{Tip: to ensure you create a template with all required output types, it's +a good idea to first run the functions without subsetting or forcing output +types and examining the unique values in \code{output_type} to check which output +types are required.} + When sample output types are included in the output, the \code{output_type_id} column contains example sample indexes which are useful for identifying the compound task ID structure of multivariate sampling distributions in particular, @@ -157,4 +173,14 @@ submission_tmpl( derived_task_ids = "target_end_date", complete_cases_only = FALSE ) +# Force optional output type, in this case "mean". 
+submission_tmpl( + config_tasks = config_tasks, + round_id = "2022-12-12", + required_vals_only = TRUE, + output_types = c("pmf", "quantile", "mean"), + force_output_types = TRUE, + derived_task_ids = "target_end_date", + complete_cases_only = FALSE +) } diff --git a/tests/testthat/test-submission_tmpl.R b/tests/testthat/test-submission_tmpl.R index b7bfbe6b..26732e11 100644 --- a/tests/testthat/test-submission_tmpl.R +++ b/tests/testthat/test-submission_tmpl.R @@ -255,3 +255,60 @@ test_that("submission_tmpl ignoring derived task ids works", { ) ) }) + + +test_that("submission_tmpl force_output_types works", { + config_tasks <- read_config_file( + test_path( + "testdata", "configs", + "tasks-samples-v4.json" + ) + ) + # When force_output_types is not set, all output_types are optional, a + # zero row and column data.frame is returned by default. + req_non_force_default <- suppressMessages( + suppressWarnings( + submission_tmpl( + config_tasks = config_tasks, + round_id = "2022-10-22", + required_vals_only = TRUE, + output_types = "sample" + ) + ) + ) + expect_equal(dim(req_non_force_default), c(0L, 0L)) + # When force_output_types is not set, all output_types are optional and + # complete_cases_only = FALSE a data.frame containing required task ID + # values is returned, with all optional task ids and output type related + # columns set to NA. + req_non_force <- suppressMessages( + suppressWarnings( + submission_tmpl( + config_tasks = config_tasks, + round_id = "2022-10-22", + required_vals_only = TRUE, + output_types = "sample", + complete_cases_only = FALSE + ) + ) + ) + expect_equal(dim(req_non_force), c(4L, 9L)) + expect_equal(unique(req_non_force$output_type), NA_character_) + + # When force_output_types is TRUE, the requested output type should be + # returned. 
+ req_force <- suppressMessages( + suppressWarnings( + submission_tmpl( + config_tasks = config_tasks, + round_id = "2022-10-22", + required_vals_only = TRUE, + force_output_types = TRUE, + output_types = "sample", + complete_cases_only = FALSE + ) + ) + ) + expect_equal(dim(req_force), c(4L, 9L)) + expect_equal(unique(req_force$output_type), "sample") +})