Skip to content

Commit

Permalink
Surface force_output_types in submission_tmpl. Resolves #162
Browse files Browse the repository at this point in the history
  • Loading branch information
annakrystalli committed Dec 11, 2024
1 parent 4f511c2 commit aa31133
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 7 deletions.
57 changes: 52 additions & 5 deletions R/submission_tmpl.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,25 @@
#' values only.
#'
#' @details
#' For task IDs or output_type_ids where all values are optional, by default, columns
#' are included as columns of `NA`s when `required_vals_only = TRUE`.
#' For task IDs where all values are optional, by default, columns
#' are created as columns of `NA`s when `required_vals_only = TRUE`.
#' When such columns exist, the function returns a tibble with zero rows, as no
#' complete cases of required value combinations exist.
#' _(Note that determination of complete cases excludes valid `NA`
#' `output_type_id` values in `"mean"` and `"median"` output types)._
#' To return a template of incomplete required cases, which includes `NA` columns, use
#' `complete_cases_only = FALSE`.
#'
#' To include output types that are optional in the submission template
#' when `required_vals_only = TRUE` and `complete_cases_only = FALSE`, use
#' `force_output_types = TRUE`. Use this in combination with sub-setting for
#' output types you plan to submit via argument `output_types` to create a
#' submission template customised to your submission plans.
#' _Tip: to ensure you create a template with all required output types, it's
#' a good idea to first run the function without subsetting or forcing output
#' types and examine the unique values in `output_type` to check which output
#' types are required._
#'
#' When sample output types are included in the output, the `output_type_id`
#' column contains example sample indexes which are useful for identifying the
#' compound task ID structure of multivariate sampling distributions in particular,
Expand Down Expand Up @@ -116,8 +126,19 @@
#' derived_task_ids = "target_end_date",
#' complete_cases_only = FALSE
#' )
#' # Force optional output type, in this case "mean".
#' submission_tmpl(
#' config_tasks = config_tasks,
#' round_id = "2022-12-12",
#' required_vals_only = TRUE,
#' output_types = c("pmf", "quantile", "mean"),
#' force_output_types = TRUE,
#' derived_task_ids = "target_end_date",
#' complete_cases_only = FALSE
#' )
submission_tmpl <- function(hub_con, config_tasks, round_id,
required_vals_only = FALSE,
force_output_types = FALSE,
complete_cases_only = TRUE,
compound_taskid_set = NULL,
output_types = NULL,
Expand All @@ -138,15 +159,42 @@ submission_tmpl <- function(hub_con, config_tasks, round_id,
derived_task_ids, config_tasks, round_id
)
}

tmpl_df <- expand_model_out_grid(config_tasks,
round_id = round_id,
required_vals_only = required_vals_only,
include_sample_ids = TRUE,
compound_taskid_set = compound_taskid_set,
output_types = output_types,
derived_task_ids = derived_task_ids
derived_task_ids = derived_task_ids,
force_output_types = force_output_types
)
if (nrow(tmpl_df) == 0L && !complete_cases_only) {
  # If all output_types are optional, expand_model_out_grid returns
  # a zero row and column data.frame. To attempt to expand required task id
  # values when complete_cases_only = FALSE, we use
  # force_output_types = TRUE to force the output types to be included. We
  # then remove output type related columns and create a data.frame of
  # required task id values only.
  tmpl_df <- expand_model_out_grid(
    config_tasks,
    round_id = round_id,
    required_vals_only = required_vals_only,
    include_sample_ids = TRUE,
    compound_taskid_set = compound_taskid_set,
    output_types = output_types,
    derived_task_ids = derived_task_ids,
    force_output_types = TRUE
  )
  # Drop output type related columns by set membership. Comparing with `!=`
  # against a length-3 vector recycles element-wise, so columns would only be
  # removed when their position happened to line up with the recycled vector.
  output_type_cols <- hubUtils::std_colnames[c(
    "output_type",
    "output_type_id",
    "value"
  )]
  # drop = FALSE keeps a data.frame even if a single task id column remains.
  tmpl_df <- tmpl_df[, !names(tmpl_df) %in% output_type_cols, drop = FALSE] |>
    unique()
}
if (nrow(tmpl_df) == 0L) {
return(tmpl_df)
}

tmpl_cols <- c(
hubUtils::get_round_task_id_names(
Expand All @@ -155,7 +203,6 @@ submission_tmpl <- function(hub_con, config_tasks, round_id,
),
hubUtils::std_colnames[names(hubUtils::std_colnames) != "model_id"]
)

# Add NA columns for value and all optional cols
na_cols <- tmpl_cols[!tmpl_cols %in% names(tmpl_df)]
tmpl_df[, na_cols] <- NA
Expand Down
30 changes: 28 additions & 2 deletions man/submission_tmpl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 57 additions & 0 deletions tests/testthat/test-submission_tmpl.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,60 @@ test_that("submission_tmpl ignoring derived task ids works", {
)
)
})


test_that("submission_tmpl force_output_types works", {
  config_tasks <- read_config_file(
    test_path(
      "testdata", "configs",
      "tasks-samples-v4.json"
    )
  )
  # When force_output_types is not set and all requested output types are
  # optional, a zero row and column data.frame is returned by default.
  req_non_force_default <- suppressMessages(
    suppressWarnings(
      submission_tmpl(
        config_tasks = config_tasks,
        round_id = "2022-10-22",
        required_vals_only = TRUE,
        output_types = "sample"
      )
    )
  )
  # Assert on the object created above; `req_non_force` is only defined later.
  expect_equal(dim(req_non_force_default), c(0L, 0L))

  # When force_output_types is not set, all requested output types are
  # optional and complete_cases_only = FALSE, a data.frame containing required
  # task ID values is returned, with all optional task ids and output type
  # related columns set to NA.
  req_non_force <- suppressMessages(
    suppressWarnings(
      submission_tmpl(
        config_tasks = config_tasks,
        round_id = "2022-10-22",
        required_vals_only = TRUE,
        output_types = "sample",
        complete_cases_only = FALSE
      )
    )
  )
  expect_equal(dim(req_non_force), c(4L, 9L))
  expect_equal(unique(req_non_force$output_type), NA_character_)

  # When force_output_types is TRUE, the requested output type should be
  # returned.
  req_force <- suppressMessages(
    suppressWarnings(
      submission_tmpl(
        config_tasks = config_tasks,
        round_id = "2022-10-22",
        required_vals_only = TRUE,
        force_output_types = TRUE,
        output_types = "sample",
        complete_cases_only = FALSE
      )
    )
  )
  expect_equal(dim(req_force), c(4L, 9L))
  expect_equal(unique(req_force$output_type), "sample")
})

0 comments on commit aa31133

Please sign in to comment.