Skip to content

Commit

Permalink
propagate output subsetting and derived_task_ids arg to spl checks
Browse files Browse the repository at this point in the history
  • Loading branch information
annakrystalli committed Aug 14, 2024
1 parent 2bc332c commit 7fe78b4
Show file tree
Hide file tree
Showing 19 changed files with 204 additions and 40 deletions.
18 changes: 12 additions & 6 deletions R/check_tbl_spl_compound_taskid_set.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' Column types must **all be character**.
#' @inherit check_tbl_colnames params
#' @inherit check_tbl_colnames return
#' @inheritParams expand_model_out_grid
#' @details If the check fails, the output of the check includes an `errors` element,
#' a list of items, one for each modeling task failing validation.
#' The structure depends on the reason the check failed.
Expand All @@ -31,24 +32,25 @@
#' See [hubverse documentation on samples](https://hubverse.io/en/latest/user-guide/sample-output-type.html)
#' for more details.
#' @export
check_tbl_spl_compound_taskid_set <- function(tbl, round_id, file_path, hub_path) {
check_tbl_spl_compound_taskid_set <- function(tbl, round_id, file_path, hub_path,
derived_task_ids = NULL) {
config_tasks <- hubUtils::read_config(hub_path, "tasks")

if (isFALSE(has_spls_tbl(tbl)) || isFALSE(hubUtils::is_v3_config(config_tasks))) {
return(skip_v3_spl_check(file_path))
}

compound_taskid_set <- get_tbl_compound_taskid_set(tbl, config_tasks, round_id,
compact = FALSE, error = FALSE
compound_taskid_set <- get_tbl_compound_taskid_set(
tbl, config_tasks, round_id,
compact = FALSE, error = FALSE,
derived_task_ids = NULL
)

check <- purrr::map_lgl(
compound_taskid_set,
~ is.null(attr(.x, "errors"))
) |> all()



capture_check_cnd(
check = check,
file_path = file_path,
Expand All @@ -59,7 +61,11 @@ check_tbl_spl_compound_taskid_set <- function(tbl, round_id, file_path, hub_path
details = compile_msg(compound_taskid_set),
errors = compile_errors(compound_taskid_set),
error = TRUE,
compound_taskid_set = if (check) { compound_taskid_set } else { NA }
compound_taskid_set = if (check) {
compound_taskid_set
} else {
NA
}
)
}

Expand Down
8 changes: 6 additions & 2 deletions R/check_tbl_spl_compound_tid.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @param tbl a tibble/data.frame of the contents of the file being validated. Column types must **all be character**.
#' @inherit check_tbl_colnames params
#' @inherit check_tbl_colnames return
#' @inheritParams expand_model_out_grid
#' @param compound_taskid_set a list of `compound_taskid_set`s (characters vector of compound task IDs),
#' one for each modeling task. Used to override the compound task ID set in the config file,
#' for example, when validating coarser samples.
Expand All @@ -17,7 +18,8 @@
#' for more details.
#' @export
check_tbl_spl_compound_tid <- function(tbl, round_id, file_path, hub_path,
compound_taskid_set = NULL) {
compound_taskid_set = NULL,
derived_task_ids = NULL) {
if (!is.null(compound_taskid_set) && isTRUE(is.na(compound_taskid_set))) {
cli::cli_abort("Valid {.var compound_taskid_set} must be provided.")
}
Expand All @@ -33,7 +35,9 @@ check_tbl_spl_compound_tid <- function(tbl, round_id, file_path, hub_path,
return(skip_v3_spl_check(file_path))
}

hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set)
hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set,
derived_task_ids = derived_task_ids
)
# TODO: Currently, samples must strictly match the compound task ID set expectations
# and cannot handle coarser-grained compound task ID sets.
n_tbl <- hash_tbl[hash_tbl$n_compound_idx > 1L, ]
Expand Down
7 changes: 5 additions & 2 deletions R/check_tbl_spl_n.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
#' for more details.
#' @export
check_tbl_spl_n <- function(tbl, round_id, file_path, hub_path,
compound_taskid_set = NULL) {
compound_taskid_set = NULL,
derived_task_ids = NULL) {
if (!is.null(compound_taskid_set) && isTRUE(is.na(compound_taskid_set))) {
cli::cli_abort("Valid {.var compound_taskid_set} must be provided.")
}
Expand All @@ -32,7 +33,9 @@ check_tbl_spl_n <- function(tbl, round_id, file_path, hub_path,
return(skip_v3_spl_check(file_path))
}

hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set)
hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set,
derived_task_ids = derived_task_ids
)
n_ranges <- get_round_spl_n_ranges(config_tasks, round_id)

n_tbl <- dplyr::group_by(hash_tbl, .data$compound_idx) %>%
Expand Down
6 changes: 4 additions & 2 deletions R/check_tbl_spl_non_compound_tid.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
#' for more details.
#' @export
check_tbl_spl_non_compound_tid <- function(tbl, round_id, file_path, hub_path,
compound_taskid_set = NULL) {
compound_taskid_set = NULL,
derived_task_ids = NULL) {
if (!is.null(compound_taskid_set) && isTRUE(is.na(compound_taskid_set))) {
cli::cli_abort("Valid {.var compound_taskid_set} must be provided.")
}
Expand All @@ -33,7 +34,8 @@ check_tbl_spl_non_compound_tid <- function(tbl, round_id, file_path, hub_path,
return(skip_v3_spl_check(file_path))
}

hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set)
hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks, compound_taskid_set,
derived_task_ids = derived_task_ids)

n_tbl <- dplyr::summarise(
hash_tbl,
Expand Down
14 changes: 10 additions & 4 deletions R/compound_taskid-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' @param error Logical. If TRUE, an error will be thrown if the compound task ID set is not valid.
#' If FALSE and an error is detected, the detected compound task ID set will be
#' returned with error attributes attached.
#' @inheritParams expand_model_out_grid
#'
#' @return A list of vectors of compound task IDs detected in the tbl, one for each
#' modeling task in the round. If `compact` is TRUE, modeling tasks returning NULL
Expand All @@ -29,11 +30,14 @@
#' )
#'
get_tbl_compound_taskid_set <- function(tbl, config_tasks, round_id,
compact = TRUE, error = TRUE) {
compact = TRUE, error = TRUE,
derived_task_ids = NULL) {
if (!inherits(tbl, "tbl_df")) {
tbl <- dplyr::as_tibble(tbl)
}

if (!is.null(derived_task_ids)) {
tbl[, derived_task_ids] <- NA_character_
}
tbl <- tbl[tbl$output_type == "sample", names(tbl) != "value"]
out_tid <- hubUtils::std_colnames["output_type_id"]

Expand All @@ -48,10 +52,12 @@ get_tbl_compound_taskid_set <- function(tbl, config_tasks, round_id,
round_id = round_id,
all_character = TRUE,
include_sample_ids = FALSE,
bind_model_tasks = FALSE
bind_model_tasks = FALSE,
output_types = "sample",
derived_task_ids = derived_task_ids
),
function(.x) {
if (!has_spls_tbl(.x)) {
if (nrow(.x) == 0L) {
return(NULL)
}
dplyr::inner_join(tbl, .x[, names(.x) != out_tid],
Expand Down
20 changes: 11 additions & 9 deletions R/v3-sample-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,32 @@
# compound task id sets across modeling tasks). This is achieved by using a full
# join between tbl sample data and the model output data sample grid for each
# modeling task. This means that only valid task id combinations are considered.
spl_hash_tbl <- function(tbl, round_id, config_tasks, compound_taskid_set = NULL) {
spl_hash_tbl <- function(tbl, round_id, config_tasks, compound_taskid_set = NULL,
derived_task_ids = NULL) {
tbl <- tbl[tbl$output_type == "sample", names(tbl) != "value"]
if (!is.null(derived_task_ids)) {
tbl[, derived_task_ids] <- NA_character_
}

mt_spl_grid <- expand_model_out_grid(
config_tasks = config_tasks,
round_id = round_id,
all_character = TRUE,
include_sample_ids = TRUE,
bind_model_tasks = FALSE,
compound_taskid_set = compound_taskid_set
compound_taskid_set = compound_taskid_set,
output_types = "sample",
derived_task_ids = derived_task_ids
) %>%
stats::setNames(seq_along(.))

mt_tbls <- purrr::map(
mt_spl_grid,
function(.x) {
if (is.null(.x) || !has_spls_tbl(.x)) {
if (nrow(.x) == 0L) {
NULL
} else {
dplyr::filter(
.x, .data$output_type == "sample"
) %>%
dplyr::rename(compound_idx = "output_type_id") %>%
dplyr::rename(.x, compound_idx = "output_type_id") %>%
dplyr::inner_join(tbl, .,
by = setdiff(
names(tbl),
Expand Down Expand Up @@ -59,12 +62,11 @@ spl_hash_tbl <- function(tbl, round_id, config_tasks, compound_taskid_set = NULL
)
) %>%
purrr::compact() %>%
purrr::imap( ~ dplyr::mutate(.x, mt_id = as.integer(.y))) %>% # add mt_id
purrr::imap(~ dplyr::mutate(.x, mt_id = as.integer(.y))) %>% # add mt_id
purrr::list_rbind()
}

get_mt_spl_hash_tbl <- function(tbl, compound_taskids, round_task_ids) {

if (is.null(tbl)) {
return(NULL)
}
Expand Down
12 changes: 11 additions & 1 deletion man/check_tbl_spl_compound_taskid_set.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/check_tbl_spl_compound_tid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/check_tbl_spl_n.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/check_tbl_spl_non_compound_tid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/get_tbl_compound_taskid_set.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/check_tbl_spl_compound_taskid_set.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,13 @@
$ use_cli_format : logi TRUE
- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ...

# Ignoring derived_task_ids in check_tbl_spl_compound_taskid_set works

Code
check_tbl_spl_compound_taskid_set(tbl, round_id, file_path, hub_path,
derived_task_ids = "target_end_date")
Output
<message/check_success>
Message:
All samples in a model task conform to single, unique compound task ID set that matches or is coarser than the configured `compound_taksid_set`.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/check_tbl_spl_compound_tid.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,13 @@
Message:
Each sample compound task ID contains single, unique value.

# Ignoring derived_task_ids in check_tbl_spl_compound_tid works

Code
check_tbl_spl_compound_tid(tbl, round_id, file_path, hub_path,
derived_task_ids = "target_end_date")
Output
<message/check_success>
Message:
Each sample compound task ID contains single, unique value.

11 changes: 10 additions & 1 deletion tests/testthat/_snaps/check_tbl_spl_n.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@

# Overriding compound_taskid_set in check_tbl_spl_compound_tid works
# Overriding compound_taskid_set in check_tbl_spl_n works

Code
str(check_tbl_spl_n(tbl_coarse, round_id, file_path, hub_path))
Expand Down Expand Up @@ -244,3 +244,12 @@
Warning:
Number of samples per compound idx not consistent. Sample numbers supplied per compound idx vary between 9 and 10. See `errors` attribute for details.

# Ignoring derived_task_ids in check_tbl_spl_n works

Code
check_tbl_spl_n(tbl, round_id, file_path, hub_path, derived_task_ids = "target_end_date")
Output
<message/check_success>
Message:
Required samples per compound idx task present.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/check_tbl_spl_non_compound_tid.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,13 @@
Message:
Task ID combinations of non compound task id values consistent across modeling task samples.

# Ignoring derived_task_ids in check_tbl_spl_compound_tid works

Code
check_tbl_spl_non_compound_tid(tbl, round_id, file_path, hub_path,
derived_task_ids = "target_end_date")
Output
<message/check_success>
Message:
Task ID combinations of non compound task id values consistent across modeling task samples.

Loading

0 comments on commit 7fe78b4

Please sign in to comment.