diff --git a/DESCRIPTION b/DESCRIPTION index 54aea2cb..e1f04abe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,7 @@ Imports: config, dplyr, fs, + gh, hubUtils (>= 0.0.0.9014), jsonlite, jsonvalidate, @@ -44,6 +45,7 @@ Imports: yaml Suggests: covr, + gert, mockery, rmarkdown, testthat (>= 3.0.0), diff --git a/NAMESPACE b/NAMESPACE index b04b619f..830054b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,17 +1,21 @@ # Generated by roxygen2: do not edit by hand +S3method(combine,hub_validations) S3method(print,hub_validations) +S3method(print,pr_hub_validations) export("%>%") +export(as_hub_validations) export(capture_check_cnd) export(capture_check_info) export(capture_exec_error) export(capture_exec_warning) -export(cfg_check_tbl_col_timediff) +export(check_config_hub_valid) export(check_file_exists) export(check_file_format) export(check_file_location) export(check_file_name) export(check_file_read) +export(check_for_errors) export(check_metadata_file_exists) export(check_metadata_file_ext) export(check_metadata_file_location) @@ -19,8 +23,10 @@ export(check_metadata_file_name) export(check_metadata_matches_schema) export(check_metadata_schema_exists) export(check_submission_metadata_file_exists) +export(check_submission_time) export(check_tbl_col_types) export(check_tbl_colnames) +export(check_tbl_match_round_id) export(check_tbl_rows_unique) export(check_tbl_unique_round_id) export(check_tbl_value_col) @@ -30,14 +36,27 @@ export(check_tbl_values) export(check_tbl_values_required) export(check_valid_round_id) export(check_valid_round_id_col) +export(combine) +export(is_any_error) export(is_error) +export(is_exec_error) +export(is_exec_warn) export(is_failure) export(is_info) export(is_success) +export(new_hub_validations) export(not_pass) +export(opt_check_tbl_col_timediff) +export(opt_check_tbl_counts_lt_popn) +export(opt_check_tbl_horizon_timediff) export(read_model_out_file) +export(try_check) export(validate_model_data) export(validate_model_file) export(validate_model_metadata) +export(validate_pr) export(validate_submission) +export(validate_submission_time) +importFrom(lubridate,"%within%") importFrom(magrittr,"%>%") +importFrom(rlang,"!!!") diff --git a/R/check_config_hub_valid.R b/R/check_config_hub_valid.R new file mode 100644 index 00000000..5aaed27b --- /dev/null +++ b/R/check_config_hub_valid.R @@ -0,0 +1,32 @@ +#' Check hub correctly configured +#' +#' Checks that `admin` and `tasks` configuration files in directory `hub-config` +#' are valid. +#' @inherit check_valid_round_id return params +#' +#' @export +check_config_hub_valid <- function(hub_path) { + valid_config <- hubUtils::validate_hub_config(hub_path) %>% + suppressMessages() %>% + suppressWarnings() + + check <- all(unlist(valid_config)) + + if (check) { + details <- NULL + } else { + details <- cli::format_inline( + "Config file{?s} {.val {names(valid_config)[!unlist(valid_config)]}} invalid." + ) + } + + capture_check_cnd( + check = check, + file_path = basename(hub_path), + msg_subject = "All hub config files", + msg_attribute = "valid.", + msg_verbs = c("are", "must be"), + error = TRUE, + details = details + ) +} diff --git a/R/check_for_errors.R b/R/check_for_errors.R new file mode 100644 index 00000000..5ea2e2ba --- /dev/null +++ b/R/check_for_errors.R @@ -0,0 +1,26 @@ +#' Raise conditions stored in a `hub_validations` object +#' +#' This is meant to be used in CI workflows to raise conditions from +#' `hub_validations` objects. +#' +#' @param x A `hub_validations` object +#' +#' @return An error if one of the elements of `x` is of class `check_failure`, +#' `check_error`, `check_exec_error` or `check_exec_warning`. +#' `TRUE` invisibly otherwise. +#' +#' @export +check_for_errors <- function(x) { + flag_checks <- x[purrr::map_lgl(x, ~not_pass(.x))] + + class(flag_checks) <- c("hub_validations", "list") + + if (length(flag_checks) > 0) { + print(flag_checks) + rlang::abort( + "\nThe validation checks produced some failures/errors reported above." + ) + } + + return(invisible(TRUE)) +} diff --git a/R/check_submission_date.R b/R/check_submission_date.R deleted file mode 100644 index e69de29b..00000000 diff --git a/R/check_submission_time.R b/R/check_submission_time.R new file mode 100644 index 00000000..f09664ea --- /dev/null +++ b/R/check_submission_time.R @@ -0,0 +1,61 @@ +#' Checks submission is within the valid submission window for a given round. +#' +#' @inherit check_tbl_col_types params return +#' +#' @importFrom lubridate %within% +#' @export +check_submission_time <- function(hub_path, file_path) { + config_tasks <- hubUtils::read_config(hub_path, "tasks") + submission_config <- get_file_round_config(file_path, hub_path)[["submissions_due"]] + hub_tz <- get_hub_timezone(hub_path) + + if (!is.null(submission_config[["relative_to"]])) { + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path + ) + relative_date <- as.Date( + unique(tbl[[submission_config[["relative_to"]]]]) + ) + submission_window <- get_submission_window( + start = relative_date + submission_config[["start"]], + end = relative_date + submission_config[["end"]], + hub_tz + ) + } else { + submission_window <- get_submission_window( + start = submission_config[["start"]], + end = submission_config[["end"]], + hub_tz + ) + } + check <- Sys.time() %within% submission_window + + if (check) { + details <- NULL + } else { + details <- cli::format_inline( + "Current time {.val {Sys.time()}} is outside window {.val {submission_window}}." + ) + } + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = "Submission time", + msg_attribute = "within accepted submission window for round.", + details = details + ) +} + +get_submission_window <- function(start, end, hub_tz) { + submit_window_start <- lubridate::ymd(start, tz = hub_tz) + submit_window_end <- lubridate::ymd_hms(paste(end, "23:59:59"), + tz = hub_tz + ) + + lubridate::interval( + start = submit_window_start, + end = submit_window_end + ) +} diff --git a/R/check_tbl_match_round_id.R b/R/check_tbl_match_round_id.R new file mode 100644 index 00000000..ae0f3dd1 --- /dev/null +++ b/R/check_tbl_match_round_id.R @@ -0,0 +1,50 @@ +#' Check model output data tbl round ID matches submission round ID. +#' +#' @inherit check_tbl_unique_round_id params details return +#' @export +check_tbl_match_round_id <- function(tbl, file_path, hub_path, + round_id_col = NULL) { + check_round_id_col <- check_valid_round_id_col( + tbl, file_path, hub_path, round_id_col) + + if (is_info(check_round_id_col)) { + return(check_round_id_col) + } + if (is_failure(check_round_id_col)) { + class(check_round_id_col)[1] <- "check_error" + check_round_id_col$call <- rlang::call_name(rlang::current_call()) + return(check_round_id_col) + } + + if (is.null(round_id_col)) { + round_id_col <- get_file_round_id_col(file_path, hub_path) + } + round_id <- parse_file_name(file_path)$round_id + + round_id_match <- tbl[[round_id_col]] == round_id + check <- all(round_id_match) + + if (check) { + details <- NULL + } else { + unmatched_round_ids <- unique(tbl[[round_id_col]][!round_id_match]) + details <- cli::format_inline( + "{.var round_id} {cli::qty(length(unmatched_round_ids))} + value{?s} {.val {unmatched_round_ids}} {?does/do} not match + submission {.var round_id} {.val {round_id}}" + ) + } + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = cli::format_inline( + "All {.var round_id_col} {.val {round_id_col}} values" + ), + msg_attribute = "submission {.var round_id} from file name.", + msg_verbs = c("match", "must match"), + error = TRUE, + details = details + ) + +} diff --git a/R/check_tbl_values_required.R b/R/check_tbl_values_required.R index 0734226e..cd106eb7 100644 --- a/R/check_tbl_values_required.R +++ b/R/check_tbl_values_required.R @@ -22,14 +22,13 @@ check_tbl_values_required <- function(tbl, round_id, file_path, hub_path) { round_id = round_id, required_vals_only = FALSE, all_character = TRUE, - as_arrow_table = TRUE, + as_arrow_table = FALSE, bind_model_tasks = FALSE ) tbl <- purrr::map( full, - ~ dplyr::inner_join(.x, tbl, by = names(tbl))[, names(tbl)] %>% - tibble::as_tibble() + ~ dplyr::inner_join(.x, tbl, by = names(tbl))[, names(tbl)] ) missing_df <- purrr::pmap( @@ -183,7 +182,7 @@ missing_req_rows <- function(opt_cols, x, mask, req, full, split_req = FALSE) { # avoids erroneously returning missing required values that are not applicable # to a given model task or output type. expected_req <- dplyr::inner_join(req, - tibble::as_tibble(applicaple_full[, names(req)]), + applicaple_full[, names(req)], by = names(req) ) %>% unique() @@ -200,7 +199,7 @@ missing_req_rows <- function(opt_cols, x, mask, req, full, split_req = FALSE) { unique(x[, opt_cols]) )[, names(x)] } else { - tibble::as_tibble(full[1, names(x)])[0, ] + full[1, names(x)][0, ] } } diff --git a/R/cnd_utils.R b/R/cnd_utils.R index 8d98d585..ae43ef94 100644 --- a/R/cnd_utils.R +++ b/R/cnd_utils.R @@ -28,3 +28,21 @@ is_info <- function(x) { not_pass <- function(x) { !inherits(x, "check_success") & !inherits(x, "check_info") } + +#' @describeIn is_success Is exec error? +#' @export +is_exec_error <- function(x) { + inherits(x, "check_exec_error") +} + +#' @describeIn is_success Is exec warning? +#' @export +is_exec_warn <- function(x) { + inherits(x, "check_exec_warn") +} + +#' @describeIn is_success Is error or exec error? +#' @export +is_any_error <- function(x) { + inherits(x, "check_error") | inherits(x, "check_exec_error") +} diff --git a/R/execute_custom_checks.R b/R/execute_custom_checks.R index b432b68e..29f75f02 100644 --- a/R/execute_custom_checks.R +++ b/R/execute_custom_checks.R @@ -36,5 +36,5 @@ execute_custom_checks <- function(validations_cfg_path = NULL) { purrr::map( purrr::set_names(names(validations_cfg)), ~ exec_cfg_check(.x, validations_cfg, caller_env, caller_call) - ) + ) %>% as_hub_validations() } diff --git a/R/hub_validations_methods.R b/R/hub_validations_methods.R index 82d345e9..764bc9f8 100644 --- a/R/hub_validations_methods.R +++ b/R/hub_validations_methods.R @@ -6,29 +6,88 @@ #' #' @export print.hub_validations <- function(x, ...) { - + if (length(x) == 0L) { + msg <- cli::format_inline("Empty {.cls hub_validations}") + } else { msg <- stats::setNames( - paste( - fs::path_file(purrr::map_chr(x, "where")), - purrr::map_chr(x, "message"), - sep = ": " - ), - dplyr::case_when( - purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_success")) ~ "v", - purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_failure")) ~ "!", - purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_error")) ~ "x", - purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_info")) ~ "i", - TRUE ~ "*" - ) + paste( + fs::path_file(purrr::map_chr(x, "where")), + purrr::map_chr(x, "message"), + sep = ": " + ), + dplyr::case_when( + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_success")) ~ "v", + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_failure")) ~ "!", + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_exec_warn")) ~ "!", + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_error")) ~ "x", + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_exec_error")) ~ "x", + purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_info")) ~ "i", + TRUE ~ "*" + ) ) + } + + octolog::octo_inform(msg) +} + + +#' Concatenate `hub_validations` S3 class objects +#' +#' @param ... `hub_validations` S3 class objects to be concatenated. +#' @return a `hub_validations` S3 class object. +#' +#' @export +combine <- function(...) { + UseMethod("combine") +} - octolog::octo_inform(msg) +#' @export +combine.hub_validations <- function(...) { + rlang::list2(...) %>% + purrr::compact() %>% + validate_internal_class(class = "hub_validations") + structure(c(...), + class = c("hub_validations", "list") + ) +} + +validate_internal_class <- function(x, class = c( + "hub_check", + "hub_validations" +)) { + if (length(x) == 0L) { + return(invisible(TRUE)) + } + class <- rlang::arg_match(class) + valid <- purrr::map_lgl(x, ~ inherits(.x, class)) + if (any(!valid)) { + cli::cli_abort( + c( + "!" = "All elements must inherit from class {.cls {class}}.", + "x" = "{cli::qty(sum(!valid))} Element{?s} with ind{?ex/ices} + {.val {which(!valid)}} {cli::qty(sum(!valid))} do{?es/} not." + ) + ) + } + invisible(TRUE) } summary.hub_validations <- function(x, ...) { - # TODO - NULL + # TODO + NULL +} +# TODO: Code to consider implementing more hierarchical printing of messages. +# Currently not implemented as pr_hub_validations class not implemented. +#' Print results of `validate_pr()` function as a bullet list +#' +#' @param x An object of class `pr_hub_validations` +#' @param ... Unused argument present for class consistency +#' +#' +#' @export +print.pr_hub_validations <- function(x, ...) { + purrr::map(x, print) } diff --git a/R/new_hub_validations.R b/R/new_hub_validations.R new file mode 100644 index 00000000..3d97e3a3 --- /dev/null +++ b/R/new_hub_validations.R @@ -0,0 +1,47 @@ +#' Create new or convert list to `hub_validations` S3 class object +#' +#' @param ... named elements to be included. Each element must be an object which +#' inherits from class ``. +#' @param x a list of named elements. Each element must be an object which +#' inherits from class ``. +#' +#' @return an S3 object of class ``. +#' @export +#' @describeIn new_hub_validations Create new `` S3 class object +#' @examples +#' new_hub_validations() +#' +#' hub_path <- system.file("testhubs/simple", package = "hubValidations") +#' file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" +#' new_hub_validations( +#' file_exists = check_file_exists(file_path, hub_path), +#' file_name = check_file_name(file_path) +#' ) +#' x <- list( +#' file_exists = check_file_exists(file_path, hub_path), +#' file_name = check_file_name(file_path) +#' ) +#' as_hub_validations(x) +new_hub_validations <- function(...) { + x <- rlang::dots_list(...) %>% + purrr::compact() + + validate_internal_class(x, class = "hub_check") + class(x) <- c("hub_validations", "list") + x +} + +#' @export +#' @describeIn new_hub_validations Convert list to `` S3 class object +as_hub_validations <- function(x) { + if (!inherits(x, "list")) { + cli::cli_abort( + c( + "x" = "{.var x} must inherit from class {.cls list} not {.cls {class(x)}}." + ) + ) + } + validate_internal_class(x, class = "hub_check") + class(x) <- c("hub_validations", "list") + x +} diff --git a/R/cfg_check_tbl_col_timediff.R b/R/opt_check_tbl_col_timediff.R similarity index 97% rename from R/cfg_check_tbl_col_timediff.R rename to R/opt_check_tbl_col_timediff.R index 3860a4f7..34856801 100644 --- a/R/cfg_check_tbl_col_timediff.R +++ b/R/opt_check_tbl_col_timediff.R @@ -6,7 +6,7 @@ #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return #' @export -cfg_check_tbl_col_timediff <- function(tbl, file_path, hub_path, +opt_check_tbl_col_timediff <- function(tbl, file_path, hub_path, t0_colname, t1_colname, timediff = lubridate::weeks(2)) { diff --git a/R/opt_check_tbl_counts_lt_popn.R b/R/opt_check_tbl_counts_lt_popn.R new file mode 100644 index 00000000..5e3a0bd3 --- /dev/null +++ b/R/opt_check_tbl_counts_lt_popn.R @@ -0,0 +1,148 @@ +#' Check that predicted values per location are less than total location population. +#' +#' @param targets Either a single target key list or a list of multiple target key lists. +#' @param popn_file_path Character string. +#' Path to population data relative to the hub root. +#' Defaults to `auxiliary-data/locations.csv`. +#' @param popn_col Character string. +#' The name of the population size column in the population data set. +#' @param location_col Character string. +#' The name of the location column. +#' Used to join population data to submission file data. +#' Must be shared by both files. +#' @details +#' Should only be applied to rows containing count predictions. Use argument +#' `targets` to filter `tbl` data to appropriate count target rows. +#' +#' @inherit check_tbl_colnames params +#' @inherit check_tbl_col_types return +#' @export +#' @examples +#' hub_path <- system.file("testhubs/flusight", package = "hubValidations") +#' file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" +#' tbl <- hubValidations::read_model_out_file(file_path, hub_path) +#' # Single target key list +#' targets <- list("target" = "wk ahead inc flu hosp") +#' opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) +opt_check_tbl_counts_lt_popn <- function(tbl, file_path, hub_path, targets = NULL, + popn_file_path = "auxiliary-data/locations.csv", + popn_col = "population", + location_col = "location") { + checkmate::assert_choice(location_col, choices = names(tbl)) + tbl$row_id <- seq_along(tbl[[location_col]]) + + if (!is.null(targets)) { + assert_target_keys(targets, hub_path, file_path) + tbl <- filter_targets(tbl, targets) + if (nrow(tbl) == 0L) { + return( + capture_check_info( + file_path, + msg = "Target filtering returned tbl with zero rows. Check skipped." + ) + ) + } + } + + popn_full_path <- fs::path(hub_path, popn_file_path) + if (!fs::file_exists(popn_full_path)) { + cli::cli_abort( + "File not found at {.path {popn_file_path}}" + ) + } + popn <- switch(fs::path_ext(popn_full_path), + csv = arrow::read_csv_arrow(popn_full_path), + parquet = arrow::read_parquet(popn_full_path), + arrow = arrow::read_feather(popn_full_path) + ) + checkmate::assert_choice(location_col, choices = names(popn)) + checkmate::assert_choice(popn_col, choices = names(popn)) + popn <- popn[, c(location_col, popn_col)] + + tbl <- dplyr::left_join(tbl, popn, by = location_col) + + if (any(is.na(tbl[[popn_col]]))) { + invalid_location <- unique(tbl[[location_col]][is.na(tbl[[popn_col]])]) + cli::cli_abort( + "No match for {cli::qty(length(invalid_location))} location{?s} + {.val {invalid_location}} found in {.path {popn_file_path}}" + ) + } + + compare <- tbl[["value"]] < tbl[[popn_col]] + check <- all(compare) + + if (check) { + details <- NULL + } else { + details <- cli::format_inline("Affected rows: {.val {tbl$row_id[!compare]}}.") + } + + n_loc <- length(unique(tbl[[location_col]])) + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = "Target counts ", + msg_verbs = c("are", "must be"), + msg_attribute = cli::format_inline( + "{cli::qty(n_loc)} less than location population size{?s}." + ), + details = details + ) +} + +assert_target_keys <- function(targets, hub_path, file_path) { + single_tk <- purrr::pluck_depth(targets) == 2L + if (single_tk) { + valid_target_keys <- validate_target_key(targets, hub_path, file_path) + } else { + valid_target_keys <- purrr::map_lgl( + targets, ~ validate_target_key(.x, hub_path, file_path) + ) + } + if (all(valid_target_keys)) { + return(invisible(TRUE)) + } + if (single_tk) { + cli::cli_abort("Target does not match any round target keys.") + } else { + n <- sum(valid_target_keys) + cli::cli_abort("{cli::qty(n)}Target{?s} with ind{?ex/ices} + {.val {which(!valid_target_keys)}} + {cli::qty(n)} do{?es/} not match any round target keys.") + } +} + +validate_target_key <- function(target, hub_path, file_path) { + any(purrr::map_lgl( + get_file_target_metadata(hub_path, file_path), + ~ identical(.x, target) + )) +} + +filter_expr <- function(filter) { + paste(paste0(".data$", names(filter)), + filter, + sep = " %in% " + ) %>% + paste(collapse = ";") %>% + rlang::parse_exprs() +} + +#' @importFrom rlang !!! +filter_targets <- function(tbl, targets) { + if (purrr::pluck_depth(targets) == 2L) { + targets <- purrr::map_if( + targets, + ~ is.character(.x) && length(.x) == 1L, + ~ paste0("'", .x, "'") + ) + dplyr::filter(tbl, !!!filter_expr(targets)) + } else { + purrr::map( + targets, + ~ dplyr::filter(tbl, !!!filter_expr(.x)) + ) %>% purrr::list_rbind() + } +} diff --git a/R/opt_check_tbl_horizon_timediff.R b/R/opt_check_tbl_horizon_timediff.R new file mode 100644 index 00000000..8b75254b --- /dev/null +++ b/R/opt_check_tbl_horizon_timediff.R @@ -0,0 +1,79 @@ +#' Check time difference between values in two date columns equal a defined period. +#' +#' @param t0_colname Character string. The name of the time zero date column. +#' @param t1_colname Character string. The name of the time zero + 1 time step date column. +#' @param horizon_colname Character string. The name of the horizon column. +#' Defaults to `"horizon"`. +#' @param timediff an object of class `lubridate` [`Period-class`] and length 1. +#' The period of a single horizon. Default to 1 week. +#' @inherit check_tbl_colnames params +#' @inherit check_tbl_col_types return +#' @export +opt_check_tbl_horizon_timediff <- function(tbl, file_path, hub_path, t0_colname, + t1_colname, horizon_colname = "horizon", + timediff = lubridate::weeks()) { + checkmate::assert_class(timediff, "Period") + checkmate::assert_scalar(timediff) + checkmate::assert_character(t0_colname, len = 1L) + checkmate::assert_character(t1_colname, len = 1L) + checkmate::assert_character(horizon_colname, len = 1L) + checkmate::assert_choice(t0_colname, choices = names(tbl)) + checkmate::assert_choice(t1_colname, choices = names(tbl)) + checkmate::assert_choice(horizon_colname, choices = names(tbl)) + + config_tasks <- hubUtils::read_config(hub_path, "tasks") + schema <- hubUtils::create_hub_schema(config_tasks, + partitions = NULL, + r_schema = TRUE + ) + assert_column_date(t0_colname, schema) + assert_column_date(t1_colname, schema) + assert_column_integer(horizon_colname, schema) + + if (!lubridate::is.Date(tbl[[t0_colname]])) { + tbl[, t0_colname] <- as.Date(tbl[[t0_colname]]) + } + if (!lubridate::is.Date(tbl[[t1_colname]])) { + tbl[, t1_colname] <- as.Date(tbl[[t1_colname]]) + } + if (!is.integer(tbl[[horizon_colname]])) { + tbl[, horizon_colname] <- as.integer(tbl[[horizon_colname]]) + } + + compare <- tbl[[t0_colname]] + (timediff * tbl[[horizon_colname]]) == tbl[[t1_colname]] + check <- all(compare) + if (check) { + details <- NULL + } else { + invalid_vals <- paste0( + tbl[[t1_colname]][!compare], + " (horizon = ", tbl[[horizon_colname]][!compare], ")" + ) %>% unique() + + details <- cli::format_inline( + "t1 var value{?s} {.val {invalid_vals}} are invalid." + ) + } + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = cli::format_inline( + "Time differences between t0 var {.var {t0_colname}} and t1 var + {.var {t1_colname}}" + ), + msg_verbs = c("all match", "do not all match"), + msg_attribute = cli::format_inline("expected period of {.val {timediff}} * {.var {horizon_colname}}."), + details = details + ) +} + +assert_column_integer <- function(colname, schema) { + if (schema[colname] != "integer") { + cli::cli_abort( + "Column {.arg colname} must be configured as {.cls integer} not + {.cls {schema[colname]}}.", + call = rlang::caller_call() + ) + } +} diff --git a/R/try_check.R b/R/try_check.R new file mode 100644 index 00000000..165688c3 --- /dev/null +++ b/R/try_check.R @@ -0,0 +1,32 @@ +#' Wrap check expression to capture check execution errors +#' +#' @param expr check function expression to run. +#' @inheritParams check_tbl_colnames +#' +#' @return If `expr` executes correctly, the output of `expr` is returned. If +# ' execution fails, and object of class `` is returned. +#' The execution error message is attached as attribute `msg`. +#' @export +try_check <- function(expr, file_path) { + check <- try(expr, silent = TRUE) + if (inherits(check, "try-error")) { + return( + capture_exec_error( + file_path = file_path, + msg = attr(check, "condition")$message, + call = get_expr_call_name(expr) + ) + ) + } + check +} + +get_expr_call_name <- function(expr) { + call_name <- try(rlang::call_name(rlang::expr(expr)), + silent = TRUE + ) + if (inherits(call_name, "try-error")) { + return(NA) + } + call_name +} diff --git a/R/utils.R b/R/utils.R index 4f8aa505..aa2c7bb3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,6 +21,18 @@ get_hub_model_output_dir <- function(hub_path) { if (is.null(model_output_dir)) "model-output" else model_output_dir } +get_file_target_metadata <- function(hub_path, file_path) { + round_config <- get_file_round_config(file_path, hub_path) + + purrr::map( + round_config[["model_tasks"]], + ~ .x[["target_metadata"]] %>% + purrr::map(~ .x[["target_keys"]]) + ) %>% + unlist(recursive = FALSE) %>% + unique() +} + abs_file_path <- function(file_path, hub_path, subdir = c("model-output", "model-metadata", "hub-config")) { subdir <- match.arg(subdir) diff --git a/R/validate_model_data.R b/R/validate_model_data.R index 61ef3362..06c4cff8 100644 --- a/R/validate_model_data.R +++ b/R/validate_model_data.R @@ -11,8 +11,7 @@ #' validate_model_data(hub_path, file_path) validate_model_data <- function(hub_path, file_path, round_id_col = NULL, validations_cfg_path = NULL) { - checks <- list() - class(checks) <- c("hub_validations", "list") + checks <- new_hub_validations() file_meta <- parse_file_name(file_path) round_id <- file_meta$round_id @@ -54,6 +53,16 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, return(checks) } + checks$match_round_id <- check_tbl_match_round_id( + tbl, + round_id_col = round_id_col, + file_path = file_path, + hub_path = hub_path + ) + if (is_error(checks$match_round_id)) { + return(checks) + } + # -- Column level checks ---- checks$colnames <- check_tbl_colnames( tbl, @@ -113,8 +122,6 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, custom_checks <- execute_custom_checks( validations_cfg_path = validations_cfg_path ) - checks <- c(checks, custom_checks) - class(checks) <- c("hub_validations", "list") - checks + combine(checks, custom_checks) } diff --git a/R/validate_pr.R b/R/validate_pr.R new file mode 100644 index 00000000..b4287394 --- /dev/null +++ b/R/validate_pr.R @@ -0,0 +1,106 @@ +#' Validate Pull Request +#' +#' @param gh_repo GitHub repository address in the format `username/repo` +#' @param pr_number Number of the pull request to validate +#' @param round_id_col Character string. The name of the column containing +#' `round_id`s. Only required if files contain a column that contains `round_id` +#' details but has not been configured via `round_id_from_variable: true` and +#' `round_id:` in in hub `tasks.json` config file. +#' @inheritParams validate_model_file +#' @inheritParams validate_submission +#' +#' @return An object of class `hub_validations`. +#' @export +#' +#' @examples +#' \dontrun{ +#' validate_pr( +#' hub_path = "." +#' gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", +#' pr_number = 3 +#' ) +#' } +validate_pr <- function(hub_path = ".", gh_repo, pr_number, + round_id_col = NULL, validations_cfg_path = NULL, + skip_submit_window_check = FALSE) { + + model_output_dir <- get_hub_model_output_dir(hub_path) + model_metadata_dir <- "model-metadata" + validations <- new_hub_validations() + + validations$valid_config <- try_check(check_config_hub_valid(hub_path), + file_path = basename(hub_path)) + if (is_any_error(validations$valid_config)) { + return(validations) + } + + tryCatch({ + pr_files <- gh::gh( + "/repos/{gh_repo}/pulls/{pr_number}/files", + gh_repo = gh_repo, + pr_number = pr_number + ) + pr_filenames <- purrr::map_chr(pr_files, ~.x$filename) + model_output_files <- get_pr_dir_files(pr_filenames, + model_output_dir) + model_metadata_files <- get_pr_dir_files(pr_filenames, + model_metadata_dir) + + model_output_vals <- purrr::map(model_output_files, + ~validate_submission( + hub_path, file_path = .x, + validations_cfg_path = validations_cfg_path, + skip_submit_window_check = skip_submit_window_check, + skip_check_config = TRUE) + ) %>% purrr::list_flatten() %>% as_hub_validations() + + model_metadata_vals <- purrr::map(model_metadata_files, + ~validate_model_metadata( + hub_path, file_path = .x, + validations_cfg_path = validations_cfg_path) + ) %>% purrr::list_flatten() %>% as_hub_validations() + + validations <- combine( + validations, + model_output_vals, + model_metadata_vals + ) + }, + error = function(e) { + # This handler is used when an unrecoverable error is thrown. This can + # happen when, e.g., the csv file cannot be parsed by read_csv(). In this + # situation, we want to output all the validations until this point plus + # this "unrecoverable" error. + e <- capture_exec_error( + file_path = gh_repo, + msg = conditionMessage(e) + ) + validations <<- combine(validations, + new_hub_validations(e)) + }) + + inform_unvalidated_files(model_output_files, + model_metadata_files, + pr_filenames) + return(validations) +} + +get_pr_dir_files <- function(pr_filenames, dir_name) { + pr_filenames[ + fs::path_has_parent(pr_filenames, dir_name) + ] %>% fs::path_rel(dir_name) +} + +inform_unvalidated_files <- function(model_output_files, + model_metadata_files, + pr_filenames) { + validated_files <- c(model_output_files, model_metadata_files) + if (length(pr_filenames) != length(validated_files)) { + validated_idx <- purrr::map_int(validated_files, + ~grep(.x, pr_files, fixed = TRUE) + ) + cli::cli_inform( + "PR contains commits to additional files which have not been checked: + {.val {pr_filenames[-validated_idx]}}.") + } +} diff --git a/R/validate_submission.R b/R/validate_submission.R index f5f437d1..d1b4f07a 100644 --- a/R/validate_submission.R +++ b/R/validate_submission.R @@ -3,6 +3,9 @@ #' of the file. #' #' @inherit validate_model_data return params +#' @param skip_submit_window_check Logical. Whether to skip the submission window check. +#' @param skip_check_config Logical. Whether to skip the hub config validation check. +#' check. #' @export #' #' @examples @@ -10,15 +13,34 @@ #' file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" #' validate_submission(hub_path, file_path) validate_submission <- function(hub_path, file_path, round_id_col = NULL, - validations_cfg_path = NULL) { + validations_cfg_path = NULL, + skip_submit_window_check = FALSE, + skip_check_config = FALSE) { + + check_hub_config <- new_hub_validations() + if (!skip_check_config) { + check_hub_config$valid_config <- check_config_hub_valid(hub_path) + if (not_pass(check_hub_config$valid_config)) { + return(check_hub_config) + } + } + + if (skip_submit_window_check) { + checks_submission_time <- new_hub_validations() + } else { + checks_submission_time <- validate_submission_time(hub_path, file_path) + } + checks_file <- validate_model_file( hub_path = hub_path, file_path = file_path, validations_cfg_path = validations_cfg_path ) - if (any(purrr::map_lgl(checks_file, ~ is_error(.x)))) { - return(checks_file) + if (any(purrr::map_lgl(checks_file, ~ is_any_error(.x)))) { + return( + combine(check_hub_config, checks_file, checks_submission_time) + ) } checks_data <- validate_model_data( @@ -28,8 +50,5 @@ validate_submission <- function(hub_path, file_path, round_id_col = NULL, validations_cfg_path = validations_cfg_path ) - checks <- c(checks_file, checks_data) - class(checks) <- c("hub_validations", "list") - - checks + combine(check_hub_config, checks_file, checks_data, checks_submission_time) } diff --git a/R/validate_submission_time.R b/R/validate_submission_time.R new file mode 100644 index 00000000..ea831b30 --- /dev/null +++ b/R/validate_submission_time.R @@ -0,0 +1,22 @@ +#' Validate a submitted model data file submission time. +#' +#' @inherit validate_model_data return params +#' @export +#' +#' @examples +#' hub_path <- system.file("testhubs/simple", package = "hubValidations") +#' file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" +#' validate_submission_time(hub_path, file_path) +validate_submission_time <- function(hub_path, file_path) { + checks <- list() + class(checks) <- c("hub_validations", "list") + + checks$submission_time <- try_check( + check_submission_time( + file_path = file_path, + hub_path = hub_path + ), + file_path = file_path + ) + checks +} diff --git a/inst/testhubs/flusight/auxiliary-data/locations.csv b/inst/testhubs/flusight/auxiliary-data/locations.csv new file mode 100644 index 00000000..2e489b05 --- /dev/null +++ b/inst/testhubs/flusight/auxiliary-data/locations.csv @@ -0,0 +1,54 @@ +abbreviation,location,location_name,population,,count_rate1,count_rate2,count_rate2p5,count_rate3,count_rate4,count_rate5 +US,US,US,332200066,,3322,6644,8305,9966,13288,16610 +AL,01,Alabama,5063778,,51,101,127,152,203,253 +AK,02,Alaska,711426,,7,14,18,21,28,36 +AZ,04,Arizona,7341018,,73,147,184,220,294,367 +AR,05,Arkansas,3041878,,30,61,76,91,122,152 +CA,06,California,38886551,,389,778,972,1167,1555,1944 +CO,08,Colorado,5803748,,58,116,145,174,232,290 +CT,09,Connecticut,3621089,,36,72,91,109,145,181 +DE,10,Delaware,1014872,,10,20,25,30,41,51 +DC,11,District of Columbia,668576,,7,13,17,20,27,33 +FL,12,Florida,22183852,,222,444,555,666,887,1109 +GA,13,Georgia,10855454,,109,217,271,326,434,543 +HI,15,Hawaii,1398977,,14,28,35,42,56,70 +ID,16,Idaho,1935521,,19,39,48,58,77,97 +IL,17,Illinois,12563096,,126,251,314,377,503,628 +IN,18,Indiana,6831941,,68,137,171,205,273,342 +IA,19,Iowa,3200224,,32,64,80,96,128,160 +KS,20,Kansas,2916451,,29,58,73,87,117,146 +KY,21,Kentucky,4494379,,45,90,112,135,180,225 +LA,22,Louisiana,4575074,,46,92,114,137,183,229 +ME,23,Maine,1384543,,14,28,35,42,55,69 +MD,24,Maryland,6133130,,61,123,153,184,245,307 +MA,25,Massachusetts,6978662,,70,140,174,209,279,349 +MI,26,Michigan,10032075,,100,201,251,301,401,502 +MN,27,Minnesota,5716548,,57,114,143,171,229,286 +MS,28,Mississippi,2927305,,29,59,73,88,117,146 +MO,29,Missouri,6164537,,62,123,154,185,247,308 +MT,30,Montana,1119563,,11,22,28,34,45,56 +NE,31,Nebraska,1961505,,20,39,49,59,78,98 +NV,32,Nevada,3165539,,32,63,79,95,127,158 +NH,33,New Hampshire,1394692,,14,28,35,42,56,70 +NJ,34,New Jersey,9254137,,93,185,231,278,370,463 +NM,35,New Mexico,2100079,,21,42,53,63,84,105 +NY,36,New York,19657190,,197,393,491,590,786,983 +NC,37,North Carolina,10596562,,106,212,265,318,424,530 +ND,38,North Dakota,772061,,8,15,19,23,31,39 +OH,39,Ohio,11749303,,117,235,294,352,470,587 +OK,40,Oklahoma,4001266,,40,80,100,120,160,200 +OR,41,Oregon,4238665,,42,85,106,127,170,212 +PA,42,Pennsylvania,12969276,,130,259,324,389,519,648 +RI,44,Rhode Island,1090390,,11,22,27,33,44,55 +SC,45,South Carolina,5246039,,52,105,131,157,210,262 +SD,46,South Dakota,906458,,9,18,23,27,36,45 +TN,47,Tennessee,7030607,,70,141,176,211,281,352 +TX,48,Texas,29914599,,299,598,748,897,1197,1496 +UT,49,Utah,3376238,,34,68,84,101,135,169 +VT,50,Vermont,646910,,6,13,16,19,26,32 +VA,51,Virginia,8583866,,86,172,215,258,343,429 +WA,53,Washington,7735834,,77,155,193,232,309,387 +WV,54,West Virginia,1774977,,18,35,44,53,71,89 +WI,55,Wisconsin,5891022,,59,118,147,177,236,295 +WY,56,Wyoming,578583,,6,12,14,17,23,29 +PR,72,Puerto Rico,3221789,,32,64,81,97,129,161 diff --git a/inst/testhubs/flusight/forecasts/hub-baseline/2023-04-24-hub-baseline.csv b/inst/testhubs/flusight/forecasts/hub-baseline/2023-04-24-hub-baseline.csv index 87e2d2a6..4d9cddd1 100644 --- a/inst/testhubs/flusight/forecasts/hub-baseline/2023-04-24-hub-baseline.csv +++ b/inst/testhubs/flusight/forecasts/hub-baseline/2023-04-24-hub-baseline.csv @@ -1,29 +1,29 @@ "forecast_date","target_end_date","horizon","target","location","output_type","output_type_id","value" -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","mean",,1033 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","mean",,1033 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","mean",,1033 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.01",0 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.025",0 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.05",0 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.1",281 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.15",600 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.2",717 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.25",817 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.3",877 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.35",913 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.4",965 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.45",1011 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.5",1033 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.55",1055 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.6",1101 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.65",1153 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.7",1189 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.75",1249 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.8",1349 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.85",1466 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.9",1785 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.95",3443 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.975",5183 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.99",7490 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.01",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.025",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.05",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.1",281 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.15",600 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.2",717 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.25",817 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.3",877 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.35",913 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.4",965 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.45",1011 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.5",1033 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.55",1055 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.6",1101 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.65",1153 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.7",1189 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.75",1249 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.8",1349 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.85",1466 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.9",1785 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.95",3443 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.975",5183 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.99",7490 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.01",0 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.025",0 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.05",0 diff --git a/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-01-hub-baseline.csv b/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-01-hub-baseline.csv index 5e709bd8..857fe1f0 100644 --- a/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-01-hub-baseline.csv +++ b/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-01-hub-baseline.csv @@ -1,29 +1,29 @@ "forecast_date","target_end_date","horizon","target","location","output_type","output_type_id","value" -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","mean",,926 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","mean",,926 2023-05-01,2023-05-15,2,"wk ahead inc flu hosp","US","mean",,926 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.01",0 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.025",0 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.05",0 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.1",193 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.15",495 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.2",618 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.25",717 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.3",774 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.35",822 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.4",857 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.45",904 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.5",926 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.55",948 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.6",995 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.65",1030 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.7",1078 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.75",1135 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.8",1234 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.85",1357 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.9",1659 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.95",3310 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.975",4880 -2023-05-01,2023-05-15,1,"wk ahead inc flu hosp","US","quantile","0.99",7399 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.01",0 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.025",0 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.05",0 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.1",193 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.15",495 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.2",618 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.25",717 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.3",774 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.35",822 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.4",857 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.45",904 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.5",926 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.55",948 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.6",995 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.65",1030 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.7",1078 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.75",1135 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.8",1234 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.85",1357 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.9",1659 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.95",3310 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.975",4880 +2023-05-01,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.99",7399 2023-05-01,2023-05-15,2,"wk ahead inc flu hosp","US","quantile","0.01",0 2023-05-01,2023-05-15,2,"wk ahead inc flu hosp","US","quantile","0.025",0 2023-05-01,2023-05-15,2,"wk ahead inc flu hosp","US","quantile","0.05",0 diff --git a/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-08-hub-baseline.parquet b/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-08-hub-baseline.parquet index 05e175ee..03ad7b6f 100644 Binary files a/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-08-hub-baseline.parquet and b/inst/testhubs/flusight/forecasts/hub-baseline/2023-05-08-hub-baseline.parquet differ diff --git a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-04-24-hub-ensemble.csv b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-04-24-hub-ensemble.csv index 516bb536..39e698d6 100644 --- a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-04-24-hub-ensemble.csv +++ b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-04-24-hub-ensemble.csv @@ -1,27 +1,27 @@ "forecast_date","target_end_date","horizon","target","location","output_type","output_type_id","value" -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.01",232 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.025",331 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.05",422 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.1",511 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.15",609 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.2",670 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.25",724 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.3",772 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.35",811 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.4",893 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.45",963 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.5",967 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.55",978 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.6",1023 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.65",1076 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.7",1134 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.75",1199 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.8",1287 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.85",1406 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.9",1575 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.95",1794 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.975",1960 -2023-04-24,2023-05-08,1,"wk ahead inc flu hosp","US","quantile","0.99",2250 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.01",232 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.025",331 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.05",422 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.1",511 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.15",609 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.2",670 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.25",724 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.3",772 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.35",811 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.4",893 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.45",963 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.5",967 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.55",978 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.6",1023 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.65",1076 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.7",1134 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.75",1199 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.8",1287 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.85",1406 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.9",1575 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.95",1794 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.975",1960 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","US","quantile","0.99",2250 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.01",106 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.025",186 2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","US","quantile","0.05",276 diff --git a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-01-hub-ensemble.arrow b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-01-hub-ensemble.arrow index bccbbf12..55b61611 100644 Binary files a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-01-hub-ensemble.arrow and b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-01-hub-ensemble.arrow differ diff --git a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-08-hub-ensemble.parquet b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-08-hub-ensemble.parquet index 4140992c..18333ae6 100644 Binary files a/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-08-hub-ensemble.parquet and b/inst/testhubs/flusight/forecasts/hub-ensemble/2023-05-08-hub-ensemble.parquet differ diff --git a/inst/testhubs/flusight/hub-config/validations.yml b/inst/testhubs/flusight/hub-config/validations.yml index d09f810c..7727f434 100644 --- a/inst/testhubs/flusight/hub-config/validations.yml +++ b/inst/testhubs/flusight/hub-config/validations.yml @@ -1,9 +1,10 @@ default: validate_model_data: - col_timediff: - fn: "cfg_check_tbl_col_timediff" + horizon_timediff: + fn: "opt_check_tbl_horizon_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date" t1_colname: "target_end_date" - timediff: !expr lubridate::weeks(2) + horizon_colname: "horizon" + timediff: !expr lubridate::weeks() diff --git a/man/check_config_hub_valid.Rd b/man/check_config_hub_valid.Rd new file mode 100644 index 00000000..0da090e3 --- /dev/null +++ b/man/check_config_hub_valid.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_config_hub_valid.R +\name{check_config_hub_valid} +\alias{check_config_hub_valid} +\title{Check hub correctly configured} +\usage{ +check_config_hub_valid(hub_path) +} +\arguments{ +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Checks that \code{admin} and \code{tasks} configuration files in directory \code{hub-config} +are valid. +} diff --git a/man/check_for_errors.Rd b/man/check_for_errors.Rd new file mode 100644 index 00000000..a7bebeec --- /dev/null +++ b/man/check_for_errors.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_for_errors.R +\name{check_for_errors} +\alias{check_for_errors} +\title{Raise conditions stored in a \code{hub_validations} object} +\usage{ +check_for_errors(x) +} +\arguments{ +\item{x}{A \code{hub_validations} object} +} +\value{ +An error if one of the elements of \code{x} is of class \code{check_failure}, +\code{check_error}, \code{check_exec_error} or \code{check_exec_warning}. +\code{TRUE} invisibly otherwise. +} +\description{ +This is meant to be used in CI workflows to raise conditions from +\code{hub_validations} objects. +} diff --git a/man/check_submission_time.Rd b/man/check_submission_time.Rd new file mode 100644 index 00000000..05fed7a0 --- /dev/null +++ b/man/check_submission_time.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_submission_time.R +\name{check_submission_time} +\alias{check_submission_time} +\title{Checks submission is within the valid submission window for a given round.} +\usage{ +check_submission_time(hub_path, file_path) +} +\arguments{ +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Checks submission is within the valid submission window for a given round. +} diff --git a/man/check_tbl_match_round_id.Rd b/man/check_tbl_match_round_id.Rd new file mode 100644 index 00000000..29b8bf7c --- /dev/null +++ b/man/check_tbl_match_round_id.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_tbl_match_round_id.R +\name{check_tbl_match_round_id} +\alias{check_tbl_match_round_id} +\title{Check model output data tbl round ID matches submission round ID.} +\usage{ +check_tbl_match_round_id(tbl, file_path, hub_path, round_id_col = NULL) +} +\arguments{ +\item{tbl}{a tibble/data.frame of the contents of the file being validated.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} + +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{round_id_col}{Character string. The name of the column containing +\code{round_id}s. Usually, the value of round property \code{round_id} in hub \code{tasks.json} +config file.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +If \code{round_id_from_variable: false} and no \code{round_id_col} name is provided, +check is skipped and a \verb{} condition class object is +returned. If no valid \code{round_id_col} name is provided or can extracted from +config (check through \code{check_valid_round_id_col}), a \verb{} +condition class object is returned and the rest of the check skipped. +} +\description{ +Check model output data tbl round ID matches submission round ID. +} +\details{ +This check only applies to files being submitted to rounds where +\code{round_id_from_variable: true} or where a \code{round_id_col} name is explicitly +provided. Skipped otherwise. +} diff --git a/man/combine.Rd b/man/combine.Rd new file mode 100644 index 00000000..992a8108 --- /dev/null +++ b/man/combine.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hub_validations_methods.R +\name{combine} +\alias{combine} +\title{Concatenate \code{hub_validations} S3 class objects} +\usage{ +combine(...) +} +\arguments{ +\item{...}{\code{hub_validations} S3 class objects to be concatenated.} +} +\value{ +a \code{hub_validations} S3 class object. +} +\description{ +Concatenate \code{hub_validations} S3 class objects +} diff --git a/man/is_success.Rd b/man/is_success.Rd index 8e852901..cca41ebc 100644 --- a/man/is_success.Rd +++ b/man/is_success.Rd @@ -6,6 +6,9 @@ \alias{is_error} \alias{is_info} \alias{not_pass} +\alias{is_exec_error} +\alias{is_exec_warn} +\alias{is_any_error} \title{Get status of a hub check} \usage{ is_success(x) @@ -17,6 +20,12 @@ is_error(x) is_info(x) not_pass(x) + +is_exec_error(x) + +is_exec_warn(x) + +is_any_error(x) } \arguments{ \item{x}{an object that inherits from class \verb{} to test.} @@ -39,4 +48,10 @@ Get status of a hub check \item \code{not_pass()}: Did check not pass? +\item \code{is_exec_error()}: Is exec error? + +\item \code{is_exec_warn()}: Is exec warning? + +\item \code{is_any_error()}: Is error or exec error? + }} diff --git a/man/new_hub_validations.Rd b/man/new_hub_validations.Rd new file mode 100644 index 00000000..3697fb34 --- /dev/null +++ b/man/new_hub_validations.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/new_hub_validations.R +\name{new_hub_validations} +\alias{new_hub_validations} +\alias{as_hub_validations} +\title{Create new or convert list to \code{hub_validations} S3 class object} +\usage{ +new_hub_validations(...) + +as_hub_validations(x) +} +\arguments{ +\item{...}{named elements to be included. Each element must be an object which +inherits from class \verb{}.} + +\item{x}{a list of named elements. Each element must be an object which +inherits from class \verb{}.} +} +\value{ +an S3 object of class \verb{}. +} +\description{ +Create new or convert list to \code{hub_validations} S3 class object +} +\section{Functions}{ +\itemize{ +\item \code{new_hub_validations()}: Create new \verb{} S3 class object + +\item \code{as_hub_validations()}: Convert list to \verb{} S3 class object + +}} +\examples{ +new_hub_validations() + +hub_path <- system.file("testhubs/simple", package = "hubValidations") +file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" +new_hub_validations( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path) +) +x <- list( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path) +) +as_hub_validations(x) +} diff --git a/man/cfg_check_tbl_col_timediff.Rd b/man/opt_check_tbl_col_timediff.Rd similarity index 91% rename from man/cfg_check_tbl_col_timediff.Rd rename to man/opt_check_tbl_col_timediff.Rd index 2ff9c218..654755a0 100644 --- a/man/cfg_check_tbl_col_timediff.Rd +++ b/man/opt_check_tbl_col_timediff.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cfg_check_tbl_col_timediff.R -\name{cfg_check_tbl_col_timediff} -\alias{cfg_check_tbl_col_timediff} +% Please edit documentation in R/opt_check_tbl_col_timediff.R +\name{opt_check_tbl_col_timediff} +\alias{opt_check_tbl_col_timediff} \title{Check time difference between values in two date columns equal a defined period.} \usage{ -cfg_check_tbl_col_timediff( +opt_check_tbl_col_timediff( tbl, file_path, hub_path, diff --git a/man/opt_check_tbl_counts_lt_popn.Rd b/man/opt_check_tbl_counts_lt_popn.Rd new file mode 100644 index 00000000..59998fba --- /dev/null +++ b/man/opt_check_tbl_counts_lt_popn.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/opt_check_tbl_counts_lt_popn.R +\name{opt_check_tbl_counts_lt_popn} +\alias{opt_check_tbl_counts_lt_popn} +\title{Check that predicted values per location are less than total location population.} +\usage{ +opt_check_tbl_counts_lt_popn( + tbl, + file_path, + hub_path, + targets = NULL, + popn_file_path = "auxiliary-data/locations.csv", + popn_col = "population", + location_col = "location" +) +} +\arguments{ +\item{tbl}{a tibble/data.frame of the contents of the file being validated.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} + +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{targets}{Either a single target key list or a list of multiple target key lists.} + +\item{popn_file_path}{Character string. +Path to population data relative to the hub root. +Defaults to \code{auxiliary-data/locations.csv}.} + +\item{popn_col}{Character string. +The name of the population size column in the population data set.} + +\item{location_col}{Character string. +The name of the location column. +Used to join population data to submission file data. +Must be shared by both files.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Check that predicted values per location are less than total location population. +} +\details{ +Should only be applied to rows containing count predictions. Use argument +\code{targets} to filter \code{tbl} data to appropriate count target rows. +} +\examples{ +hub_path <- system.file("testhubs/flusight", package = "hubValidations") +file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" +tbl <- hubValidations::read_model_out_file(file_path, hub_path) +# Single target key list +targets <- list("target" = "wk ahead inc flu hosp") +opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) +} diff --git a/man/opt_check_tbl_horizon_timediff.Rd b/man/opt_check_tbl_horizon_timediff.Rd new file mode 100644 index 00000000..6a26fed0 --- /dev/null +++ b/man/opt_check_tbl_horizon_timediff.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/opt_check_tbl_horizon_timediff.R +\name{opt_check_tbl_horizon_timediff} +\alias{opt_check_tbl_horizon_timediff} +\title{Check time difference between values in two date columns equal a defined period.} +\usage{ +opt_check_tbl_horizon_timediff( + tbl, + file_path, + hub_path, + t0_colname, + t1_colname, + horizon_colname = "horizon", + timediff = lubridate::weeks() +) +} +\arguments{ +\item{tbl}{a tibble/data.frame of the contents of the file being validated.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} + +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{t0_colname}{Character string. The name of the time zero date column.} + +\item{t1_colname}{Character string. The name of the time zero + 1 time step date column.} + +\item{horizon_colname}{Character string. The name of the horizon column. +Defaults to \code{"horizon"}.} + +\item{timediff}{an object of class \code{lubridate} \code{\linkS4class{Period}} and length 1. +The period of a single horizon. Default to 1 week.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Check time difference between values in two date columns equal a defined period. +} diff --git a/man/print.pr_hub_validations.Rd b/man/print.pr_hub_validations.Rd new file mode 100644 index 00000000..720f0954 --- /dev/null +++ b/man/print.pr_hub_validations.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hub_validations_methods.R +\name{print.pr_hub_validations} +\alias{print.pr_hub_validations} +\title{Print results of \code{validate_pr()} function as a bullet list} +\usage{ +\method{print}{pr_hub_validations}(x, ...) +} +\arguments{ +\item{x}{An object of class \code{pr_hub_validations}} + +\item{...}{Unused argument present for class consistency} +} +\description{ +Print results of \code{validate_pr()} function as a bullet list +} diff --git a/man/try_check.Rd b/man/try_check.Rd new file mode 100644 index 00000000..0580a77e --- /dev/null +++ b/man/try_check.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/try_check.R +\name{try_check} +\alias{try_check} +\title{Wrap check expression to capture check execution errors} +\usage{ +try_check(expr, file_path) +} +\arguments{ +\item{expr}{check function expression to run.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} +} +\value{ +If \code{expr} executes correctly, the output of \code{expr} is returned. If +The execution error message is attached as attribute \code{msg}. +} +\description{ +Wrap check expression to capture check execution errors +} diff --git a/man/validate_pr.Rd b/man/validate_pr.Rd new file mode 100644 index 00000000..88102280 --- /dev/null +++ b/man/validate_pr.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate_pr.R +\name{validate_pr} +\alias{validate_pr} +\title{Validate Pull Request} +\usage{ +validate_pr( + hub_path = ".", + gh_repo, + pr_number, + round_id_col = NULL, + validations_cfg_path = NULL, + skip_submit_window_check = FALSE +) +} +\arguments{ +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{gh_repo}{GitHub repository address in the format \code{username/repo}} + +\item{pr_number}{Number of the pull request to validate} + +\item{round_id_col}{Character string. The name of the column containing +\code{round_id}s. Only required if files contain a column that contains \code{round_id} +details but has not been configured via \code{round_id_from_variable: true} and +\verb{round_id:} in in hub \code{tasks.json} config file.} + +\item{validations_cfg_path}{Path to \code{validations.yml} file. If \code{NULL} +defaults to \code{hub-config/validations.yml}.} + +\item{skip_submit_window_check}{Logical. Whether to skip the submission window check.} +} +\value{ +An object of class \code{hub_validations}. +} +\description{ +Validate Pull Request +} +\examples{ +\dontrun{ +validate_pr( + hub_path = "." + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 3 +) +} +} diff --git a/man/validate_submission.Rd b/man/validate_submission.Rd index a9c085ff..61ea8768 100644 --- a/man/validate_submission.Rd +++ b/man/validate_submission.Rd @@ -10,7 +10,9 @@ validate_submission( hub_path, file_path, round_id_col = NULL, - validations_cfg_path = NULL + validations_cfg_path = NULL, + skip_submit_window_check = FALSE, + skip_check_config = FALSE ) } \arguments{ @@ -33,6 +35,11 @@ config file.} \item{validations_cfg_path}{Path to \code{validations.yml} file. If \code{NULL} defaults to \code{hub-config/validations.yml}.} + +\item{skip_submit_window_check}{Logical. Whether to skip the submission window check.} + +\item{skip_check_config}{Logical. Whether to skip the hub config validation check. +check.} } \value{ An object of class \code{hub_validations}. Each named element contains diff --git a/man/validate_submission_time.Rd b/man/validate_submission_time.Rd new file mode 100644 index 00000000..3c957803 --- /dev/null +++ b/man/validate_submission_time.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate_submission_time.R +\name{validate_submission_time} +\alias{validate_submission_time} +\title{Validate a submitted model data file submission time.} +\usage{ +validate_submission_time(hub_path, file_path) +} +\arguments{ +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} +} +\value{ +An object of class \code{hub_validations}. Each named element contains +a \code{hub_check} class object reflecting the result of a given check. Function +will return early if a check returns an error. +} +\description{ +Validate a submitted model data file submission time. +} +\examples{ +hub_path <- system.file("testhubs/simple", package = "hubValidations") +file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" +validate_submission_time(hub_path, file_path) +} diff --git a/tests/testthat/_snaps/check_config_hub_valid.md b/tests/testthat/_snaps/check_config_hub_valid.md new file mode 100644 index 00000000..e735c9e0 --- /dev/null +++ b/tests/testthat/_snaps/check_config_hub_valid.md @@ -0,0 +1,27 @@ +# check_config_hub_valid works + + Code + check_config_hub_valid(hub_path = system.file("testhubs/simple", package = "hubValidations")) + Output + + Message: + All hub config files are valid. + +--- + + Code + check_config_hub_valid(hub_path = system.file("testhubs/flusight", package = "hubValidations")) + Output + + Message: + All hub config files are valid. + +--- + + Code + check_config_hub_valid(hub_path = system.file("testhubs/flusight", package = "hubValidations")) + Output + + Error: + ! All hub config files must be valid. Config file "tasks" invalid. + diff --git a/tests/testthat/_snaps/check_tbl_match_round_id.md b/tests/testthat/_snaps/check_tbl_match_round_id.md new file mode 100644 index 00000000..5de6a994 --- /dev/null +++ b/tests/testthat/_snaps/check_tbl_match_round_id.md @@ -0,0 +1,64 @@ +# check_tbl_match_round_id works + + Code + check_tbl_match_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path) + Output + + Message: + All `round_id_col` "origin_date" values match submission `round_id` from file name. + +--- + + Code + str(check_tbl_match_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path)) + Output + List of 4 + $ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + $ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + $ call : chr "check_tbl_match_round_id" + $ use_cli_format: logi TRUE + - attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + +--- + + Code + check_tbl_match_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path, + round_id_col = "origin_date") + Output + + Message: + All `round_id_col` "origin_date" values match submission `round_id` from file name. + +# check_tbl_match_round_id fails correctly + + Code + check_tbl_match_round_id(tbl = read_model_out_file(file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + hub_path), file_path = file_path, hub_path = hub_path) + Output + + Error: + ! All `round_id_col` "origin_date" values must match submission `round_id` from file name. `round_id` value 2022-10-08 does not match submission `round_id` "2022-10-01" + +--- + + Code + check_tbl_match_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path, + round_id_col = "random_column") + Output + + Warning: + `round_id_col` name must be valid. Must be one of "origin_date", "target", "horizon", and "location" not "random_column". + +--- + + Code + str(check_tbl_match_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path, + round_id_col = "random_column")) + Output + List of 4 + $ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"origin_date\", \""| __truncated__ + $ where : chr "hub-baseline/2022-10-01-hub-baseline.csv" + $ call : chr "check_tbl_match_round_id" + $ use_cli_format: logi TRUE + - attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_warning" "warning" ... + diff --git a/tests/testthat/_snaps/check_tbl_unique_round_id.md b/tests/testthat/_snaps/check_tbl_unique_round_id.md index 012843f0..b351d3c2 100644 --- a/tests/testthat/_snaps/check_tbl_unique_round_id.md +++ b/tests/testthat/_snaps/check_tbl_unique_round_id.md @@ -1,9 +1,7 @@ # check_tbl_unique_round_id works Code - check_tbl_unique_round_id(tbl = arrow::read_csv_arrow(system.file( - "files/2022-10-15-team1-goodmodel.csv", package = "hubValidations")), - file_path = file_path, hub_path = hub_path) + check_tbl_unique_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path) Output Message: @@ -12,9 +10,8 @@ --- Code - check_tbl_unique_round_id(tbl = arrow::read_csv_arrow(system.file( - "files/2022-10-15-team1-goodmodel.csv", package = "hubValidations")), - file_path = file_path, hub_path = hub_path, round_id_col = "origin_date") + check_tbl_unique_round_id(tbl = tbl, file_path = file_path, hub_path = hub_path, + round_id_col = "origin_date") Output Message: @@ -23,9 +20,8 @@ --- Code - str(check_tbl_unique_round_id(tbl = arrow::read_csv_arrow(system.file( - "files/2022-10-15-team1-goodmodel.csv", package = "hubValidations")), - round_id_col = "origin_date", file_path = file_path, hub_path = hub_path)) + str(check_tbl_unique_round_id(tbl = tbl, round_id_col = "origin_date", + file_path = file_path, hub_path = hub_path)) Output List of 4 $ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " diff --git a/tests/testthat/_snaps/combine.md b/tests/testthat/_snaps/combine.md new file mode 100644 index 00000000..e8263235 --- /dev/null +++ b/tests/testthat/_snaps/combine.md @@ -0,0 +1,67 @@ +# combine works + + Code + str(combine(new_hub_validations(), new_hub_validations(), NULL)) + Output + list() + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +--- + + Code + str(combine(new_hub_validations(), new_hub_validations(file_exists = check_file_exists( + file_path, hub_path), file_name = check_file_name(file_path), NULL))) + Output + List of 2 + $ file_exists:List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +--- + + Code + str(combine(new_hub_validations(file_exists = check_file_exists(file_path, + hub_path)), new_hub_validations(file_name = check_file_name(file_path), NULL))) + Output + List of 2 + $ file_exists:List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +# combine errors correctly + + Code + combine(new_hub_validations(), new_hub_validations(), a = 1) + Error + ! All elements must inherit from class . + x Element with index 1 does not. + +--- + + Code + combine(new_hub_validations(file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path), a = 10)) + Error + ! All elements must inherit from class . + x Element with index 3 does not. + diff --git a/tests/testthat/_snaps/execute_custom_checks.md b/tests/testthat/_snaps/execute_custom_checks.md index 1eae02b7..d360a75c 100644 --- a/tests/testthat/_snaps/execute_custom_checks.md +++ b/tests/testthat/_snaps/execute_custom_checks.md @@ -5,7 +5,7 @@ "testdata", "config", "validations.yml"))) Output List of 1 - $ col_timediff:List of 4 + $ horizon_timediff:List of 4 ..$ message : chr "Time differences between t0 var `forecast_date` and t1 var\n `target_end_date` all match expected period"| __truncated__ ..$ where : chr "hub-ensemble/2023-05-08-hub-ensemble.parquet" ..$ call : NULL @@ -20,7 +20,7 @@ "testdata", "config", "validations-error.yml"))) Output List of 1 - $ col_timediff:List of 4 + $ horizon_timediff:List of 4 ..$ message : chr "Time differences between t0 var `forecast_date` and t1 var\n `target_end_date` do not all match expected"| __truncated__ ..$ where : chr "hub-ensemble/2023-05-08-hub-ensemble.parquet" ..$ call : NULL diff --git a/tests/testthat/_snaps/new_hub_validations.md b/tests/testthat/_snaps/new_hub_validations.md new file mode 100644 index 00000000..e456fb83 --- /dev/null +++ b/tests/testthat/_snaps/new_hub_validations.md @@ -0,0 +1,29 @@ +# new_hub_validations works + + Code + str(new_hub_validations()) + Output + Named list() + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +--- + + Code + str(new_hub_validations(file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path))) + Output + List of 2 + $ file_exists:List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + diff --git a/tests/testthat/_snaps/cfg_check_tbl_col_timediff.md b/tests/testthat/_snaps/opt_check_tbl_col_timediff.md similarity index 78% rename from tests/testthat/_snaps/cfg_check_tbl_col_timediff.md rename to tests/testthat/_snaps/opt_check_tbl_col_timediff.md index 4a19726a..73073b49 100644 --- a/tests/testthat/_snaps/cfg_check_tbl_col_timediff.md +++ b/tests/testthat/_snaps/opt_check_tbl_col_timediff.md @@ -1,7 +1,7 @@ -# cfg_check_tbl_col_timediff works +# opt_check_tbl_col_timediff works Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2)) Output @@ -11,7 +11,7 @@ --- Code - cfg_check_tbl_col_timediff(tbl_chr, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl_chr, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2)) Output @@ -21,17 +21,17 @@ --- Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2)) Output Warning: Time differences between t0 var `forecast_date` and t1 var `target_end_date` do not all match expected period of 14d 0H 0M 0S. t1 var value 2023-05-15 invalid. -# cfg_check_tbl_col_timediff fails correctly +# opt_check_tbl_col_timediff fails correctly Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_dates", timediff = lubridate::weeks(2)) Error Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. @@ -39,7 +39,7 @@ --- Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = c("target_end_date", "forecast_date"), timediff = lubridate::weeks( 2)) Error @@ -48,7 +48,7 @@ --- Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = 14L) Error Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. @@ -56,7 +56,7 @@ --- Code - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2)) Error Column `colname` must be configured as not . diff --git a/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md b/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md new file mode 100644 index 00000000..aa9dca94 --- /dev/null +++ b/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md @@ -0,0 +1,46 @@ +# opt_check_tbl_counts_lt_popn works + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) + Output + + Message: + Target counts are less than location population size. + +--- + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) + Output + + Warning: + Target counts must be less than location population size. Affected rows: 1 and 2. + +# opt_check_tbl_counts_lt_popn fails correctly + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) + Error + Target does not match any round target keys. + +--- + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_file_path = "random/path.csv") + Error + File not found at 'random/path.csv' + +--- + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, location_col = "random_col") + Error + Assertion on 'location_col' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'random_col'. + +--- + + Code + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_col = "random_col") + Error + Assertion on 'popn_col' failed: Must be element of set {'abbreviation','location','location_name','population','','count_rate1','count_rate2','count_rate2p5','count_rate3','count_rate4','count_rate5'}, but is 'random_col'. + diff --git a/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md b/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md new file mode 100644 index 00000000..129e0bf3 --- /dev/null +++ b/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md @@ -0,0 +1,72 @@ +# opt_check_tbl_horizon_timediff works + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date") + Output + + Message: + Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 7d 0H 0M 0S * `horizon`. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl_chr, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date") + Output + + Message: + Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 7d 0H 0M 0S * `horizon`. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date") + Output + + Warning: + Time differences between t0 var `forecast_date` and t1 var `target_end_date` do not all match expected period of 7d 0H 0M 0S * `horizon`. t1 var value "2023-05-22 (horizon = 1)" are invalid. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date", timediff = lubridate::weeks(2)) + Output + + Warning: + Time differences between t0 var `forecast_date` and t1 var `target_end_date` do not all match expected period of 14d 0H 0M 0S * `horizon`. t1 var values "2023-05-15 (horizon = 1)" and "2023-05-22 (horizon = 2)" are invalid. + +# opt_check_tbl_horizon_timediff fails correctly + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_dates") + Error + Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = c("target_end_date", "forecast_date")) + Error + Assertion on 't1_colname' failed: Must have length 1, but has length 2. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date", timediff = 7L) + Error + Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. + +--- + + Code + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", + t1_colname = "target_end_date") + Error + Column `colname` must be configured as not . + diff --git a/tests/testthat/_snaps/validate_model_data.md b/tests/testthat/_snaps/validate_model_data.md index 8c35bc8d..e0678872 100644 --- a/tests/testthat/_snaps/validate_model_data.md +++ b/tests/testthat/_snaps/validate_model_data.md @@ -3,7 +3,7 @@ Code str(validate_model_data(hub_path, file_path)) Output - List of 11 + List of 12 $ file_read :List of 4 ..$ message : chr "File could be read successfully. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -22,6 +22,12 @@ ..$ call : chr "check_tbl_unique_round_id" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ colnames :List of 4 ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -126,7 +132,7 @@ Code str(validate_model_data(hub_path, file_path)) Output - List of 11 + List of 12 $ file_read :List of 4 ..$ message : chr "File could be read successfully. \n " ..$ where : chr "hub-ensemble/2023-05-08-hub-ensemble.parquet" @@ -145,6 +151,12 @@ ..$ call : chr "check_tbl_unique_round_id" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"forecast_date\" values match submission `round_id` from file name. \n " + ..$ where : chr "hub-ensemble/2023-05-08-hub-ensemble.parquet" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ colnames :List of 4 ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " ..$ where : chr "hub-ensemble/2023-05-08-hub-ensemble.parquet" @@ -206,22 +218,22 @@ # validate_model_data with config function works Code - validate_model_data(hub_path, file_path)[["col_timediff"]] + validate_model_data(hub_path, file_path)[["horizon_timediff"]] Output Message: - Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 14d 0H 0M 0S. + Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 7d 0H 0M 0S * `horizon`. --- Code validate_model_data(hub_path, file_path, validations_cfg_path = system.file( "testhubs/flusight/hub-config/validations.yml", package = "hubValidations"))[[ - "col_timediff"]] + "horizon_timediff"]] Output Message: - Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 14d 0H 0M 0S. + Time differences between t0 var `forecast_date` and t1 var `target_end_date` all match expected period of 7d 0H 0M 0S * `horizon`. # validate_model_data print method work [plain] @@ -231,6 +243,7 @@ v 2022-10-08-team1-goodmodel.csv: File could be read successfully. v 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. v 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. + v 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name. v 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names. v 2022-10-08-team1-goodmodel.csv: Column data types match hub schema. v 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations @@ -245,7 +258,7 @@ Code validate_model_data(hub_path, file_path) Output - ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::v 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0Av 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0Av 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0Av 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0Av 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0Av 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0Av 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0Av 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0Av 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0Av 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Ai 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. + ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::v 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0Av 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0Av 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0Av 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.%0Av 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0Av 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0Av 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0Av 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0Av 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0Av 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0Av 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Ai 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. # validate_model_data print method work [ansi] @@ -255,6 +268,7 @@ v 2022-10-08-team1-goodmodel.csv: File could be read successfully. v 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. v 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. + v 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name. v 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names. v 2022-10-08-team1-goodmodel.csv: Column data types match hub schema. v 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations @@ -269,7 +283,7 @@ Code validate_model_data(hub_path, file_path) Output - ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::v 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0Av 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0Av 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0Av 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0Av 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0Av 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0Av 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0Av 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0Av 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0Av 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Ai 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. + ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::v 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0Av 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0Av 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0Av 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.%0Av 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0Av 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0Av 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0Av 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0Av 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0Av 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0Av 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Ai 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. # validate_model_data print method work [unicode] @@ -279,6 +293,7 @@ ✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully. ✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. ✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. + ✔ 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name. ✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names. ✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema. ✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations @@ -293,7 +308,7 @@ Code validate_model_data(hub_path, file_path) Output - ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0A✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0A✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0A✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0A✔ 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0A✔ 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Aℹ 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. + ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0A✔ 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.%0A✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0A✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0A✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0A✔ 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0A✔ 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Aℹ 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. # validate_model_data print method work [fancy] @@ -303,6 +318,7 @@ ✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully. ✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. ✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. + ✔ 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name. ✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names. ✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema. ✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations @@ -317,7 +333,7 @@ Code validate_model_data(hub_path, file_path) Output - ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0A✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0A✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0A✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0A✔ 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0A✔ 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Aℹ 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. + ::notice file=test-validate_model_data.R,line=57,endLine=61,col=3,endCol=3::✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.%0A✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.%0A✔ 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.%0A✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.%0A✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.%0A✔ 2022-10-08-team1-goodmodel.csv: Data rows contain valid value combinations%0A✔ 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.%0A✔ 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.%0A✔ 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.%0Aℹ 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped. # validate_model_data errors correctly diff --git a/tests/testthat/_snaps/validate_pr.md b/tests/testthat/_snaps/validate_pr.md new file mode 100644 index 00000000..60f043ce --- /dev/null +++ b/tests/testthat/_snaps/validate_pr.md @@ -0,0 +1,212 @@ +# validate_pr works on valid PR + + Code + str(checks) + Output + List of 19 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "valid_sb_hub" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-22-team1-goodmodel.csv'. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-22-team1-goodmodel.csv\" is valid. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 4 + ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ col_types :List of 4 + ..$ message : chr "Column data types match hub schema. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_col_types" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_vals :List of 4 + ..$ message : chr "Data rows contain valid value combinations \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_values" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ rows_unique :List of 4 + ..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_rows_unique" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ req_vals :List of 5 + ..$ message : chr "Required task ID/output type/output type ID combinations all present. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ missing : tibble [0 x 7] (S3: tbl_df/tbl/data.frame) + .. ..$ origin_date : chr(0) + .. ..$ target : chr(0) + .. ..$ horizon : chr(0) + .. ..$ location : chr(0) + .. ..$ age_group : chr(0) + .. ..$ output_type : chr(0) + .. ..$ output_type_id: chr(0) + ..$ call : chr "check_tbl_values_required" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_valid :List of 4 + ..$ message : chr "Values in column `value` all valid with respect to modeling task config. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_non_desc:List of 5 + ..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__ + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_value_col_ascending" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_sum1 :List of 4 + ..$ message : chr "No pmf output types to check for sum of 1. Check skipped." + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col_sum1" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +# validate_pr works on invalid PR + + Code + str(checks) + Output + Classes 'hub_validations', 'list' hidden list of 12 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "invalid_sb_hub" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/hub-baseline/2022-10-22-hub-baseline.parquet'. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-22-hub-baseline.parquet\" is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 6 + ..$ message : chr "Column names must be consistent with expected round task IDs and std column names. \n Expected column \"age_gro"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index 9b275130..a55a5af9 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -1,9 +1,10 @@ # validate_submission works Code - str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv")) + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output - List of 17 + List of 18 $ file_exists :List of 4 ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -58,6 +59,12 @@ ..$ call : chr "check_tbl_unique_round_id" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ colnames :List of 4 ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -119,7 +126,8 @@ --- Code - str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv")) + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv", + skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output Classes 'hub_validations', 'list' hidden list of 1 $ file_exists:List of 6 @@ -134,7 +142,8 @@ --- Code - str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv")) + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv", + skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output Classes 'hub_validations', 'list' hidden list of 10 $ file_exists :List of 4 @@ -191,12 +200,12 @@ ..$ call : chr "check_tbl_unique_round_id" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ colnames :List of 6 - ..$ message : chr "Column names must be consistent with expected round task IDs and std column names. \n Expected column \"age_gro"| __truncated__ + $ match_round_id :List of 6 + ..$ message : chr "All `round_id_col` \"origin_date\" values must match submission `round_id` from file name. \n `round_id` \n "| __truncated__ ..$ trace : NULL ..$ parent : NULL ..$ where : chr "team1-goodmodel/2022-10-15-hub-baseline.csv" - ..$ call : chr "check_tbl_colnames" + ..$ call : chr "check_tbl_match_round_id" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... @@ -204,7 +213,8 @@ Code str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - round_id_col = "random_col")) + round_id_col = "random_col", skip_submit_window_check = TRUE, + skip_check_config = TRUE)) Output List of 9 $ file_exists :List of 4 @@ -263,3 +273,160 @@ ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_warning" "warning" ... - attr(*, "class")= chr [1:2] "hub_validations" "list" +--- + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE)) + Output + List of 19 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "simple" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 4 + ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ col_types :List of 4 + ..$ message : chr "Column data types match hub schema. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_col_types" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_vals :List of 4 + ..$ message : chr "Data rows contain valid value combinations \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_values" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ rows_unique :List of 4 + ..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_rows_unique" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ req_vals :List of 5 + ..$ message : chr "Required task ID/output type/output type ID combinations all present. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ missing : tibble [0 x 6] (S3: tbl_df/tbl/data.frame) + .. ..$ origin_date : chr(0) + .. ..$ target : chr(0) + .. ..$ horizon : chr(0) + .. ..$ location : chr(0) + .. ..$ output_type : chr(0) + .. ..$ output_type_id: chr(0) + ..$ call : chr "check_tbl_values_required" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_valid :List of 4 + ..$ message : chr "Values in column `value` all valid with respect to modeling task config. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_non_desc:List of 5 + ..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__ + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_value_col_ascending" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_sum1 :List of 4 + ..$ message : chr "No pmf output types to check for sum of 1. Check skipped." + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col_sum1" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +# validate_submission submission within window works + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv")[[ + "submission_time"]]) + Output + List of 4 + $ message : chr "Submission time is within accepted submission window for round. \n " + $ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + $ call : chr "check_submission_time" + $ use_cli_format: logi TRUE + - attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + +# validate_submission submission outside window fails correctly + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv")[[ + "submission_time"]]) + Output + List of 4 + $ message : chr "Submission time must be within accepted submission window for round. \n Current time 2023-10-08 18:01:00 is out"| __truncated__ + $ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + $ call : chr "check_submission_time" + $ use_cli_format: logi TRUE + - attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ... + diff --git a/tests/testthat/test-check_config_hub_valid.R b/tests/testthat/test-check_config_hub_valid.R new file mode 100644 index 00000000..513606be --- /dev/null +++ b/tests/testthat/test-check_config_hub_valid.R @@ -0,0 +1,24 @@ +test_that("check_config_hub_valid works", { + expect_snapshot( + check_config_hub_valid( + hub_path = system.file("testhubs/simple", package = "hubValidations")) + ) + + expect_snapshot( + check_config_hub_valid( + hub_path = system.file("testhubs/flusight", package = "hubValidations")) + ) + + mockery::stub( + check_config_hub_valid, + "hubUtils::validate_hub_config", + list(admin = TRUE, + tasks = FALSE), + 2 + ) + expect_snapshot( + check_config_hub_valid( + hub_path = system.file("testhubs/flusight", package = "hubValidations")) + ) + +}) diff --git a/tests/testthat/test-check_tbl_match_round_id.R b/tests/testthat/test-check_tbl_match_round_id.R new file mode 100644 index 00000000..2b6dc5c9 --- /dev/null +++ b/tests/testthat/test-check_tbl_match_round_id.R @@ -0,0 +1,68 @@ +test_that("check_tbl_match_round_id works", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + tbl <- read_model_out_file(file_path, hub_path) + + expect_snapshot( + check_tbl_match_round_id( + tbl = tbl, + file_path = file_path, + hub_path = hub_path + ) + ) + expect_snapshot( + str( + check_tbl_match_round_id( + tbl = tbl, + file_path = file_path, + hub_path = hub_path + ) + ) + ) + + # Supply round_id_col + expect_snapshot( + check_tbl_match_round_id( + tbl = tbl, + file_path = file_path, hub_path = hub_path, + round_id_col = "origin_date" + ) + ) +}) + +test_that("check_tbl_match_round_id fails correctly", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "hub-baseline/2022-10-01-hub-baseline.csv" + tbl <- read_model_out_file(file_path, hub_path) + + # Fails with error when round_id detected in tbl does not match + # submission round_id + expect_snapshot( + check_tbl_match_round_id( + tbl = read_model_out_file( + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + hub_path), + file_path = file_path, + hub_path = hub_path + ) + ) + # Fails with warning when round_id_col invalid + expect_snapshot( + check_tbl_match_round_id( + tbl = tbl, + file_path = file_path, + hub_path = hub_path, + round_id_col = "random_column" + ) + ) + expect_snapshot( + str( + check_tbl_match_round_id( + tbl = tbl, + file_path = file_path, + hub_path = hub_path, + round_id_col = "random_column" + ) + ) + ) +}) diff --git a/tests/testthat/test-check_tbl_unique_round_id.R b/tests/testthat/test-check_tbl_unique_round_id.R index e9d303fc..81c276b2 100644 --- a/tests/testthat/test-check_tbl_unique_round_id.R +++ b/tests/testthat/test-check_tbl_unique_round_id.R @@ -1,24 +1,18 @@ test_that("check_tbl_unique_round_id works", { hub_path <- system.file("testhubs/simple", package = "hubValidations") file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + tbl <- read_model_out_file(file_path, hub_path) + expect_snapshot( check_tbl_unique_round_id( - tbl = arrow::read_csv_arrow( - system.file("files/2022-10-15-team1-goodmodel.csv", - package = "hubValidations" - ) - ), + tbl = tbl, file_path = file_path, hub_path = hub_path ) ) expect_snapshot( check_tbl_unique_round_id( - tbl = arrow::read_csv_arrow( - system.file("files/2022-10-15-team1-goodmodel.csv", - package = "hubValidations" - ) - ), + tbl = tbl, file_path = file_path, hub_path = hub_path, round_id_col = "origin_date" ) @@ -26,11 +20,7 @@ test_that("check_tbl_unique_round_id works", { expect_snapshot( str( check_tbl_unique_round_id( - tbl = arrow::read_csv_arrow( - system.file("files/2022-10-15-team1-goodmodel.csv", - package = "hubValidations" - ) - ), + tbl = tbl, round_id_col = "origin_date", file_path = file_path, hub_path = hub_path diff --git a/tests/testthat/test-combine.R b/tests/testthat/test-combine.R new file mode 100644 index 00000000..ae032be6 --- /dev/null +++ b/tests/testthat/test-combine.R @@ -0,0 +1,49 @@ +test_that("combine works", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + expect_snapshot( + str( + combine(new_hub_validations(), new_hub_validations(), NULL) + ) + ) + + expect_snapshot( + str(combine( + new_hub_validations(), + new_hub_validations( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path), + NULL + ) + )) + ) + + expect_snapshot( + str(combine( + new_hub_validations(file_exists = check_file_exists(file_path, hub_path)), + new_hub_validations( + file_name = check_file_name(file_path), + NULL + ) + )) + ) +}) + +test_that("combine errors correctly", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + expect_snapshot( + combine(new_hub_validations(), new_hub_validations(), a = 1), + error = TRUE + ) + + expect_snapshot( + combine( + new_hub_validations( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path), + a = 10 + ) + ), + error = TRUE) + }) diff --git a/tests/testthat/test-new_hub_validations.R b/tests/testthat/test-new_hub_validations.R new file mode 100644 index 00000000..c1a06b2d --- /dev/null +++ b/tests/testthat/test-new_hub_validations.R @@ -0,0 +1,28 @@ +test_that("new_hub_validations works", { + expect_snapshot(str(new_hub_validations())) + expect_s3_class( + new_hub_validations(), + c("hub_validations", "list") + ) + + + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + + expect_snapshot( + str( + new_hub_validations( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path) + ) + ) + ) + + expect_s3_class( + new_hub_validations( + file_exists = check_file_exists(file_path, hub_path), + file_name = check_file_name(file_path) + ), + c("hub_validations", "list") + ) +}) diff --git a/tests/testthat/test-cfg_check_tbl_col_timediff.R b/tests/testthat/test-opt_check_tbl_col_timediff.R similarity index 77% rename from tests/testthat/test-cfg_check_tbl_col_timediff.R rename to tests/testthat/test-opt_check_tbl_col_timediff.R index e8860132..964d3aab 100644 --- a/tests/testthat/test-cfg_check_tbl_col_timediff.R +++ b/tests/testthat/test-opt_check_tbl_col_timediff.R @@ -1,10 +1,12 @@ -test_that("cfg_check_tbl_col_timediff works", { +test_that("opt_check_tbl_col_timediff works", { hub_path <- system.file("testhubs/flusight", package = "hubValidations") file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" tbl <- hubValidations::read_model_out_file(file_path, hub_path) + tbl$target_end_date <- tbl$forecast_date + lubridate::weeks(2) + expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2) @@ -13,7 +15,7 @@ test_that("cfg_check_tbl_col_timediff works", { tbl_chr <- hubUtils::coerce_to_character(tbl) expect_snapshot( - cfg_check_tbl_col_timediff(tbl_chr, file_path, hub_path, + opt_check_tbl_col_timediff(tbl_chr, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2) @@ -22,7 +24,7 @@ test_that("cfg_check_tbl_col_timediff works", { tbl$target_end_date[1] <- tbl$forecast_date[1] + lubridate::weeks(1) expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2) @@ -31,13 +33,14 @@ test_that("cfg_check_tbl_col_timediff works", { }) -test_that("cfg_check_tbl_col_timediff fails correctly", { +test_that("opt_check_tbl_col_timediff fails correctly", { hub_path <- system.file("testhubs/flusight", package = "hubValidations") file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" tbl <- hubValidations::read_model_out_file(file_path, hub_path) + tbl$target_end_date <- tbl$forecast_date + lubridate::weeks(2) expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_dates", timediff = lubridate::weeks(2) @@ -46,7 +49,7 @@ test_that("cfg_check_tbl_col_timediff fails correctly", { ) expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = c("target_end_date", "forecast_date"), timediff = lubridate::weeks(2) @@ -55,7 +58,7 @@ test_that("cfg_check_tbl_col_timediff fails correctly", { ) expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = 14L @@ -69,13 +72,13 @@ test_that("cfg_check_tbl_col_timediff fails correctly", { value = "double", target_end_date = "character" ) mockery::stub( - cfg_check_tbl_col_timediff, + opt_check_tbl_col_timediff, "hubUtils::create_hub_schema", schema, 2 ) expect_snapshot( - cfg_check_tbl_col_timediff(tbl, file_path, hub_path, + opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2) diff --git a/tests/testthat/test-opt_check_tbl_counts_lt_popn.R b/tests/testthat/test-opt_check_tbl_counts_lt_popn.R new file mode 100644 index 00000000..baaa5c5b --- /dev/null +++ b/tests/testthat/test-opt_check_tbl_counts_lt_popn.R @@ -0,0 +1,75 @@ +test_that("opt_check_tbl_counts_lt_popn works", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- hubValidations::read_model_out_file(file_path, hub_path) + targets <- list("target" = "wk ahead inc flu hosp") + + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) + ) + + expect_equal( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets), + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path) + ) + + tbl$value[1:2] <- 332200066L + 10L + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) + ) +}) + +test_that("opt_check_tbl_counts_lt_popn fails correctly", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- hubValidations::read_model_out_file(file_path, hub_path) + targets <- list("target" = "random target") + + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets), + error = TRUE + ) + + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, + popn_file_path = "random/path.csv" + ), + error = TRUE + ) + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, + location_col = "random_col" + ), + error = TRUE + ) + expect_snapshot( + opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, + popn_col = "random_col" + ), + error = TRUE + ) +}) + +test_that("filter_targets works", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- hubValidations::read_model_out_file(file_path, hub_path) + + target <- list(target = "wk ahead inc flu hosp") + # Test more complex target with two sets of target keys and target keys + # comprised of multiple columns. + targets <- list( + list( + target = c("wk ahead inc flu hosp", "extra target"), + horizon = 1L + ), + list( + target = c("wk ahead inc flu hosp", "extra target"), + horizon = 2L + ) + ) + + expect_equal(nrow(filter_targets(tbl, target)), 46) + expect_equal(nrow(filter_targets(tbl, targets)), 46) + expect_equal(nrow(filter_targets(tbl, targets[[1]])), 23) +}) diff --git a/tests/testthat/test-opt_check_tbl_horizon_timediff.R b/tests/testthat/test-opt_check_tbl_horizon_timediff.R new file mode 100644 index 00000000..2568caad --- /dev/null +++ b/tests/testthat/test-opt_check_tbl_horizon_timediff.R @@ -0,0 +1,88 @@ +test_that("opt_check_tbl_horizon_timediff works", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- hubValidations::read_model_out_file(file_path, hub_path) + + + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date" + ) + ) + + tbl_chr <- hubUtils::coerce_to_character(tbl) + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl_chr, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date" + ) + ) + + tbl$target_end_date[1] <- tbl$forecast_date[1] + lubridate::weeks(2) + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date" + ) + ) + + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date", + timediff = lubridate::weeks(2) + ) + ) +}) + + +test_that("opt_check_tbl_horizon_timediff fails correctly", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- hubValidations::read_model_out_file(file_path, hub_path) + + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_dates" + ), + error = TRUE + ) + + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = c("target_end_date", "forecast_date") + ), + error = TRUE + ) + + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date", + timediff = 7L + ), + error = TRUE + ) + + schema <- c( + forecast_date = "Date", target = "character", horizon = "integer", + location = "character", output_type = "character", output_type_id = "character", + value = "double", target_end_date = "character" + ) + mockery::stub( + opt_check_tbl_horizon_timediff, + "hubUtils::create_hub_schema", + schema, + 2 + ) + expect_snapshot( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "forecast_date", + t1_colname = "target_end_date" + ), + error = TRUE + ) +}) diff --git a/tests/testthat/test-validate_model_data.R b/tests/testthat/test-validate_model_data.R index 2c177f00..2ec528c8 100644 --- a/tests/testthat/test-validate_model_data.R +++ b/tests/testthat/test-validate_model_data.R @@ -39,7 +39,7 @@ test_that("validate_model_data with config function works", { hub_path <- system.file("testhubs/flusight", package = "hubValidations") file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" expect_snapshot( - validate_model_data(hub_path, file_path)[["col_timediff"]] + validate_model_data(hub_path, file_path)[["horizon_timediff"]] ) expect_snapshot( validate_model_data( @@ -48,7 +48,7 @@ test_that("validate_model_data with config function works", { "testhubs/flusight/hub-config/validations.yml", package = "hubValidations" ) - )[["col_timediff"]] + )[["horizon_timediff"]] ) }) diff --git a/tests/testthat/test-validate_pr.R b/tests/testthat/test-validate_pr.R new file mode 100644 index 00000000..f7de1c58 --- /dev/null +++ b/tests/testthat/test-validate_pr.R @@ -0,0 +1,36 @@ +test_that("validate_pr works on valid PR", { + + temp_hub <- fs::path(tempdir(), "valid_sb_hub") + gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + path = temp_hub, + branch = "pr-valid") + + checks <- validate_pr(hub_path = temp_hub, + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 4, + skip_submit_window_check = TRUE) + + expect_snapshot(str(checks)) + expect_invisible(check_for_errors(checks)) + + +}) + +test_that("validate_pr works on invalid PR", { + + temp_hub <- fs::path(tempdir(), "invalid_sb_hub") + gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + path = temp_hub, + branch = "pr-missing-taskid") + + checks <- validate_pr(hub_path = temp_hub, + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 5, + skip_submit_window_check = TRUE) + + expect_snapshot(str(checks)) + + expect_error( + suppressMessages(check_for_errors(checks)) + ) +}) diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index ada14320..8dbe1206 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -5,13 +5,17 @@ test_that("validate_submission works", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ) ) ) expect_s3_class( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ), c("hub_validations", "list") ) @@ -21,13 +25,17 @@ test_that("validate_submission works", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ) ) ) expect_s3_class( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ), c("hub_validations", "list") ) @@ -36,13 +44,17 @@ test_that("validate_submission works", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv" + file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ) ) ) expect_s3_class( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv" + file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ), c("hub_validations", "list") ) @@ -53,8 +65,63 @@ test_that("validate_submission works", { validate_submission( hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - round_id_col = "random_col" + round_id_col = "random_col", + skip_submit_window_check = TRUE, + skip_check_config = TRUE ) ) ) + + # File that passes validation & checks config + expect_snapshot( + str( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE + ) + ) + ) + expect_s3_class( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE + ), + c("hub_validations", "list") + ) +}) + +test_that("validate_submission submission within window works", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + + mockery::stub( + check_submission_time, + "Sys.time", + lubridate::as_datetime("2022-10-08 18:01:00 EEST"), + 2 + ) + expect_snapshot( + str( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + )[["submission_time"]] + ) + ) +}) + +test_that("validate_submission submission outside window fails correctly", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + + mockery::stub( + check_submission_time, + "Sys.time", + lubridate::as_datetime("2023-10-08 18:01:00 EEST"), + 2 + ) + expect_snapshot( + str( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + )[["submission_time"]] + ) + ) }) diff --git a/tests/testthat/testdata/config/validations-error.yml b/tests/testthat/testdata/config/validations-error.yml index acbcf6cd..7303d5f4 100644 --- a/tests/testthat/testdata/config/validations-error.yml +++ b/tests/testthat/testdata/config/validations-error.yml @@ -1,19 +1,18 @@ default: test_custom_checks_caller: - col_timediff: - fn: "cfg_check_tbl_col_timediff" + horizon_timediff: + fn: "opt_check_tbl_horizon_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date" t1_colname: "target_end_date" - timediff: !expr lubridate::weeks(2) 2023-05-08: test_custom_checks_caller: - col_timediff: - fn: "cfg_check_tbl_col_timediff" + horizon_timediff: + fn: "opt_check_tbl_horizon_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date" t1_colname: "target_end_date" - timediff: !expr lubridate::weeks(1) + timediff: !expr lubridate::weeks(2) diff --git a/tests/testthat/testdata/config/validations.yml b/tests/testthat/testdata/config/validations.yml index 04e61ce1..f512456c 100644 --- a/tests/testthat/testdata/config/validations.yml +++ b/tests/testthat/testdata/config/validations.yml @@ -1,9 +1,8 @@ default: test_custom_checks_caller: - col_timediff: - fn: "cfg_check_tbl_col_timediff" + horizon_timediff: + fn: "opt_check_tbl_horizon_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date" t1_colname: "target_end_date" - timediff: !expr lubridate::weeks(2) diff --git a/vignettes/articles/custom-functions.Rmd b/vignettes/articles/custom-functions.Rmd index 2fead5ff..77152010 100644 --- a/vignettes/articles/custom-functions.Rmd +++ b/vignettes/articles/custom-functions.Rmd @@ -56,36 +56,50 @@ Note that each of the `validate_*()` functions contain a standard objects in the - `round_id`: character string of `round_id` - `file_meta`: named list containing `round_id`, `team_abbr`, `model_abbr` and `model_id` details. -The `args` configuration can be used to override objects from the caller environment. +The `args` configuration can be used to override objects from the caller environment as well as defaults. -Here's an example configuration for a single check (`cfg_check_tbl_col_timediff()`) to be run as part of the `validate_model_data()` validation function which checks the content of the model data submission files. +Here's an example configuration for a single check (`opt_check_tbl_horizon_timediff()`) to be run as part of the `validate_model_data()` validation function which checks the content of the model data submission files. ```{r, eval=FALSE, code=readLines(system.file('testhubs/flusight/hub-config/validations.yml', package = 'hubValidations'))} ``` +The above configuration file relies on default values for arguments `horizon_colname` (`"horizon"`) and `timediff` (`lubridate::weeks()`). We can use the `validation.yml` `args` list to override the default values. Here's an example that includes **executable r code** as the value of an argument. + +``` +default: + validate_model_data: + horizon_timediff: + fn: "opt_check_tbl_horizon_timediff" + pkg: "hubValidations" + args: + t0_colname: "forecast_date" + t1_colname: "target_end_date" + horizon_colname: "horizons" + timediff: !expr lubridate::weeks(2) +``` + ### Round specific configuration Additional round specific configurations can be included in `validation.yml` that can add to or override default configurations. -For example, in the following `validation.yml`, if the file being validated is being submitted to a round with round ID `"2023-08-15"`, default `col_timediff` check configuration will be overiden by the `2023-08-15` configuration. +For example, in the following `validation.yml` which deploys the `opt_check_tbl_col_timediff()` optional check, if the file being validated is being submitted to a round with round ID `"2023-08-15"`, default `col_timediff` check configuration will be overridden by the `2023-08-15` configuration. ```yml default: validate_model_data: col_timediff: - fn: "cfg_check_tbl_col_timediff" + fn: "opt_check_tbl_col_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date" t1_colname: "target_end_date" - timediff: !expr lubridate::weeks(2) 2023-08-15: validate_model_data: col_timediff: - fn: "cfg_check_tbl_col_timediff" + fn: "opt_check_tbl_col_timediff" pkg: "hubValidations" args: t0_colname: "forecast_date"