diff --git a/DESCRIPTION b/DESCRIPTION index f572c254..c2f88768 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hubValidations Title: Testing framework for hubverse hub validations -Version: 0.0.0.9005 +Version: 0.0.0.9006 Authors@R: c( person( given = "Anna", diff --git a/NEWS.md b/NEWS.md index 1404f2ca..7ba8f506 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# hubValidations 0.0.0.9006 + +* `validate_pr()` now checks for deletions of previously submitted model metadata files and modifications or deletions of previously submitted model output files, adding an `` class object to the function output for each detected modified/deleted file (#43 & #44). + # hubValidations 0.0.0.9005 * Improved handling of numeric output type IDs (including high precision floating points / values with trailing zeros), especially when overall hub output type ID column is character. This previously lead to a number of bugs and false validation failures (#58 & #54) which are addressed in this version. diff --git a/R/validate_pr.R b/R/validate_pr.R index 79c70dec..be28894e 100644 --- a/R/validate_pr.R +++ b/R/validate_pr.R @@ -41,15 +41,39 @@ validate_pr <- function(hub_path = ".", gh_repo, pr_number, gh_repo = gh_repo, pr_number = pr_number ) - pr_filenames <- purrr::map_chr(pr_files, ~ .x$filename) - model_output_files <- get_pr_dir_files( - pr_filenames, - model_output_dir - ) - model_metadata_files <- get_pr_dir_files( - pr_filenames, - model_metadata_dir - ) + pr_df <- tibble::tibble( + filename = purrr::map_chr(pr_files, ~ .x$filename), + status = purrr::map_chr(pr_files, ~ .x$status), + model_output = purrr::map_lgl( + .data$filename, + ~ fs::path_has_parent(.x, model_output_dir) && + stringr::str_detect(.x, "README", negate = TRUE) + ), + model_metadata = purrr::map_lgl( + .data$filename, + ~ fs::path_has_parent(.x, model_metadata_dir) && + stringr::str_detect(.x, "README", negate = TRUE) + ), + ) %>% + dplyr::mutate( + rel_path = dplyr::case_when( + .data$model_output ~ fs::path_rel(.data$filename, model_output_dir), + .data$model_metadata ~ fs::path_rel(.data$filename, model_metadata_dir), + .default = .data$filename + ) + ) + inform_unvalidated_files(pr_df) + + model_output_files <- pr_df$rel_path[pr_df$model_output & + pr_df$status != "removed"] + model_metadata_files <- pr_df$rel_path[pr_df$model_metadata & + pr_df$status != "removed"] + + file_modifications <- purrr::map( + c("model_output", "model_metadata"), + ~ check_pr_modf_del_files(pr_df, file_type = .x) + ) %>% + purrr::reduce(combine) model_output_vals <- purrr::map( model_output_files, @@ -75,8 +99,10 @@ validate_pr <- function(hub_path = ".", gh_repo, pr_number, purrr::list_flatten() %>% as_hub_validations() + validations <- combine( validations, + file_modifications, model_output_vals, model_metadata_vals ) @@ -96,39 +122,15 @@ validate_pr <- function(hub_path = ".", gh_repo, pr_number, ) } ) - - inform_unvalidated_files( - model_output_files, - model_metadata_files, - pr_filenames - ) return(validations) } -get_pr_dir_files <- function(pr_filenames, dir_name) { - pr_filenames[ - fs::path_has_parent(pr_filenames, dir_name) - ] %>% - stringr::str_subset("README", negate = TRUE) %>% - fs::path_rel(dir_name) -} - -inform_unvalidated_files <- function(model_output_files, - model_metadata_files, - pr_filenames) { - validated_files <- c(model_output_files, model_metadata_files) - if (length(pr_filenames) == length(validated_files)) { +# Sends message reporting any files with changes in PR that were not validated. +inform_unvalidated_files <- function(pr_df) { + unvalidated_files <- pr_df[!pr_df$model_output & !pr_df$model_metadata, "filename", drop = TRUE] + if (length(unvalidated_files) == 0L) { return(invisible(NULL)) } - if (length(validated_files) == 0L) { - unvalidated_files <- pr_filenames - } else { - validated_idx <- purrr::map_int( - validated_files, - ~ grep(.x, pr_filenames, fixed = TRUE) - ) - unvalidated_files <- pr_filenames[-validated_idx] - } unvalidated_bullets <- purrr::map_chr( unvalidated_files, @@ -143,3 +145,37 @@ inform_unvalidated_files <- function(model_output_files, ) ) } +# Checks for model output file modifications and model output & model metadata +# file deletions. Returns an ⁠ condition class object if any +# modification or deletion detected. +check_pr_modf_del_files <- function(pr_df, file_type = c( + "model_output", + "model_metadata" + )) { + file_type <- rlang::arg_match(file_type) + df <- pr_df[pr_df[[file_type]], ] + df <- switch(file_type, + model_output = df[df$status %in% c("removed", "modified"), ], + model_metadata = df[df$status == "removed", ] + ) + + purrr::imap( + .x = df$rel_path, + ~ hubValidations::capture_check_cnd( + check = FALSE, + file_path = .x, + msg_subject = paste( + "Previously submitted", + stringr::str_replace(file_type, "_", " "), + "files" + ), + msg_verbs = c("were not", "must not be"), + msg_attribute = paste0(df$status[.y], "."), + details = cli::format_inline( + "{.path {df$filename[.y]}} {df$status[.y]}." + ), + error = TRUE + ) + ) %>% + hubValidations::as_hub_validations() +} diff --git a/tests/testthat/_snaps/validate_pr.md b/tests/testthat/_snaps/validate_pr.md index e81cbd87..15d76172 100644 --- a/tests/testthat/_snaps/validate_pr.md +++ b/tests/testthat/_snaps/validate_pr.md @@ -211,3 +211,195 @@ ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... +# validate_pr flags modifications and deletions in PR + + Code + str(checks) + Output + Classes 'hub_validations', 'list' hidden list of 28 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "mod_del_hub" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ :List of 6 + ..$ message : chr "Previously submitted model output files must not be modified. \n 'model-output/hub-baseline/2022-10-08-hub-base"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr ".f" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + $ :List of 6 + ..$ message : chr "Previously submitted model output files must not be removed. \n 'model-output/team1-goodmodel/2022-10-15-team1-"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + ..$ call : chr ".f" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + $ :List of 6 + ..$ message : chr "Previously submitted model metadata files must not be removed. \n 'model-metadata/team1-goodmodel.yaml' removed." + ..$ trace : NULL + ..$ parent : NULL + ..$ where : 'fs_path' chr "team1-goodmodel.yaml" + ..$ call : chr ".f" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/hub-baseline/2022-10-08-hub-baseline.csv'. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-hub-baseline.csv\" is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"origin_date\" values match submission `round_id` from file name. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 4 + ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ col_types :List of 4 + ..$ message : chr "Column data types match hub schema. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_col_types" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_vals :List of 5 + ..$ message : chr "`tbl` contains valid values/value combinations. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_values" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ rows_unique :List of 4 + ..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_rows_unique" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ req_vals :List of 5 + ..$ message : chr "Required task ID/output type/output type ID combinations all present. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ missing : tibble [0 x 6] (S3: tbl_df/tbl/data.frame) + .. ..$ origin_date : chr(0) + .. ..$ target : chr(0) + .. ..$ horizon : chr(0) + .. ..$ location : chr(0) + .. ..$ output_type : chr(0) + .. ..$ output_type_id: chr(0) + ..$ call : chr "check_tbl_values_required" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_valid :List of 4 + ..$ message : chr "Values in column `value` all valid with respect to modeling task config. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_value_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_non_desc:List of 5 + ..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__ + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_value_col_ascending" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_sum1 :List of 4 + ..$ message : chr "No pmf output types to check for sum of 1. Check skipped." + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_tbl_value_col_sum1" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-22-team1-goodmodel.csv'. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-22-team1-goodmodel.csv\" is valid. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 6 + ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + diff --git a/tests/testthat/test-validate_pr.R b/tests/testthat/test-validate_pr.R index ddae6155..de2c59de 100644 --- a/tests/testthat/test-validate_pr.R +++ b/tests/testthat/test-validate_pr.R @@ -1,39 +1,72 @@ test_that("validate_pr works on valid PR", { - skip_if_offline() + skip_if_offline() - temp_hub <- fs::path(tempdir(), "valid_sb_hub") - gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", - path = temp_hub, - branch = "pr-valid") + temp_hub <- fs::path(tempdir(), "valid_sb_hub") + gert::git_clone( + url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + path = temp_hub, + branch = "pr-valid" + ) - checks <- validate_pr(hub_path = temp_hub, - gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", - pr_number = 4, - skip_submit_window_check = TRUE) - - expect_snapshot(str(checks)) - expect_invisible(suppressMessages(check_for_errors(checks))) - expect_message(check_for_errors(checks), - regexp = "All validation checks have been successful.") + checks <- validate_pr( + hub_path = temp_hub, + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 4, + skip_submit_window_check = TRUE + ) + expect_snapshot(str(checks)) + expect_invisible(suppressMessages(check_for_errors(checks))) + expect_message(check_for_errors(checks), + regexp = "All validation checks have been successful." + ) }) test_that("validate_pr works on invalid PR", { - skip_if_offline() + skip_if_offline() + + temp_hub <- fs::path(tempdir(), "invalid_sb_hub") + gert::git_clone( + url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + path = temp_hub, + branch = "pr-missing-taskid" + ) + + checks <- validate_pr( + hub_path = temp_hub, + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 5, + skip_submit_window_check = TRUE + ) - temp_hub <- fs::path(tempdir(), "invalid_sb_hub") - gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", - path = temp_hub, - branch = "pr-missing-taskid") + expect_snapshot(str(checks)) - checks <- validate_pr(hub_path = temp_hub, - gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", - pr_number = 5, - skip_submit_window_check = TRUE) + expect_error( + suppressMessages(check_for_errors(checks)) + ) +}) + +test_that("validate_pr flags modifications and deletions in PR", { + skip_if_offline() - expect_snapshot(str(checks)) + temp_hub <- fs::path(tempdir(), "mod_del_hub") + gert::git_clone( + url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + path = temp_hub, + branch = "test-mod-del" + ) - expect_error( - suppressMessages(check_for_errors(checks)) + checks <- suppressMessages( + validate_pr( + hub_path = temp_hub, + gh_repo = "Infectious-Disease-Modeling-Hubs/ci-testhub-simple", + pr_number = 6, + skip_submit_window_check = TRUE ) + ) + + expect_snapshot(str(checks)) + expect_error( + suppressMessages(check_for_errors(checks)) + ) })