Merge pull request #116 from hubverse-org/ak/hubval-print

Improve `hub_validations` print method
hubverse-org · Sep 5, 2024 · 4faebe9 · 4faebe9
2 parents be60ccb + c20802c
commit 4faebe9
Show file tree

Hide file tree

Showing 7 changed files with 279 additions and 167 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,10 @@
   - `✖` : `check_failure` class object.  This indicates an error that does not impact the validation process. 
   - `ⓧ` : `check_error` class object. This also indicates early termination of the validation process.
   - `☒` : `check_exec_error` class object. This indicates an error in the execution of a check function.
+* `hub_validations` class object `combine()` method now ensures that check names are made unique across all `hub_validations` objects being combined.
+* Additional improvements to `hub_validations` class object `print()` method. 
+  - Check results for each file validated are now split and printed under file name header. 
+  - The check name that can be used to access the check result from the `hub_validations` object is now included as the prefix to the check result message instead of the file name (#76).
 * `octolog` dependency removed. This removes the annotation of validation results onto GitHub Action workflow logs (#113).
 
 # hubValidations 0.5.1

diff --git a/R/hub_validations_methods.R b/R/hub_validations_methods.R
@@ -8,27 +8,39 @@
 print.hub_validations <- function(x, ...) {
   if (length(x) == 0L) {
     msg <- cli::format_inline("Empty {.cls hub_validations}")
+    cli::cli_inform(msg)
   } else {
-    msg <- stats::setNames(
-      paste(
-        fs::path_file(purrr::map_chr(x, "where")),
-        purrr::map_chr(x, "message"),
-        sep = ": "
-      ),
-      dplyr::case_when(
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_success")) ~ "v",
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_failure")) ~ "x",
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_exec_warn")) ~ "!",
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_error")) ~ "circle_cross",
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_exec_error")) ~ "lower_block_8",
-        purrr::map_lgl(x, ~ rlang::inherits_any(.x, "check_info")) ~ "i",
-        TRUE ~ "*"
+    print_file <- function(file_name, x) {
+      x <- x[get_filenames(x) == file_name]
+      msg <- stats::setNames(
+        paste(
+          apply_cli_span_class(names(x), class = "check_name"),
+          purrr::map_chr(x, "message"),
+          sep = ": "
+        ),
+        dplyr::case_when(
+          is_check_class(x, "check_success") ~ "v",
+          is_check_class(x, "check_failure") ~ "x",
+          is_check_class(x, "check_exec_warn") ~ "!",
+          is_check_class(x, "check_error") ~ "circle_cross",
+          is_check_class(x, "check_exec_error") ~ "lower_block_8",
+          is_check_class(x, "check_info") ~ "i",
+          TRUE ~ "*"
+        )
       )
+
+      cli::cli_div(class = "hub_validations", theme = hub_validation_theme)
+      cli::cli_h2(file_name)
+      cli::cli_inform(msg)
+      cli::cli_end()
+    }
+
+    purrr::walk(
+      .x = get_filenames(x, unique = TRUE),
+      .f = function(file_name, x) print_file(file_name, x),
+      x = x
     )
   }
-  cli::cli_div(class = "hub_validations", theme = hub_validation_theme)
-  cli::cli_inform(msg)
-  cli::cli_end()
 }
 
 
@@ -48,8 +60,16 @@ combine.hub_validations <- function(...) {
     purrr::compact() %>%
     validate_internal_class(class = "hub_validations")
 
-  structure(c(...),
-    class = c("hub_validations", "list")
+  combined <- c(...)
+  if (is.null(names(combined))) {
+    combined_names <- NULL
+  } else {
+    combined_names <- make.unique(names(combined), sep = "_")
+  }
+  structure(
+    combined,
+    class = c("hub_validations", "list"),
+    names = combined_names
   )
 }
 
@@ -102,5 +122,45 @@ hub_validation_theme <- list(
       paste0(cli::col_red(cli::symbol$checkbox_on), " ")
     },
     "text-exdent" = 2L
+  ),
+  "span.check_name" = list(
+    "before" = "[",
+    "after" = "]",
+    color = "grey"
+  ),
+  "h2" = list(
+    fmt = function(x) {
+      cli::col_br_cyan(
+        paste0(
+          cli::symbol$line, cli::symbol$line,
+          " ", cli::style_underline(x), " ",
+          cli::symbol$line, cli::symbol$line,
+          cli::symbol$line, cli::symbol$line
+        )
+      )
+    }
   )
 )
+
+apply_cli_span_class <- function(x, class = "check_name") {
+  paste0("{.", class, " ", x, "}")
+}
+
+is_check_class <- function(x,
+                           class = c(
+                             "check_success", "check_failure",
+                             "check_exec_warn", "check_error",
+                             "check_exec_error", "check_info"
+                           )) {
+  class <- rlang::arg_match(class)
+  purrr::map_lgl(x, ~ rlang::inherits_any(.x, class))
+}
+
+get_filenames <- function(x, unique = FALSE) {
+  filenames <- fs::path_file(purrr::map_chr(x, "where"))
+  if (unique) {
+    unique(filenames)
+  } else {
+    filenames
+  }
+}
diff --git a/R/validate_pr.R b/R/validate_pr.R
@@ -285,7 +285,7 @@ check_pr_modf_del_files <- function(pr_df, file_type = c(
     purrr::compact()
 
   as_hub_validations(out) %>%
-    purrr::set_names(sprintf("%s_mod_%i", file_type, seq_along(out)))
+    purrr::set_names(sprintf("%s_mod", file_type))
 }
 
 

diff --git a/tests/testthat/_snaps/validate_model_data.md b/tests/testthat/_snaps/validate_model_data.md
@@ -249,72 +249,84 @@
     Code
       validate_model_data(hub_path, file_path)
     Message
-      v 2022-10-08-team1-goodmodel.csv: File could be read successfully.
-      v 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.
-      v 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.
-      v 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.
-      v 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.
-      v 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.
-      v 2022-10-08-team1-goodmodel.csv: `tbl` contains valid values/value combinations.
-      v 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
-      v 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.
-      v 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.
-      v 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
-      i 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped.
+      
+      -- 2022-10-08-team1-goodmodel.csv ----
+      
+      v [file_read]: File could be read successfully.
+      v [valid_round_id_col]: `round_id_col` name is valid.
+      v [unique_round_id]: `round_id` column "origin_date" contains a single, unique round ID value.
+      v [match_round_id]: All `round_id_col` "origin_date" values match submission `round_id` from file name.
+      v [colnames]: Column names are consistent with expected round task IDs and std column names.
+      v [col_types]: Column data types match hub schema.
+      v [valid_vals]: `tbl` contains valid values/value combinations.
+      v [rows_unique]: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
+      v [req_vals]: Required task ID/output type/output type ID combinations all present.
+      v [value_col_valid]: Values in column `value` all valid with respect to modeling task config.
+      v [value_col_non_desc]: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
+      i [value_col_sum1]: No pmf output types to check for sum of 1. Check skipped.
 
 # validate_model_data print method work [ansi]
 
     Code
       validate_model_data(hub_path, file_path)
     Message
-      [1m[22m[32mv[39m 2022-10-08-team1-goodmodel.csv: File could be read successfully.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: `round_id` column [34m"origin_date"[39m contains a single, unique round ID value.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: All `round_id_col` [34m"origin_date"[39m values match submission `round_id` from file name.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: `tbl` contains valid values/value combinations.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.
-      [32mv[39m 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
-      [36mi[39m 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped.
+      
+      [96m-- [4m[1m2022-10-08-team1-goodmodel.csv[22m[24m ----[39m
+      
+      [1m[22m[32mv[39m [90m[file_read][39m: File could be read successfully.
+      [32mv[39m [90m[valid_round_id_col][39m: `round_id_col` name is valid.
+      [32mv[39m [90m[unique_round_id][39m: `round_id` column [34m"origin_date"[39m contains a single, unique round ID value.
+      [32mv[39m [90m[match_round_id][39m: All `round_id_col` [34m"origin_date"[39m values match submission `round_id` from file name.
+      [32mv[39m [90m[colnames][39m: Column names are consistent with expected round task IDs and std column names.
+      [32mv[39m [90m[col_types][39m: Column data types match hub schema.
+      [32mv[39m [90m[valid_vals][39m: `tbl` contains valid values/value combinations.
+      [32mv[39m [90m[rows_unique][39m: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
+      [32mv[39m [90m[req_vals][39m: Required task ID/output type/output type ID combinations all present.
+      [32mv[39m [90m[value_col_valid][39m: Values in column `value` all valid with respect to modeling task config.
+      [32mv[39m [90m[value_col_non_desc][39m: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
+      [36mi[39m [90m[value_col_sum1][39m: No pmf output types to check for sum of 1. Check skipped.
 
 # validate_model_data print method work [unicode]
 
     Code
       validate_model_data(hub_path, file_path)
     Message
-      ✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully.
-      ✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.
-      ✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value.
-      ✔ 2022-10-08-team1-goodmodel.csv: All `round_id_col` "origin_date" values match submission `round_id` from file name.
-      ✔ 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.
-      ✔ 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.
-      ✔ 2022-10-08-team1-goodmodel.csv: `tbl` contains valid values/value combinations.
-      ✔ 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
-      ✔ 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.
-      ✔ 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.
-      ✔ 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
-      ℹ 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped.
+      
+      ── 2022-10-08-team1-goodmodel.csv ────
+      
+      ✔ [file_read]: File could be read successfully.
+      ✔ [valid_round_id_col]: `round_id_col` name is valid.
+      ✔ [unique_round_id]: `round_id` column "origin_date" contains a single, unique round ID value.
+      ✔ [match_round_id]: All `round_id_col` "origin_date" values match submission `round_id` from file name.
+      ✔ [colnames]: Column names are consistent with expected round task IDs and std column names.
+      ✔ [col_types]: Column data types match hub schema.
+      ✔ [valid_vals]: `tbl` contains valid values/value combinations.
+      ✔ [rows_unique]: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
+      ✔ [req_vals]: Required task ID/output type/output type ID combinations all present.
+      ✔ [value_col_valid]: Values in column `value` all valid with respect to modeling task config.
+      ✔ [value_col_non_desc]: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
+      ℹ [value_col_sum1]: No pmf output types to check for sum of 1. Check skipped.
 
 # validate_model_data print method work [fancy]
 
     Code
       validate_model_data(hub_path, file_path)
     Message
-      [1m[22m[32m✔[39m 2022-10-08-team1-goodmodel.csv: File could be read successfully.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: `round_id` column [34m"origin_date"[39m contains a single, unique round ID value.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: All `round_id_col` [34m"origin_date"[39m values match submission `round_id` from file name.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: Column names are consistent with expected round task IDs and std column names.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: Column data types match hub schema.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: `tbl` contains valid values/value combinations.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: Required task ID/output type/output type ID combinations all present.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: Values in column `value` all valid with respect to modeling task config.
-      [32m✔[39m 2022-10-08-team1-goodmodel.csv: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
-      [36mℹ[39m 2022-10-08-team1-goodmodel.csv: No pmf output types to check for sum of 1. Check skipped.
+      
+      [96m── [4m[1m2022-10-08-team1-goodmodel.csv[22m[24m ────[39m
+      
+      [1m[22m[32m✔[39m [90m[file_read][39m: File could be read successfully.
+      [32m✔[39m [90m[valid_round_id_col][39m: `round_id_col` name is valid.
+      [32m✔[39m [90m[unique_round_id][39m: `round_id` column [34m"origin_date"[39m contains a single, unique round ID value.
+      [32m✔[39m [90m[match_round_id][39m: All `round_id_col` [34m"origin_date"[39m values match submission `round_id` from file name.
+      [32m✔[39m [90m[colnames][39m: Column names are consistent with expected round task IDs and std column names.
+      [32m✔[39m [90m[col_types][39m: Column data types match hub schema.
+      [32m✔[39m [90m[valid_vals][39m: `tbl` contains valid values/value combinations.
+      [32m✔[39m [90m[rows_unique][39m: All combinations of task ID column/`output_type`/`output_type_id` values are unique.
+      [32m✔[39m [90m[req_vals][39m: Required task ID/output type/output type ID combinations all present.
+      [32m✔[39m [90m[value_col_valid][39m: Values in column `value` all valid with respect to modeling task config.
+      [32m✔[39m [90m[value_col_non_desc][39m: Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID value/output type combinations of quantile or cdf output types.
+      [36mℹ[39m [90m[value_col_sum1][39m: No pmf output types to check for sum of 1. Check skipped.
 
 # validate_model_data errors correctly