Merge branch 'main' into num-type-ids

hubverse-org · Nov 1, 2023 · ea92169 · ea92169
2 parents e8da5b4 + 096d826
commit ea92169
Show file tree

Hide file tree

Showing 29 changed files with 907 additions and 94 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: hubValidations
 Title: Testing framework for hubverse hub validations
-Version: 0.0.0.9003
+Version: 0.0.0.9005
 Authors@R: c(
     person(
         given = "Anna", 
@@ -53,7 +53,7 @@ Suggests:
     gert,
     mockery,
     rmarkdown,
-    testthat (>= 3.0.0),
+    testthat (>= 3.2.0),
     testthis,
     withr
 Remotes:

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,17 @@
-# hubValidations 0.0.0.9003
+# hubValidations 0.0.0.9005
 
 * Improved handling of numeric output type IDs (including high precision floating points / values with trailing zeros), especially when overall hub output type ID column is character. This previously led to a number of bugs and false validation failures (#58 & #54) which are addressed in this version.
 
+# hubValidations 0.0.0.9004
+
+This release contains a bug fix for reading in and validating CSV column types correctly. (#54) 
+
+# hubValidations 0.0.0.9003
+
+This release includes a number of bug fixes:
+- Custom/optional functions deployed via `validations.yml` can now be accessed directly from the `pkg` namespace, addressing a bug which required the `pkg` library to be loaded. (#51)
+- Use `all.equal` to check that sums of `pmf` probabilities equal 1. (#52)
+
 # hubValidations 0.0.0.9002
 
 This release includes improvements designed after the first round of sandbox testing on setting up the CDC FluSight hub. Improvements include:

diff --git a/R/check_tbl_value_col_sum1.R b/R/check_tbl_value_col_sum1.R
@@ -49,14 +49,14 @@ check_values_sum1 <- function(tbl) {
 
   check_tbl <- dplyr::group_by(tbl, dplyr::across(dplyr::all_of(group_cols))) %>%
     dplyr::arrange("output_type_id", .by_group = TRUE) %>%
-    dplyr::summarise(not_sum1 = !sum(.data[["value"]]) == 1L)
+    dplyr::summarise(sum1 = isTRUE(all.equal(sum(.data[["value"]]), 1L)))
 
-  if (!any(check_tbl$not_sum1)) {
+  if (all(check_tbl$sum1)) {
     return(NULL)
   }
 
-  dplyr::filter(check_tbl, .data[["not_sum1"]]) %>%
-    dplyr::select(-dplyr::all_of("not_sum1")) %>%
+  dplyr::filter(check_tbl, !.data[["sum1"]]) %>%
+    dplyr::select(-dplyr::all_of("sum1")) %>%
     dplyr::ungroup() %>%
     dplyr::mutate(output_type = "pmf")
 }
diff --git a/R/exec_cfg_check.R b/R/exec_cfg_check.R
@@ -1,17 +1,19 @@
 exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call) {
   fn_cfg <- validations_cfg[[check_name]]
   if (!is.null(fn_cfg[["pkg"]])) {
-      fn <- get(fn_cfg[["fn"]],
-                envir = rlang::as_environment(fn_cfg[["pkg"]])
-      )
-  } else if (!is.null(fn_cfg[["source"]])){
-      # TODO Validate source script.
-      source(fn_cfg[["source"]], local = TRUE)
-      fn <- get(fn_cfg[["fn"]])
+    fn <- get(fn_cfg[["fn"]],
+      envir = getNamespace(fn_cfg[["pkg"]])
+    )
+  } else if (!is.null(fn_cfg[["source"]])) {
+    # TODO Validate source script.
+    source(fn_cfg[["source"]], local = TRUE)
+    fn <- get(fn_cfg[["fn"]])
   }
 
   caller_env_formals <- get_caller_env_formals(
-      fn, caller_env, cfg_args = fn_cfg[["args"]])
+    fn, caller_env,
+    cfg_args = fn_cfg[["args"]]
+  )
   args <- c(
     caller_env_formals,
     fn_cfg[["args"]]
@@ -34,7 +36,7 @@ exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call)
 get_caller_env_formals <- function(fn, caller_env, cfg_args) {
   caller_env_fmls <- rlang::fn_fmls_names(fn)[
     rlang::fn_fmls_names(fn) %in% rlang::env_names(caller_env) &
-        !rlang::fn_fmls_names(fn) %in% cfg_args
+      !rlang::fn_fmls_names(fn) %in% cfg_args
   ]
- rlang::env_get_list(caller_env, nms = caller_env_fmls, default = NULL)
+  rlang::env_get_list(caller_env, nms = caller_env_fmls, default = NULL)
 }
diff --git a/R/read_model_out_file.R b/R/read_model_out_file.R
@@ -6,7 +6,7 @@
 #' @return a tibble of contents of the model output file.
 #' @export
 read_model_out_file <- function(file_path, hub_path = ".",
-                                use_hub_schema = FALSE) {
+                                use_hub_schema = TRUE) {
   full_path <- abs_file_path(file_path, hub_path)
 
   if (!fs::file_exists(full_path)) {

diff --git a/man/read_model_out_file.Rd b/man/read_model_out_file.Rd
diff --git a/tests/testthat/_snaps/capture_check_cnd.md b/tests/testthat/_snaps/capture_check_cnd.md
@@ -70,17 +70,19 @@
       capture_check_cnd(check = FALSE, file_path = "test/file.csv", msg_subject = "Column names",
         msg_attribute = "consistent with expected round task IDs and std column names.",
         msg_verbs = 1:2)
-    Error <rlang_error>
-      `msg_verbs` must be a character vector of length 2, not class <integer> of length 2
+    Condition
+      Error in `capture_check_cnd()`:
+      ! `msg_verbs` must be a character vector of length 2, not class <integer> of length 2
 
 ---
 
     Code
       capture_check_cnd(check = FALSE, file_path = "test/file.csv", msg_subject = "Column names",
         msg_attribute = "consistent with expected round task IDs and std column names.",
         msg_verbs = c("are"))
-    Error <rlang_error>
-      `msg_verbs` must be a character vector of length 2, not class <character> of length 1
+    Condition
+      Error in `capture_check_cnd()`:
+      ! `msg_verbs` must be a character vector of length 2, not class <character> of length 1
 
 # capture_check_cnd works correctly
 

diff --git a/tests/testthat/_snaps/check_tbl_col_types.md b/tests/testthat/_snaps/check_tbl_col_types.md
@@ -16,3 +16,12 @@
       Warning:
       Column data types do not match hub schema.  `origin_date ` should be "character " not "Date ", `horizon ` should be "double " not "integer "
 
+---
+
+    Code
+      check_tbl_col_types(tbl, file_path, hub_path)
+    Output
+      <warning/check_failure>
+      Warning:
+      Column data types do not match hub schema.  `NA ` should be "NA " not "NA ", `NA ` should be "NA " not "NA ", `horizon ` should be "double " not "integer ", `output_type_id ` should be "double " not "character ", `value ` should be "integer " not "double "
+
diff --git a/tests/testthat/_snaps/check_tbl_value_col_sum1.md b/tests/testthat/_snaps/check_tbl_value_col_sum1.md
@@ -43,6 +43,19 @@
        $ use_cli_format: logi TRUE
        - attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ...
 
+---
+
+    Code
+      str(check_tbl_value_col_sum1(tbl, file_path))
+    Output
+      List of 5
+       $ message       : chr "Values in `value` column do sum to 1 for all unique task ID value combination of pmf\n    output types. \n "
+       $ where         : chr "umass_ens/2023-05-08-umass_ens.csv"
+       $ error_tbl     : NULL
+       $ call          : chr "check_tbl_value_col_sum1"
+       $ use_cli_format: logi TRUE
+       - attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
+
 # check_tbl_value_col_sum1 skips correctly
 
     Code

diff --git a/tests/testthat/_snaps/combine.md b/tests/testthat/_snaps/combine.md
@@ -52,7 +52,8 @@
 
     Code
       combine(new_hub_validations(), new_hub_validations(), a = 1)
-    Error <rlang_error>
+    Condition
+      Error in `validate_internal_class()`:
       ! All elements must inherit from class <hub_validations>.
       x Element with index 1 does not.
 
@@ -61,7 +62,8 @@
     Code
       combine(new_hub_validations(file_exists = check_file_exists(file_path, hub_path),
       file_name = check_file_name(file_path), a = 10))
-    Error <rlang_error>
+    Condition
+      Error in `validate_internal_class()`:
       ! All elements must inherit from class <hub_check>.
       x Element with index 3 does not.
 
diff --git a/tests/testthat/_snaps/opt_check_tbl_col_timediff.md b/tests/testthat/_snaps/opt_check_tbl_col_timediff.md
@@ -33,31 +33,35 @@
     Code
       opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_dates", timediff = lubridate::weeks(2))
-    Error <simpleError>
-      Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'.
+    Condition
+      Error in `opt_check_tbl_col_timediff()`:
+      ! Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'.
 
 ---
 
     Code
       opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = c("target_end_date", "forecast_date"), timediff = lubridate::weeks(
           2))
-    Error <simpleError>
-      Assertion on 't1_colname' failed: Must have length 1, but has length 2.
+    Condition
+      Error in `opt_check_tbl_col_timediff()`:
+      ! Assertion on 't1_colname' failed: Must have length 1, but has length 2.
 
 ---
 
     Code
       opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_date", timediff = 14L)
-    Error <simpleError>
-      Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'.
+    Condition
+      Error in `opt_check_tbl_col_timediff()`:
+      ! Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'.
 
 ---
 
     Code
       opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_date", timediff = lubridate::weeks(2))
-    Error <rlang_error>
-      Column `colname` must be configured as <Date> not <character>.
+    Condition
+      Error in `opt_check_tbl_col_timediff()`:
+      ! Column `colname` must be configured as <Date> not <character>.
 
diff --git a/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md b/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md
@@ -20,27 +20,31 @@
 
     Code
       opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets)
-    Error <rlang_error>
-      Target does not match any round target keys.
+    Condition
+      Error in `assert_target_keys()`:
+      ! Target does not match any round target keys.
 
 ---
 
     Code
       opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_file_path = "random/path.csv")
-    Error <rlang_error>
-      File not found at 'random/path.csv'
+    Condition
+      Error in `opt_check_tbl_counts_lt_popn()`:
+      ! File not found at 'random/path.csv'
 
 ---
 
     Code
       opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, location_col = "random_col")
-    Error <simpleError>
-      Assertion on 'location_col' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'random_col'.
+    Condition
+      Error in `opt_check_tbl_counts_lt_popn()`:
+      ! Assertion on 'location_col' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'random_col'.
 
 ---
 
     Code
       opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_col = "random_col")
-    Error <simpleError>
-      Assertion on 'popn_col' failed: Must be element of set {'abbreviation','location','location_name','population','','count_rate1','count_rate2','count_rate2p5','count_rate3','count_rate4','count_rate5'}, but is 'random_col'.
+    Condition
+      Error in `opt_check_tbl_counts_lt_popn()`:
+      ! Assertion on 'popn_col' failed: Must be element of set {'abbreviation','location','location_name','population','','count_rate1','count_rate2','count_rate2p5','count_rate3','count_rate4','count_rate5'}, but is 'random_col'.
 
diff --git a/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md b/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md
@@ -43,30 +43,34 @@
     Code
       opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_dates")
-    Error <simpleError>
-      Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'.
+    Condition
+      Error in `opt_check_tbl_horizon_timediff()`:
+      ! Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'.
 
 ---
 
     Code
       opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = c("target_end_date", "forecast_date"))
-    Error <simpleError>
-      Assertion on 't1_colname' failed: Must have length 1, but has length 2.
+    Condition
+      Error in `opt_check_tbl_horizon_timediff()`:
+      ! Assertion on 't1_colname' failed: Must have length 1, but has length 2.
 
 ---
 
     Code
       opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_date", timediff = 7L)
-    Error <simpleError>
-      Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'.
+    Condition
+      Error in `opt_check_tbl_horizon_timediff()`:
+      ! Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'.
 
 ---
 
     Code
       opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date",
         t1_colname = "target_end_date")
-    Error <rlang_error>
-      Column `colname` must be configured as <Date> not <character>.
+    Condition
+      Error in `opt_check_tbl_horizon_timediff()`:
+      ! Column `colname` must be configured as <Date> not <character>.
 
diff --git a/tests/testthat/_snaps/parse_file_name.md b/tests/testthat/_snaps/parse_file_name.md
@@ -65,6 +65,7 @@
 
     Code
       parse_file_name("model-output/team1-goodmodel/2022-10-08-team1_goodmodel.csv")
-    Error <rlang_error>
-      Could not parse file name '2022-10-08-team1_goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing.
+    Condition
+      Error in `parse_file_name()`:
+      ! Could not parse file name '2022-10-08-team1_goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing.
 
diff --git a/tests/testthat/_snaps/read_model_out_file.md b/tests/testthat/_snaps/read_model_out_file.md
@@ -29,3 +29,19 @@
        $ output_type_id: num [1:47] 0.01 0.025 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 ...
        $ value         : int [1:47] 135 137 139 140 141 141 142 143 144 145 ...
 
+# read_model_out_file correctly uses hub schema to read character cols in csvs
+
+    Code
+      str(read_model_out_file(hub_path = test_path("testdata/hub"),
+      "hub-baseline/2023-04-24-hub-baseline.csv"))
+    Output
+      tibble [48 x 8] (S3: tbl_df/tbl/data.frame)
+       $ forecast_date  : Date[1:48], format: "2023-04-24" "2023-04-24" ...
+       $ target_end_date: Date[1:48], format: "2023-05-01" "2023-05-08" ...
+       $ horizon        : int [1:48] 1 2 1 1 1 1 1 1 1 1 ...
+       $ target         : chr [1:48] "wk ahead inc flu hosp" "wk ahead inc flu hosp" "wk ahead inc flu hosp" "wk ahead inc flu hosp" ...
+       $ location       : chr [1:48] "06" "06" "06" "06" ...
+       $ output_type    : chr [1:48] "mean" "mean" "quantile" "quantile" ...
+       $ output_type_id : chr [1:48] NA NA "0.01" "0.025" ...
+       $ value          : num [1:48] 1033 1033 0 0 0 ...
+
diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md
@@ -39,8 +39,9 @@
 
     Code
       get_file_round_id(file_path = "team1-goodmodel/2022-10-08-team-1-goodmodel.csv")
-    Error <rlang_error>
-      Could not parse file name '2022-10-08-team-1-goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing.
+    Condition
+      Error in `parse_file_name()`:
+      ! Could not parse file name '2022-10-08-team-1-goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing.
 
 # get_file_* utils work