From 659a00648c4b09b9539063b8588e4c33fbbe2db6 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Fri, 6 Oct 2023 12:54:00 +0300 Subject: [PATCH 1/9] get opt function directly from namespace. Resolves #51 --- R/exec_cfg_check.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/exec_cfg_check.R b/R/exec_cfg_check.R index 590ad29d..6c97bbb0 100644 --- a/R/exec_cfg_check.R +++ b/R/exec_cfg_check.R @@ -1,17 +1,19 @@ exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call) { fn_cfg <- validations_cfg[[check_name]] if (!is.null(fn_cfg[["pkg"]])) { - fn <- get(fn_cfg[["fn"]], - envir = rlang::as_environment(fn_cfg[["pkg"]]) - ) - } else if (!is.null(fn_cfg[["source"]])){ - # TODO Validate source script. - source(fn_cfg[["source"]], local = TRUE) - fn <- get(fn_cfg[["fn"]]) + fn <- get(fn_cfg[["fn"]], + envir = getNamespace(fn_cfg[["pkg"]]) + ) + } else if (!is.null(fn_cfg[["source"]])) { + # TODO Validate source script. + source(fn_cfg[["source"]], local = TRUE) + fn <- get(fn_cfg[["fn"]]) } caller_env_formals <- get_caller_env_formals( - fn, caller_env, cfg_args = fn_cfg[["args"]]) + fn, caller_env, + cfg_args = fn_cfg[["args"]] + ) args <- c( caller_env_formals, fn_cfg[["args"]] @@ -34,7 +36,7 @@ exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call) get_caller_env_formals <- function(fn, caller_env, cfg_args) { caller_env_fmls <- rlang::fn_fmls_names(fn)[ rlang::fn_fmls_names(fn) %in% rlang::env_names(caller_env) & - !rlang::fn_fmls_names(fn) %in% cfg_args + !rlang::fn_fmls_names(fn) %in% cfg_args ] - rlang::env_get_list(caller_env, nms = caller_env_fmls, default = NULL) + rlang::env_get_list(caller_env, nms = caller_env_fmls, default = NULL) } From 973370ee10e468cea2fbdfdd276486ec55369815 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Mon, 9 Oct 2023 10:33:52 +0300 Subject: [PATCH 2/9] Use all.equal to compare double pmf prob sums. Resolves #52 --- R/check_tbl_value_col_sum1.R | 8 ++++---- tests/testthat/_snaps/check_tbl_value_col_sum1.md | 13 +++++++++++++ tests/testthat/test-check_tbl_value_col_sum1.R | 12 ++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/R/check_tbl_value_col_sum1.R b/R/check_tbl_value_col_sum1.R index f454eb71..df70171e 100644 --- a/R/check_tbl_value_col_sum1.R +++ b/R/check_tbl_value_col_sum1.R @@ -49,14 +49,14 @@ check_values_sum1 <- function(tbl) { check_tbl <- dplyr::group_by(tbl, dplyr::across(dplyr::all_of(group_cols))) %>% dplyr::arrange("output_type_id", .by_group = TRUE) %>% - dplyr::summarise(not_sum1 = !sum(.data[["value"]]) == 1L) + dplyr::summarise(sum1 = isTRUE(all.equal(sum(.data[["value"]]), 1L))) - if (!any(check_tbl$not_sum1)) { + if (all(check_tbl$sum1)) { return(NULL) } - dplyr::filter(check_tbl, .data[["not_sum1"]]) %>% - dplyr::select(-dplyr::all_of("not_sum1")) %>% + dplyr::filter(check_tbl, !.data[["sum1"]]) %>% + dplyr::select(-dplyr::all_of("sum1")) %>% dplyr::ungroup() %>% dplyr::mutate(output_type = "pmf") } diff --git a/tests/testthat/_snaps/check_tbl_value_col_sum1.md b/tests/testthat/_snaps/check_tbl_value_col_sum1.md index 372625ec..9bb11458 100644 --- a/tests/testthat/_snaps/check_tbl_value_col_sum1.md +++ b/tests/testthat/_snaps/check_tbl_value_col_sum1.md @@ -43,6 +43,19 @@ $ use_cli_format: logi TRUE - attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ... +--- + + Code + str(check_tbl_value_col_sum1(tbl, file_path)) + Output + List of 5 + $ message : chr "Values in `value` column do sum to 1 for all unique task ID value combination of pmf\n output types. \n " + $ where : chr "umass_ens/2023-05-08-umass_ens.csv" + $ error_tbl : NULL + $ call : chr "check_tbl_value_col_sum1" + $ use_cli_format: logi TRUE + - attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + # check_tbl_value_col_sum1 skips correctly Code diff --git a/tests/testthat/test-check_tbl_value_col_sum1.R b/tests/testthat/test-check_tbl_value_col_sum1.R index 4f612c3a..ebb81d4c 100644 --- a/tests/testthat/test-check_tbl_value_col_sum1.R +++ b/tests/testthat/test-check_tbl_value_col_sum1.R @@ -26,6 +26,18 @@ test_that("check_tbl_value_col_sum1 errors correctly", { check_tbl_value_col_sum1(tbl, file_path) ) ) + + tbl$value[1] <- 0.818 + tbl$value[2] <- 0.180 + tbl$value[3] <- 0.002 + expect_snapshot( + str( + check_tbl_value_col_sum1(tbl, file_path) + ) + ) + + + }) test_that("check_tbl_value_col_sum1 skips correctly", { From f84eee0b7d62b0d7ca1022cf6632d92130da6dde Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Mon, 9 Oct 2023 11:15:13 +0300 Subject: [PATCH 3/9] Bump version --- DESCRIPTION | 2 +- NEWS.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e875dd4f..767ef6b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hubValidations Title: Testing framework for hubverse hub validations -Version: 0.0.0.9002 +Version: 0.0.0.9003 Authors@R: c( person( given = "Anna", diff --git a/NEWS.md b/NEWS.md index 4e4939bc..b90b933c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# hubValidations 0.0.0.9003 + +This release includes a number of bug fixes: +- Deployment of custom/optional functions via `validations.yml` can now be accessed directly form `pkg` namespace, addressing bug which required `pkg` library to be loaded. (#51) +- Use `all.equal` to check that sums of `pmf` probabilities equal 1. (#52) + # hubValidations 0.0.0.9002 This release includes improvements desgined after the first round of sandbox testing on setting up the CDC FluSight hub. Improvements include: From 3a1d9106322f0ef09d146e6bd8d5c7580fdeddf6 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Mon, 9 Oct 2023 11:45:58 +0300 Subject: [PATCH 4/9] Bump to 3.2.0 version of testthat. Update snapshots. See https://www.tidyverse.org/blog/2023/10/testthat-3-2-0/#snapshotting-changes for details --- DESCRIPTION | 2 +- tests/testthat/_snaps/capture_check_cnd.md | 10 ++++---- tests/testthat/_snaps/combine.md | 6 +++-- .../_snaps/opt_check_tbl_col_timediff.md | 20 +++++++++------- .../_snaps/opt_check_tbl_counts_lt_popn.md | 20 +++++++++------- .../_snaps/opt_check_tbl_horizon_timediff.md | 20 +++++++++------- tests/testthat/_snaps/parse_file_name.md | 5 ++-- tests/testthat/_snaps/utils.md | 5 ++-- tests/testthat/_snaps/validate_model_data.md | 13 +++++----- tests/testthat/_snaps/validate_model_file.md | 24 +++++++++---------- 10 files changed, 72 insertions(+), 53 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 767ef6b0..ba768040 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,7 +53,7 @@ Suggests: gert, mockery, rmarkdown, - testthat (>= 3.0.0), + testthat (>= 3.2.0), testthis, withr Remotes: diff --git a/tests/testthat/_snaps/capture_check_cnd.md b/tests/testthat/_snaps/capture_check_cnd.md index cf6cb28b..4c7b10dc 100644 --- a/tests/testthat/_snaps/capture_check_cnd.md +++ b/tests/testthat/_snaps/capture_check_cnd.md @@ -70,8 +70,9 @@ capture_check_cnd(check = FALSE, file_path = "test/file.csv", msg_subject = "Column names", msg_attribute = "consistent with expected round task IDs and std column names.", msg_verbs = 1:2) - Error - `msg_verbs` must be a character vector of length 2, not class of length 2 + Condition + Error in `capture_check_cnd()`: + ! `msg_verbs` must be a character vector of length 2, not class of length 2 --- @@ -79,8 +80,9 @@ capture_check_cnd(check = FALSE, file_path = "test/file.csv", msg_subject = "Column names", msg_attribute = "consistent with expected round task IDs and std column names.", msg_verbs = c("are")) - Error - `msg_verbs` must be a character vector of length 2, not class of length 1 + Condition + Error in `capture_check_cnd()`: + ! `msg_verbs` must be a character vector of length 2, not class of length 1 # capture_check_cnd works correctly diff --git a/tests/testthat/_snaps/combine.md b/tests/testthat/_snaps/combine.md index e8263235..0d3884aa 100644 --- a/tests/testthat/_snaps/combine.md +++ b/tests/testthat/_snaps/combine.md @@ -52,7 +52,8 @@ Code combine(new_hub_validations(), new_hub_validations(), a = 1) - Error + Condition + Error in `validate_internal_class()`: ! All elements must inherit from class . x Element with index 1 does not. @@ -61,7 +62,8 @@ Code combine(new_hub_validations(file_exists = check_file_exists(file_path, hub_path), file_name = check_file_name(file_path), a = 10)) - Error + Condition + Error in `validate_internal_class()`: ! All elements must inherit from class . x Element with index 3 does not. diff --git a/tests/testthat/_snaps/opt_check_tbl_col_timediff.md b/tests/testthat/_snaps/opt_check_tbl_col_timediff.md index 73073b49..774d4d3a 100644 --- a/tests/testthat/_snaps/opt_check_tbl_col_timediff.md +++ b/tests/testthat/_snaps/opt_check_tbl_col_timediff.md @@ -33,8 +33,9 @@ Code opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_dates", timediff = lubridate::weeks(2)) - Error - Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. + Condition + Error in `opt_check_tbl_col_timediff()`: + ! Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. --- @@ -42,22 +43,25 @@ opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = c("target_end_date", "forecast_date"), timediff = lubridate::weeks( 2)) - Error - Assertion on 't1_colname' failed: Must have length 1, but has length 2. + Condition + Error in `opt_check_tbl_col_timediff()`: + ! Assertion on 't1_colname' failed: Must have length 1, but has length 2. --- Code opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = 14L) - Error - Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. + Condition + Error in `opt_check_tbl_col_timediff()`: + ! Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. --- Code opt_check_tbl_col_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = lubridate::weeks(2)) - Error - Column `colname` must be configured as not . + Condition + Error in `opt_check_tbl_col_timediff()`: + ! Column `colname` must be configured as not . diff --git a/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md b/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md index aa9dca94..b48f6cbb 100644 --- a/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md +++ b/tests/testthat/_snaps/opt_check_tbl_counts_lt_popn.md @@ -20,27 +20,31 @@ Code opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, targets = targets) - Error - Target does not match any round target keys. + Condition + Error in `assert_target_keys()`: + ! Target does not match any round target keys. --- Code opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_file_path = "random/path.csv") - Error - File not found at 'random/path.csv' + Condition + Error in `opt_check_tbl_counts_lt_popn()`: + ! File not found at 'random/path.csv' --- Code opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, location_col = "random_col") - Error - Assertion on 'location_col' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'random_col'. + Condition + Error in `opt_check_tbl_counts_lt_popn()`: + ! Assertion on 'location_col' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'random_col'. --- Code opt_check_tbl_counts_lt_popn(tbl, file_path, hub_path, popn_col = "random_col") - Error - Assertion on 'popn_col' failed: Must be element of set {'abbreviation','location','location_name','population','','count_rate1','count_rate2','count_rate2p5','count_rate3','count_rate4','count_rate5'}, but is 'random_col'. + Condition + Error in `opt_check_tbl_counts_lt_popn()`: + ! Assertion on 'popn_col' failed: Must be element of set {'abbreviation','location','location_name','population','','count_rate1','count_rate2','count_rate2p5','count_rate3','count_rate4','count_rate5'}, but is 'random_col'. diff --git a/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md b/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md index 129e0bf3..d054869e 100644 --- a/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md +++ b/tests/testthat/_snaps/opt_check_tbl_horizon_timediff.md @@ -43,30 +43,34 @@ Code opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_dates") - Error - Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. + Condition + Error in `opt_check_tbl_horizon_timediff()`: + ! Assertion on 't1_colname' failed: Must be element of set {'forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'}, but is 'target_end_dates'. --- Code opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = c("target_end_date", "forecast_date")) - Error - Assertion on 't1_colname' failed: Must have length 1, but has length 2. + Condition + Error in `opt_check_tbl_horizon_timediff()`: + ! Assertion on 't1_colname' failed: Must have length 1, but has length 2. --- Code opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date", timediff = 7L) - Error - Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. + Condition + Error in `opt_check_tbl_horizon_timediff()`: + ! Assertion on 'timediff' failed: Must inherit from class 'Period', but has class 'integer'. --- Code opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "forecast_date", t1_colname = "target_end_date") - Error - Column `colname` must be configured as not . + Condition + Error in `opt_check_tbl_horizon_timediff()`: + ! Column `colname` must be configured as not . diff --git a/tests/testthat/_snaps/parse_file_name.md b/tests/testthat/_snaps/parse_file_name.md index af4aca1b..95abd6bc 100644 --- a/tests/testthat/_snaps/parse_file_name.md +++ b/tests/testthat/_snaps/parse_file_name.md @@ -65,6 +65,7 @@ Code parse_file_name("model-output/team1-goodmodel/2022-10-08-team1_goodmodel.csv") - Error - Could not parse file name '2022-10-08-team1_goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. + Condition + Error in `parse_file_name()`: + ! Could not parse file name '2022-10-08-team1_goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md index 65ae4d17..5280832e 100644 --- a/tests/testthat/_snaps/utils.md +++ b/tests/testthat/_snaps/utils.md @@ -39,8 +39,9 @@ Code get_file_round_id(file_path = "team1-goodmodel/2022-10-08-team-1-goodmodel.csv") - Error - Could not parse file name '2022-10-08-team-1-goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. + Condition + Error in `parse_file_name()`: + ! Could not parse file name '2022-10-08-team-1-goodmodel' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. # get_file_* utils work diff --git a/tests/testthat/_snaps/validate_model_data.md b/tests/testthat/_snaps/validate_model_data.md index 0f4812ef..a1617301 100644 --- a/tests/testthat/_snaps/validate_model_data.md +++ b/tests/testthat/_snaps/validate_model_data.md @@ -241,7 +241,7 @@ Code validate_model_data(hub_path, file_path) - Message + Message v 2022-10-08-team1-goodmodel.csv: File could be read successfully. v 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. v 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. @@ -266,7 +266,7 @@ Code validate_model_data(hub_path, file_path) - Message + Message v 2022-10-08-team1-goodmodel.csv: File could be read successfully. v 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. v 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. @@ -291,7 +291,7 @@ Code validate_model_data(hub_path, file_path) - Message + Message ✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully. ✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. ✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. @@ -316,7 +316,7 @@ Code validate_model_data(hub_path, file_path) - Message + Message ✔ 2022-10-08-team1-goodmodel.csv: File could be read successfully. ✔ 2022-10-08-team1-goodmodel.csv: `round_id_col` name is valid. ✔ 2022-10-08-team1-goodmodel.csv: `round_id` column "origin_date" contains a single, unique round ID value. @@ -341,6 +341,7 @@ Code validate_model_data(hub_path, file_path = "random-path.csv") - Error - Could not parse file name 'random-path' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. + Condition + Error in `parse_file_name()`: + ! Could not parse file name 'random-path' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. diff --git a/tests/testthat/_snaps/validate_model_file.md b/tests/testthat/_snaps/validate_model_file.md index 1f463c78..3a32ae4c 100644 --- a/tests/testthat/_snaps/validate_model_file.md +++ b/tests/testthat/_snaps/validate_model_file.md @@ -61,7 +61,7 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv") - Message + Message v 2022-10-08-team1-goodmodel.csv: File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. v 2022-10-08-team1-goodmodel.csv: File name "2022-10-08-team1-goodmodel.csv" is valid. v 2022-10-08-team1-goodmodel.csv: File directory name matches `model_id` metadata in file name. @@ -73,14 +73,14 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv") - Message + Message x 2022-10-15-team1-goodmodel.csv: File does not exist at path 'model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv'. --- Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv") - Message + Message v 2022-10-15-hub-baseline.csv: File exists at path 'model-output/team1-goodmodel/2022-10-15-hub-baseline.csv'. v 2022-10-15-hub-baseline.csv: File name "2022-10-15-hub-baseline.csv" is valid. ! 2022-10-15-hub-baseline.csv: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" @@ -113,7 +113,7 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv") - Message + Message v 2022-10-08-team1-goodmodel.csv: File exists at path model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv. v 2022-10-08-team1-goodmodel.csv: File name "2022-10-08-team1-goodmodel.csv" is valid. v 2022-10-08-team1-goodmodel.csv: File directory name matches `model_id` metadata in file name. @@ -125,14 +125,14 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv") - Message + Message x 2022-10-15-team1-goodmodel.csv: File does not exist at path model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv. --- Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv") - Message + Message v 2022-10-15-hub-baseline.csv: File exists at path model-output/team1-goodmodel/2022-10-15-hub-baseline.csv. v 2022-10-15-hub-baseline.csv: File name "2022-10-15-hub-baseline.csv" is valid. ! 2022-10-15-hub-baseline.csv: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" @@ -165,7 +165,7 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv") - Message + Message ✔ 2022-10-08-team1-goodmodel.csv: File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. ✔ 2022-10-08-team1-goodmodel.csv: File name "2022-10-08-team1-goodmodel.csv" is valid. ✔ 2022-10-08-team1-goodmodel.csv: File directory name matches `model_id` metadata in file name. @@ -177,14 +177,14 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv") - Message + Message ✖ 2022-10-15-team1-goodmodel.csv: File does not exist at path 'model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv'. --- Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv") - Message + Message ✔ 2022-10-15-hub-baseline.csv: File exists at path 'model-output/team1-goodmodel/2022-10-15-hub-baseline.csv'. ✔ 2022-10-15-hub-baseline.csv: File name "2022-10-15-hub-baseline.csv" is valid. ! 2022-10-15-hub-baseline.csv: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" @@ -217,7 +217,7 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv") - Message + Message ✔ 2022-10-08-team1-goodmodel.csv: File exists at path model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv. ✔ 2022-10-08-team1-goodmodel.csv: File name "2022-10-08-team1-goodmodel.csv" is valid. ✔ 2022-10-08-team1-goodmodel.csv: File directory name matches `model_id` metadata in file name. @@ -229,14 +229,14 @@ Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv") - Message + Message ✖ 2022-10-15-team1-goodmodel.csv: File does not exist at path model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv. --- Code validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv") - Message + Message ✔ 2022-10-15-hub-baseline.csv: File exists at path model-output/team1-goodmodel/2022-10-15-hub-baseline.csv. ✔ 2022-10-15-hub-baseline.csv: File name "2022-10-15-hub-baseline.csv" is valid. ! 2022-10-15-hub-baseline.csv: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" From fe36d97e9f35f5004577cf60e4635a45474282f5 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Oct 2023 11:10:05 +0300 Subject: [PATCH 5/9] Coerce col data types to schema when reading in csvs by default. Related to #54 --- R/read_model_out_file.R | 2 +- tests/testthat/_snaps/check_tbl_col_types.md | 9 + tests/testthat/_snaps/read_model_out_file.md | 25 ++ tests/testthat/_snaps/validate_submission.md | 133 +++++++++ tests/testthat/test-check_tbl_col_types.R | 47 ++-- tests/testthat/test-read_model_out_file.R | 26 ++ tests/testthat/test-validate_submission.R | 24 +- .../hub-baseline/2023-04-24-hub-baseline.csv | 49 ++++ .../hub-baseline/2023-05-01-hub-baseline.csv | 49 ++++ .../testdata/hub/hub-config/admin.json | 14 + .../hub/hub-config/model-metadata-schema.json | 128 +++++++++ .../testdata/hub/hub-config/tasks.json | 257 ++++++++++++++++++ .../hub/model-metadata/hub-baseline.yml | 39 +++ 13 files changed, 778 insertions(+), 24 deletions(-) create mode 100644 tests/testthat/testdata/hub/forecasts/hub-baseline/2023-04-24-hub-baseline.csv create mode 100644 tests/testthat/testdata/hub/forecasts/hub-baseline/2023-05-01-hub-baseline.csv create mode 100644 tests/testthat/testdata/hub/hub-config/admin.json create mode 100644 tests/testthat/testdata/hub/hub-config/model-metadata-schema.json create mode 100644 tests/testthat/testdata/hub/hub-config/tasks.json create mode 100644 tests/testthat/testdata/hub/model-metadata/hub-baseline.yml diff --git a/R/read_model_out_file.R b/R/read_model_out_file.R index 9926c0e9..d2b7cc7f 100644 --- a/R/read_model_out_file.R +++ b/R/read_model_out_file.R @@ -6,7 +6,7 @@ #' @return a tibble of contents of the model output file. #' @export read_model_out_file <- function(file_path, hub_path = ".", - use_hub_schema = FALSE) { + use_hub_schema = TRUE) { full_path <- abs_file_path(file_path, hub_path) if (!fs::file_exists(full_path)) { diff --git a/tests/testthat/_snaps/check_tbl_col_types.md b/tests/testthat/_snaps/check_tbl_col_types.md index a5b13f0c..45189838 100644 --- a/tests/testthat/_snaps/check_tbl_col_types.md +++ b/tests/testthat/_snaps/check_tbl_col_types.md @@ -16,3 +16,12 @@ Warning: Column data types do not match hub schema. `origin_date ` should be "character " not "Date ", `horizon ` should be "double " not "integer " +--- + + Code + check_tbl_col_types(tbl, file_path, hub_path) + Output + + Warning: + Column data types do not match hub schema. `NA ` should be "NA " not "NA ", `NA ` should be "NA " not "NA ", `horizon ` should be "double " not "integer ", `output_type_id ` should be "double " not "character ", `value ` should be "integer " not "double " + diff --git a/tests/testthat/_snaps/read_model_out_file.md b/tests/testthat/_snaps/read_model_out_file.md index 005f5d0d..4199bdc7 100644 --- a/tests/testthat/_snaps/read_model_out_file.md +++ b/tests/testthat/_snaps/read_model_out_file.md @@ -29,3 +29,28 @@ $ output_type_id: num [1:47] 0.01 0.025 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 ... $ value : int [1:47] 135 137 139 140 141 141 142 143 144 145 ... +# read_model_out_file correctly uses hub schema to read character cols in csvs + + Code + str(read_model_out_file(hub_path = test_path("testdata/hub"), + "hub-baseline/2023-04-24-hub-baseline.csv")) + Output + tibble [48 x 8] (S3: tbl_df/tbl/data.frame) + $ forecast_date : Date[1:48], format: "2023-04-24" "2023-04-24" ... + $ target_end_date: Date[1:48], format: "2023-05-01" "2023-05-08" ... + $ horizon : int [1:48] 1 2 1 1 1 1 1 1 1 1 ... + $ target : chr [1:48] "wk ahead inc flu hosp" "wk ahead inc flu hosp" "wk ahead inc flu hosp" "wk ahead inc flu hosp" ... + $ location : chr [1:48] "06" "06" "06" "06" ... + $ output_type : chr [1:48] "mean" "mean" "quantile" "quantile" ... + $ output_type_id : chr [1:48] NA NA "0.01" "0.025" ... + $ value : num [1:48] 1033 1033 0 0 0 ... + +# read_model_out_file errors when file contents cannot be coerced to hub schema. + + Code + read_model_out_file(hub_path = test_path("testdata/hub"), + "hub-baseline/2023-05-01-hub-baseline.csv") + Condition + Error: + ! Invalid: In CSV column #2: CSV conversion error to int32: invalid value 'horizon 1' + diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index bdc526b1..0c11ba5b 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -432,3 +432,136 @@ $ use_cli_format: logi TRUE - attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ... +# validate_submission csv file read in and validated according to schema. + + Code + str(validate_submission(hub_path = test_path("testdata/hub"), file_path = "hub-baseline/2023-04-24-hub-baseline.csv", + skip_submit_window_check = TRUE)) + Output + List of 19 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "hub" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'forecasts/hub-baseline/2023-04-24-hub-baseline.csv'. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2023-04-24-hub-baseline.csv\" is valid. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"forecast_date\" contains a single, unique round ID value. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ match_round_id :List of 4 + ..$ message : chr "All `round_id_col` \"forecast_date\" values match submission `round_id` from file name. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_match_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 4 + ..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ col_types :List of 4 + ..$ message : chr "Column data types match hub schema. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_col_types" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_vals :List of 5 + ..$ message : chr "`tbl` contains valid values/value combinations. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_values" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ rows_unique :List of 4 + ..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_rows_unique" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ req_vals :List of 5 + ..$ message : chr "Required task ID/output type/output type ID combinations all present. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ missing : tibble [0 x 7] (S3: tbl_df/tbl/data.frame) + .. ..$ forecast_date : chr(0) + .. ..$ target_end_date: chr(0) + .. ..$ horizon : chr(0) + .. ..$ target : chr(0) + .. ..$ location : chr(0) + .. ..$ output_type : chr(0) + .. ..$ output_type_id : chr(0) + ..$ call : chr "check_tbl_values_required" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_valid :List of 4 + ..$ message : chr "Values in column `value` all valid with respect to modeling task config. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_value_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_non_desc:List of 5 + ..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__ + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_value_col_ascending" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_sum1 :List of 4 + ..$ message : chr "No pmf output types to check for sum of 1. Check skipped." + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_tbl_value_col_sum1" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + diff --git a/tests/testthat/test-check_tbl_col_types.R b/tests/testthat/test-check_tbl_col_types.R index c179c4da..aa538236 100644 --- a/tests/testthat/test-check_tbl_col_types.R +++ b/tests/testthat/test-check_tbl_col_types.R @@ -1,21 +1,34 @@ test_that("check_tbl_col_types works", { - hub_path <- system.file("testhubs/simple", package = "hubValidations") - file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" - tbl <- hubValidations::read_model_out_file(file_path, hub_path) + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + tbl <- read_model_out_file(file_path, hub_path) - expect_snapshot( - check_tbl_col_types(tbl, file_path, hub_path) - ) + expect_snapshot( + check_tbl_col_types(tbl, file_path, hub_path) + ) - mockery::stub( - check_tbl_col_types, - "hubUtils::create_hub_schema", - c(origin_date = "character", target = "character", horizon = "double", - location = "character", age_group = "character", output_type = "character", - output_type_id = "double", value = "integer"), - 1 - ) - expect_snapshot( - check_tbl_col_types(tbl, file_path, hub_path) - ) + mockery::stub( + check_tbl_col_types, + "hubUtils::create_hub_schema", + c( + origin_date = "character", target = "character", horizon = "double", + location = "character", age_group = "character", output_type = "character", + output_type_id = "double", value = "integer" + ), + 1 + ) + expect_snapshot( + check_tbl_col_types(tbl, file_path, hub_path) + ) + + # Check "06" location value read and validated correctly + hub_path <- test_path("testdata/hub") + file_path <- "hub-baseline/2023-04-24-hub-baseline.csv" + tbl <- read_model_out_file(file_path, + hub_path, + use_hub_schema = TRUE + ) + expect_snapshot( + check_tbl_col_types(tbl, file_path, hub_path) + ) }) diff --git a/tests/testthat/test-read_model_out_file.R b/tests/testthat/test-read_model_out_file.R index e73bb886..bde4dd23 100644 --- a/tests/testthat/test-read_model_out_file.R +++ b/tests/testthat/test-read_model_out_file.R @@ -17,3 +17,29 @@ test_that("read_model_out_file works", { ) ) }) + +test_that( + "read_model_out_file correctly uses hub schema to read character cols in csvs", + { + expect_snapshot( + str( + read_model_out_file( + hub_path = test_path("testdata/hub"), + "hub-baseline/2023-04-24-hub-baseline.csv" + ) + ) + ) + } +) +test_that( + "read_model_out_file errors when file contents cannot be coerced to hub schema.", + { + expect_snapshot( + read_model_out_file( + hub_path = test_path("testdata/hub"), + "hub-baseline/2023-05-01-hub-baseline.csv" + ), + error = TRUE + ) + } +) diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index 8dbe1206..5c221be2 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -76,15 +76,15 @@ test_that("validate_submission works", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - skip_submit_window_check = TRUE + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE ) ) ) expect_s3_class( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - skip_submit_window_check = TRUE + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE ), c("hub_validations", "list") ) @@ -102,7 +102,7 @@ test_that("validate_submission submission within window works", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" )[["submission_time"]] ) ) @@ -120,8 +120,20 @@ test_that("validate_submission submission outside window fails correctly", { expect_snapshot( str( validate_submission(hub_path, - file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" )[["submission_time"]] ) ) }) + +test_that("validate_submission csv file read in and validated according to schema.", { + expect_snapshot( + str( + validate_submission( + hub_path = test_path("testdata/hub"), + file_path = "hub-baseline/2023-04-24-hub-baseline.csv", + skip_submit_window_check = TRUE + ) + ) + ) +}) diff --git a/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-04-24-hub-baseline.csv b/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-04-24-hub-baseline.csv new file mode 100644 index 00000000..3ab776c3 --- /dev/null +++ b/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-04-24-hub-baseline.csv @@ -0,0 +1,49 @@ +"forecast_date","target_end_date","horizon","target","location","output_type","output_type_id","value" +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","mean",,1033 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","mean",,1033 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.01",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.025",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.05",0 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.1",281 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.15",600 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.2",717 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.25",817 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.3",877 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.35",913 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.4",965 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.45",1011 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.5",1033 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.55",1055 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.6",1101 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.65",1153 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.7",1189 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.75",1249 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.8",1349 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.85",1466 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.9",1785 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.95",3443 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.975",5183 +2023-04-24,2023-05-01,1,"wk ahead inc flu hosp","06","quantile","0.99",7490 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.01",0 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.025",0 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.05",0 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.1",0 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.15",0 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.2",378 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.25",579 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.3",706 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.35",806 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.4",887 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.45",963 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.5",1033 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.55",1102 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.6",1179 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.65",1262 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.7",1363 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.75",1489 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.8",1690 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.85",2387 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.9",3348 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.95",5048 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.975",7471 +2023-04-24,2023-05-08,2,"wk ahead inc flu hosp","06","quantile","0.99",8402 diff --git a/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-05-01-hub-baseline.csv b/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-05-01-hub-baseline.csv new file mode 100644 index 00000000..43485e81 --- /dev/null +++ b/tests/testthat/testdata/hub/forecasts/hub-baseline/2023-05-01-hub-baseline.csv @@ -0,0 +1,49 @@ +"forecast_date","target_end_date","horizon","target","location","output_type","output_type_id","value" +2023-05-01,2023-05-08,"horizon 1","wk ahead inc flu hosp","US","mean",,926 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","mean",,926 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.01",0 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.025",0 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.05",0 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.1",193 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.15",495 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.2",618 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.25",717 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.3",774 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.35",822 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.4",857 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.45",904 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.5",926 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.55",948 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.6",995 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.65",1030 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.7",1078 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.75",1135 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.8",1234 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.85",1357 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.9",1659 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.95",3310 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.975",4880 +2023-05-01,2023-05-08,"1","wk ahead inc flu hosp","US","quantile","0.99",7399 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.01",0 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.025",0 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.05",0 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.1",0 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.15",0 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.2",283 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.25",477 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.3",601 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.35",701 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.4",784 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.45",858 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.5",926 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.55",995 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.6",1069 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.65",1151 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.7",1250 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.75",1372 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.8",1567 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.85",2207 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.9",3228 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.95",4884 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.975",7365 +2023-05-01,2023-05-15,"2","wk ahead inc flu hosp","US","quantile","0.99",8239 diff --git a/tests/testthat/testdata/hub/hub-config/admin.json b/tests/testthat/testdata/hub/hub-config/admin.json new file mode 100644 index 00000000..d2200d73 --- /dev/null +++ b/tests/testthat/testdata/hub/hub-config/admin.json @@ -0,0 +1,14 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json", + "name": "US CDC FluSight", + "maintainer": "US CDC", + "contact": { + "name": "Joe Bloggs", + "email": "joe.blogs@cdc.gov" + }, + "repository_host": "GitHub", + "repository_url": "https://github.com/cdcepi/Flusight-forecast-data", + "file_format": ["csv", "parquet", "arrow"], + "timezone": "US/Eastern", + "model_output_dir": "forecasts" +} diff --git a/tests/testthat/testdata/hub/hub-config/model-metadata-schema.json b/tests/testthat/testdata/hub/hub-config/model-metadata-schema.json new file mode 100644 index 00000000..0001eec9 --- /dev/null +++ b/tests/testthat/testdata/hub/hub-config/model-metadata-schema.json @@ -0,0 +1,128 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Schema for Modeling Hub model metadata", + "description": "This is the schema for model metadata files, please refer to https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-metadata/README.md for more information.", + "type": "object", + "properties": { + "team_name": { + "description": "The name of the team submitting the model", + "type": "string" + }, + "team_abbr": { + "description": "Abbreviated name of the team submitting the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_name": { + "description": "The name of the model", + "type": "string" + }, + "model_abbr": { + "description": "Abbreviated name of the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_version": { + "description": "Identifier of the version of the model", + "type": "string" + }, + "model_contributors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "affiliation": { + "type": "string" + }, + "email": { + "type": "string", + "format": "email" + }, + "orcid": { + "type": "string", + "pattern": "^\\d{4}\\-\\d{4}\\-\\d{4}\\-[\\dX]{4}$" + } + }, + "additionalProperties": false, + "required": ["name", "affiliation", "email"] + } + }, + "website_url": { + "description": "Public facing website for the model", + "type": "string", + "format": "uri" + }, + "repo_url": { + "description": "Repository containing code for the model", + "type": "string", + "format": "uri" + }, + "license": { + "description": "License for use of model output data", + "type": "string", + "enum": [ + "CC0-1.0", + "CC-BY-4.0", + "CC-BY_SA-4.0", + "PPDL", + "ODC-by", + "ODbL", + "OGL-3.0" + ] + }, + "designated_model": { + "description": "Team-specified indicator for whether the model should be eligible for inclusion in a Hub ensemble and public visualization. A team may designate up to two models.", + "type": "boolean" + }, + "citation": { + "description": "One or more citations for this model", + "type": "string", + "examples": ["Gibson GC , Reich NG , Sheldon D. Real-time mechanistic bayesian forecasts of Covid-19 mortality. medRxiv. 2020. https://doi.org/10.1101/2020.12.22.20248736"] + }, + "team_funding": { + "description": "Any information about funding source for the team or members of the team.", + "type": "string", + "examples": ["National Institutes of General Medical Sciences (R01GM123456). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS."] + }, + "data_inputs": { + "description": "List or description of data inputs used by the model", + "type": "string" + }, + "methods": { + "description": "A brief (200 char.) description of the methods used by this model", + "type": "string", + "maxLength": 200 + }, + "methods_long": { + "description": "A full description of the methods used by this model. Among other details, this should include whether spatial correlation is considered and how the model accounts for uncertainty.", + "type": "string" + }, + "ensemble_of_models": { + "description": "Indicator for whether this model is an ensemble of any separate component models", + "type": "boolean" + }, + "ensemble_of_hub_models": { + "description": "Indicator for whether this model is an ensemble specifically of other models submitted to this Hub", + "type": "boolean" + } + }, + "additionalProperties": true, + "required": [ + "team_name", + "team_abbr", + "model_name", + "model_abbr", + "model_contributors", + "license", + "data_inputs", + "methods", + "methods_long", + "ensemble_of_models", + "ensemble_of_hub_models" + ] +} diff --git a/tests/testthat/testdata/hub/hub-config/tasks.json b/tests/testthat/testdata/hub/hub-config/tasks.json new file mode 100644 index 00000000..fc57b13b --- /dev/null +++ b/tests/testthat/testdata/hub/hub-config/tasks.json @@ -0,0 +1,257 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json", + "rounds": [{ + "round_id_from_variable": true, + "round_id": "forecast_date", + "model_tasks": [{ + "task_ids": { + "forecast_date": { + "required": null, + "optional": [ + "2022-12-12", "2022-12-19", "2022-12-26", "2023-01-02", "2023-01-09", + "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13", + "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20", + "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24", + "2023-05-01", "2023-05-08", "2023-05-15" + ] + }, + "target_end_date": { + "required": null, + "optional": [ + "2022-12-26", "2023-01-02", "2023-01-09", + "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13", + "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20", + "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24", + "2023-05-01", "2023-05-08", "2023-05-15", "2023-05-22", "2023-05-29" + ] + }, + "target": { + "required": null, + "optional": ["wk flu hosp rate change"] + }, + "horizon": { + "required": null, + "optional": [1, 2] + }, + "location": { + "required": null, + "optional": [ + "US", + "01", + "02", + "04", + "05", + "06", + "08", + "09", + "10", + "11", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "53", + "54", + "55", + "56", + "72", + "78" + ] + } + }, + "output_type": { + "pmf": { + "output_type_id": { + "required": ["large_decrease", "decrease", "stable", "increase", "large_increase"], + "optional": null + }, + "value": { + "type": "double", + "minimum": 0, + "maximum": 1 + } + } + }, + "target_metadata": [{ + "target_id": "wk flu hosp rate change", + "target_name": "weekly influenza hospitalization rate change", + "target_units": "rate per 100,000 population", + "target_keys": { + "target": ["wk flu hosp rate change"] + }, + "target_type": "nominal", + "description": "This target represents the change in the rate of new hospitalizations per week comparing the week ending two days prior to the forecast_date to the week ending h weeks after the forecast_date.", + "is_step_ahead": true, + "time_unit": "week" + }] + }, { + "task_ids": { + "forecast_date": { + "required": null, + "optional": [ + "2022-12-12", "2022-12-19", "2022-12-26", "2023-01-02", "2023-01-09", + "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13", + "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20", + "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24", + "2023-05-01", "2023-05-08", "2023-05-15" + ] + }, + "target_end_date": { + "required": null, + "optional": [ + "2022-12-26", "2023-01-02", "2023-01-09", + "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13", + "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20", + "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24", + "2023-05-01", "2023-05-08", "2023-05-15", "2023-05-22", "2023-05-29" + ] + }, + "target": { + "required": null, + "optional": ["wk ahead inc flu hosp"] + }, + "horizon": { + "required": null, + "optional": [1, 2] + }, + "location": { + "required": null, + "optional": [ + "US", + "01", + "02", + "04", + "05", + "06", + "08", + "09", + "10", + "11", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "53", + "54", + "55", + "56", + "72", + "78" + ] + } + }, + "output_type": { + "quantile": { + "output_type_id": { + "required": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, + 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, + 0.75, 0.8, 0.85, 0.9, 0.95, 0.975, 0.99 + ], + "optional": null + }, + "value": { + "type": "integer", + "minimum": 0 + } + }, + "mean": { + "output_type_id": { + "required": null, + "optional": ["NA"] + }, + "value": { + "type": "double", + "minimum": 0 + } + } + }, + "target_metadata": [{ + "target_id": "wk ahead inc flu hosp", + "target_name": "weekly influenza hospitalization incidence", + "target_units": "rate per 100,000 population", + "target_keys": { + "target": ["wk ahead inc flu hosp"] + }, + "target_type": "discrete", + "description": "This target represents the counts of new hospitalizations per horizon week.", + "is_step_ahead": true, + "time_unit": "week" + }] + }], + "submissions_due": { + "relative_to": "forecast_date", + "start": -6, + "end": 2 + } + } + + ] +} diff --git a/tests/testthat/testdata/hub/model-metadata/hub-baseline.yml b/tests/testthat/testdata/hub/model-metadata/hub-baseline.yml new file mode 100644 index 00000000..b94d3fae --- /dev/null +++ b/tests/testthat/testdata/hub/model-metadata/hub-baseline.yml @@ -0,0 +1,39 @@ +team_name: "UMass-Amherst" +team_abbr: "UMass" +model_name: "Ensemble of baseline models with trends" +model_abbr: "trends_ensemble" +model_version: "1.0" +model_contributors: [ + { + "name": "Nutcha Wattanachit", + "affiliation": "UMass Amherst", + "email": "nwattanachit@schoolph.umass.edu" + }, + { + "name": "Aaron Gerding", + "affiliation": "UMass Amherst", + "email": "agerding@umass.edu" + }, + { + "name": "Nick Reich", + "affiliation": "UMass Amherst", + "email": "nick@umass.edu" + }, + { + "name": "Evan Ray", + "affiliation": "UMass Amherst", + "email": "elray@umass.edu" + } +] +website_url: "https://github.com/reichlab/flu-hosp-models-2021-2022" +license: "CC-BY-4.0" +citation: "citation" +team_funding: "funding" +include_viz: true +include_ensemble: true +include_eval: true +methods: "Equally weighted ensemble of simple time-series baseline models." +data_inputs: "Daily and weekly incident flu hospitalizations, queried through covidData" +methods_long: "Equally weighted ensemble of simple time-series baseline models. Each baseline model calculates first differences of incidence in recent weeks. These differences are sampled and then added to the most recently observed incidence. Variations on this method include (a) including the first differences and the negative of these differences to enforce symmetry, resulting in a flat-line forecast, (b) generating predictions by working on the daily scale and then aggregating to weekly predictions, or by working directly with weekly data; (c) varying the number of time-units in the past for computing the first differences (14 or 21 days, or 3 or 4 weeks) to focus on capturing recent trends, and (d) using the original time-series or a variance-stabilizing transformation of it, e.g. square-root. Additionally, the resulting predictive distributions are truncated so that any predicted samples computed to be less than zero are truncated to be zero." +ensemble_of_models: true +ensemble_of_hub_models: false From 2e665325abc3f71ddf8d1cd2d75260b55473c5bb Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Oct 2023 11:13:28 +0300 Subject: [PATCH 6/9] Bump version to 0.0.0.9004 --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ba768040..c31e94d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hubValidations Title: Testing framework for hubverse hub validations -Version: 0.0.0.9003 +Version: 0.0.0.9004 Authors@R: c( person( given = "Anna", diff --git a/NEWS.md b/NEWS.md index b90b933c..4d8d5b38 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# hubValidations 0.0.0.9004 + +This release contains a bug fix for reading in and validating CSV column types correctly. (#54) + # hubValidations 0.0.0.9003 This release includes a number of bug fixes: From d893888ca276bb02b08012b515a74cd34ef72c2a Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Oct 2023 11:24:53 +0300 Subject: [PATCH 7/9] add extra test for parsing error --- tests/testthat/_snaps/validate_submission.md | 58 ++++++++++++++++++++ tests/testthat/test-validate_submission.R | 12 ++++ 2 files changed, 70 insertions(+) diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index 0c11ba5b..8cc3ce5e 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -565,3 +565,61 @@ ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... - attr(*, "class")= chr [1:2] "hub_validations" "list" +# validate_submission fails when csv cannot be parsed according to schema. + + Code + str(validate_submission(hub_path = test_path("testdata/hub"), file_path = "hub-baseline/2023-05-01-hub-baseline.csv", + skip_submit_window_check = TRUE)) + Output + Classes 'hub_validations', 'list' hidden list of 8 + $ valid_config :List of 4 + ..$ message : chr "All hub config files are valid. \n " + ..$ where : chr "hub" + ..$ call : chr "check_config_hub_valid" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'forecasts/hub-baseline/2023-05-01-hub-baseline.csv'. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2023-05-01-hub-baseline.csv\" is valid. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists:List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 6 + ..$ message : chr "File could not be read successfully. \n Invalid: In CSV column #2: CSV conversion error to int32: invalid value"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index 5c221be2..37cffff0 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -137,3 +137,15 @@ test_that("validate_submission csv file read in and validated according to schem ) ) }) + +test_that("validate_submission fails when csv cannot be parsed according to schema.", { + expect_snapshot( + str( + validate_submission( + hub_path = test_path("testdata/hub"), + file_path = "hub-baseline/2023-05-01-hub-baseline.csv", + skip_submit_window_check = TRUE + ) + ) + ) +}) From 56be7366b786b788fac283e7f68ae931c929a81e Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Oct 2023 11:25:19 +0300 Subject: [PATCH 8/9] document --- man/read_model_out_file.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/read_model_out_file.Rd b/man/read_model_out_file.Rd index 3c63e4c5..d496b643 100644 --- a/man/read_model_out_file.Rd +++ b/man/read_model_out_file.Rd @@ -4,7 +4,7 @@ \alias{read_model_out_file} \title{Read a model output file} \usage{ -read_model_out_file(file_path, hub_path = ".", use_hub_schema = FALSE) +read_model_out_file(file_path, hub_path = ".", use_hub_schema = TRUE) } \arguments{ \item{file_path}{character string. Path to the file being validated relative to From f22975ba43a1d39703d50d49bb2a649fe4a4494e Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 11 Oct 2023 11:57:37 +0300 Subject: [PATCH 9/9] refactor tests to account for arrow error msg diffs on windows --- tests/testthat/_snaps/read_model_out_file.md | 9 --- tests/testthat/_snaps/validate_submission.md | 58 -------------------- tests/testthat/test-read_model_out_file.R | 4 +- tests/testthat/test-validate_submission.R | 6 +- 4 files changed, 5 insertions(+), 72 deletions(-) diff --git a/tests/testthat/_snaps/read_model_out_file.md b/tests/testthat/_snaps/read_model_out_file.md index 4199bdc7..83a6fd68 100644 --- a/tests/testthat/_snaps/read_model_out_file.md +++ b/tests/testthat/_snaps/read_model_out_file.md @@ -45,12 +45,3 @@ $ output_type_id : chr [1:48] NA NA "0.01" "0.025" ... $ value : num [1:48] 1033 1033 0 0 0 ... -# read_model_out_file errors when file contents cannot be coerced to hub schema. - - Code - read_model_out_file(hub_path = test_path("testdata/hub"), - "hub-baseline/2023-05-01-hub-baseline.csv") - Condition - Error: - ! Invalid: In CSV column #2: CSV conversion error to int32: invalid value 'horizon 1' - diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index 8cc3ce5e..0c11ba5b 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -565,61 +565,3 @@ ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... - attr(*, "class")= chr [1:2] "hub_validations" "list" -# validate_submission fails when csv cannot be parsed according to schema. - - Code - str(validate_submission(hub_path = test_path("testdata/hub"), file_path = "hub-baseline/2023-05-01-hub-baseline.csv", - skip_submit_window_check = TRUE)) - Output - Classes 'hub_validations', 'list' hidden list of 8 - $ valid_config :List of 4 - ..$ message : chr "All hub config files are valid. \n " - ..$ where : chr "hub" - ..$ call : chr "check_config_hub_valid" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ file_exists :List of 4 - ..$ message : chr "File exists at path 'forecasts/hub-baseline/2023-05-01-hub-baseline.csv'. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_file_exists" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ file_name :List of 4 - ..$ message : chr "File name \"2023-05-01-hub-baseline.csv\" is valid. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_file_name" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ file_location :List of 4 - ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_file_location" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ round_id_valid :List of 4 - ..$ message : chr "`round_id` is valid. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_valid_round_id" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ file_format :List of 4 - ..$ message : chr "File is accepted hub format. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_file_format" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ metadata_exists:List of 4 - ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_submission_metadata_file_exists" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... - $ file_read :List of 6 - ..$ message : chr "File could not be read successfully. \n Invalid: In CSV column #2: CSV conversion error to int32: invalid value"| __truncated__ - ..$ trace : NULL - ..$ parent : NULL - ..$ where : chr "hub-baseline/2023-05-01-hub-baseline.csv" - ..$ call : chr "check_file_read" - ..$ use_cli_format: logi TRUE - ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... - diff --git a/tests/testthat/test-read_model_out_file.R b/tests/testthat/test-read_model_out_file.R index bde4dd23..91742ea0 100644 --- a/tests/testthat/test-read_model_out_file.R +++ b/tests/testthat/test-read_model_out_file.R @@ -34,12 +34,12 @@ test_that( test_that( "read_model_out_file errors when file contents cannot be coerced to hub schema.", { - expect_snapshot( + expect_error( read_model_out_file( hub_path = test_path("testdata/hub"), "hub-baseline/2023-05-01-hub-baseline.csv" ), - error = TRUE + regexp = "* CSV conversion error to int32: invalid value 'horizon 1'" ) } ) diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index 37cffff0..89eaaa13 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -139,13 +139,13 @@ test_that("validate_submission csv file read in and validated according to schem }) test_that("validate_submission fails when csv cannot be parsed according to schema.", { - expect_snapshot( - str( + expect_s3_class( validate_submission( hub_path = test_path("testdata/hub"), file_path = "hub-baseline/2023-05-01-hub-baseline.csv", skip_submit_window_check = TRUE + )[["file_read"]], + c("check_error", "hub_check", "rlang_error", "error", "condition" ) ) - ) })