Skip to content

Commit

Permalink
Merge pull request #88 from hubverse-org/ak/round-id-regex-val/68
Browse files Browse the repository at this point in the history
Add round_id expected pattern match check
  • Loading branch information
annakrystalli authored Dec 18, 2024
2 parents ac8f7bc + 34f2624 commit 725c22b
Show file tree
Hide file tree
Showing 14 changed files with 1,047 additions and 12 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# hubAdmin (development version)

* Use options to set `schema_version` and `branch` arguments in `download_tasks_schema()` and the `create_*()` family of functions for creating config files programmatically. This allows for setting the schema version and branch globally for the session (#85).
* Make validation of `round_id` patterns explicit. This ties in with schema version v4.0.1 where the pattern the `round_id` property must match if `round_id_from_variable` is `false` is now specified as a regular expression in the schema. This check is also now implemented dynamically on values of the `round_id` variable if `round_id_from_variable` is `true` when validating tasks.json config files. Checks on `round_id` patterns are also now implemented in `create_round()` when creating rounds programmatically.

# hubAdmin 1.4.0

Expand Down
64 changes: 64 additions & 0 deletions R/create_round.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ create_round <- function(round_id_from_variable,
}
if (round_id_from_variable) {
check_round_id_variable(model_tasks, round_id)
check_round_id_pattern_vals(model_tasks, round_id)
} else {
check_round_id_pattern(round_id)
}

structure(
Expand Down Expand Up @@ -276,3 +279,64 @@ get_schema_round <- function(schema) {
"items", "properties"
)
}
# Check round_id pattern in `round_id` var values when
# round_id_from_variable = TRUE
check_round_id_pattern_vals <- function(model_tasks, round_id,
call = rlang::caller_env()) {
invalid_round_id_vals <- purrr::map(
model_tasks$model_tasks,
~ {
round_id_var_vals <- purrr::pluck(
.x, "task_ids", round_id
)
invalid_round_id_var_patterns(round_id_var_vals) |>
purrr::compact()
}
)
invalid_round_id_vals <- purrr::set_names(
invalid_round_id_vals,
seq_along(invalid_round_id_vals)
)

if (length(unlist(invalid_round_id_vals)) > 0L) {
invalid_vals_bullets <-
purrr::compact(invalid_round_id_vals) |>
# iterate over any model tasks containing invalid values
purrr::imap(~ {
mt_idx <- .y
# iterate over invalid values in "required" and "optional" properties if present
purrr::imap_chr(
.x,
~ {
# Create a separate message for invalid values in each model task and property
cli::format_inline("In {.arg model_tasks[[{mt_idx}]]${round_id}${.y}}: {.val {.x}}")
}
)
}) |>
unlist() |>
purrr::set_names("x")

cli::cli_abort(
c(
"!" = "Values in {.var round_id} var {.val {round_id}} must contain either
ISO formatted dates or alphanumeric characters separated by underscores ('_').",
invalid_vals_bullets
),
call = call
)
}
}
# Check round_id pattern when round_id_from_variable = FALSE
check_round_id_pattern <- function(round_id,
call = rlang::caller_env()) {
if (!validate_round_id_pattern(round_id)) {
cli::cli_abort(
c(
"!" = "{.var round_id} must contain either ISO formatted date or
alphanumeric characters separated by underscores ('_').",
"x" = "{.val {round_id}} does not match expected pattern"
),
call = call
)
}
}
12 changes: 9 additions & 3 deletions R/get_error_path.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@
#' unevaluated in the output and are instead encoded a glue interpolation strings
#' (i.e wrapped in `{}`).
#' They are defined by variables `round_i`, `model_task_i` or `target_key_i` depending
#' on the depth of the property being validated. Values for these variables need to be passed
#' using `glue::glue_data()` and explicitly passing an index variable
#' and it's value as a named list.
#' on the depth of the property being validated. To interpolate these values into
#' valid instance path, you can either:
#' - wrap `get_error_path()` in `glue::glue()` and let the function interpolate
#' the values using objects available in the caller environment.
#' - wrap `get_error_path()` in `glue::glue_data()` and pass the values explicitly
#' as a named list.
#'
#' Note as well that instance paths are converted to zero indexed format to align
#' with the output of basic JSON schema validation (i.e. performed by `jsonvalidate`)
#' @noRd
#' @examples
#' # Return the instance path to the origin date task ID in the second modeling task
Expand Down
27 changes: 27 additions & 0 deletions R/utils-round_ids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
validate_round_id_pattern <- function(x) {
stringr::str_detect(
x,
"^(\\d{4}-\\d{2}-\\d{2})$|^[A-Za-z0-9_]+$"
)
}

# This function expects a list representation of the task ID variable values
# specified in `round_id` when `round_id_from_variable` is `true.`
# Returns a list with `required` and `optional` elements containg either the
# invalid values identified by the `validate_round_id_pattern` function or NULL.
invalid_round_id_var_patterns <- function(round_id_var_vals) {
purrr::map(
round_id_var_vals,
\(.x) {
if (is.null(.x)) {
return(NULL)
}
valid <- validate_round_id_pattern(.x)
invalid <- .x[!valid]
if (length(invalid) == 0L) {
return(NULL)
}
invalid
}
)
}
53 changes: 52 additions & 1 deletion R/validate-config-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,58 @@ validate_mt_property_unique_vals <- function(model_task_grp,
}
}

# Check that modeling task round ids match the expected round ID patterns when
# round_id_from_variable = TRUE
validate_mt_round_id_pattern <- function(model_task_grp,
model_task_i,
round_i,
schema,
round_id_from_variable,
round_id_var) {
if (!round_id_from_variable) {
return(NULL)
}
round_id_var_vals <- purrr::pluck(
model_task_grp, "task_ids", round_id_var
)
invalid_vals <- invalid_round_id_var_patterns(round_id_var_vals)

if (any(lengths(invalid_vals) > 0L)) {
# Collapse invalid values into a single string
invalid_vals_msg <- purrr::compact(invalid_vals) |>
purrr::map_chr(
~ glue::glue_collapse(glue::glue("'{.x}'"), ", ", last = " and ")
)

error_row <- tibble::tibble(
instancePath = paste(
glue::glue(
get_error_path(schema, "/task_ids", "instance")
),
# using names(invalid_vals_msg) creates a row for each property
# ("required"/"optional") containing invalid round_id values
round_id_var, names(invalid_vals_msg),
sep = "/"
),
schemaPath = paste(
get_error_path(
schema,
glue::glue("task_ids/{round_id_var}"),
"schema"
), names(invalid_vals_msg),
sep = "/"
),
keyword = "round_id variable pattern",
message = glue::glue(
"round_id variable '{round_id_var}' values must be either ISO formatted
dates or alphanumeric characters separated by '_'."
),
schema = "^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$",
data = glue::glue("invalid values: {invalid_vals_msg}")
)
return(error_row)
}
}
## ROUND LEVEL VALIDATIONS ----
# Check that round id variables are consistent across modeling tasks
validate_round_ids_consistent <- function(round, round_i,
Expand Down Expand Up @@ -596,7 +648,6 @@ validate_round_derived_task_ids <- function(round, round_i, schema) {
out
}


## CONFIG LEVEL VALIDATIONS ----
# Validate that round IDs are unique across all rounds in config file
validate_round_ids_unique <- function(config_tasks, schema) {
Expand Down
14 changes: 14 additions & 0 deletions R/validate_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ perform_dynamic_config_validations <- function(validation) {
## Dynamic schema validation utilities ----
val_round <- function(round, round_i, schema) {
model_task_grps <- round[["model_tasks"]]
round_id_from_variable <- round[["round_id_from_variable"]]
round_id_var <- round[["round_id"]]

c(
purrr::imap(
Expand Down Expand Up @@ -233,6 +235,18 @@ val_round <- function(round, round_i, schema) {
schema = schema
)
),
purrr::imap(
model_task_grps,
\(.x, .y) {
validate_mt_round_id_pattern(
model_task_grp = .x, model_task_i = .y,
round_i = round_i,
schema = schema,
round_id_from_variable = round_id_from_variable,
round_id_var = round_id_var
)
}
),
list(
validate_round_ids_consistent(
round = round,
Expand Down
23 changes: 23 additions & 0 deletions R/view_config_val_errors.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,20 @@ clean_error_df <- function(errors_tbl) {
if (is.null(errors_tbl)) {
return(NULL)
}

# Move any custom error messages to the message column
if (!is.null(purrr::pluck(errors_tbl, "parentSchema", "errorMessage"))) {
error_msg <- !is.na(errors_tbl$parentSchema$errorMessage)
errors_tbl$message[error_msg] <- errors_tbl$parentSchema$errorMessage[error_msg]
}

errors_tbl[c("dataPath", "parentSchema")] <- NULL
errors_tbl <- errors_tbl[!grepl("oneOf.+", errors_tbl$schemaPath), ]
# remove superfluous if error. The "then" error is what we are interested in
errors_tbl <- errors_tbl[!errors_tbl$keyword == "if", ]
errors_tbl <- remove_superfluous_enum_rows(errors_tbl)


# Get rid of unnecessarily verbose data entry when a data column is a data.frame
if (inherits(errors_tbl$data, "data.frame")) {
errors_tbl$data <- ""
Expand Down Expand Up @@ -269,6 +279,16 @@ extract_params_to_data <- function(errors_tbl,
errors_tbl
}

escape_pattern_dollar <- function(error_df) {
is_pattern <- grepl("pattern", error_df[["keyword"]])
error_df[["schema"]][is_pattern] <- gsub(
"$", "&#36;",
error_df[["schema"]][is_pattern],
fixed = TRUE
)
error_df
}

render_errors_df <- function(error_df) {
schema_version <- attr(error_df, "schema_version")
schema_url <- attr(error_df, "schema_url")
Expand All @@ -286,6 +306,9 @@ render_errors_df <- function(error_df) {
error_df[["schemaPath"]] <- purrr::map_chr(error_df[["schemaPath"]], path_to_tree)
error_df[["instancePath"]] <- purrr::map_chr(error_df[["instancePath"]], path_to_tree)
error_df[["message"]] <- paste("\u274c", error_df[["message"]])
# Escape `$` characters to ensure regex pattern does not trigger equation
# formatting in markdown
error_df <- escape_pattern_dollar(error_df)


# Create table ----
Expand Down
38 changes: 36 additions & 2 deletions tests/testthat/_snaps/create_round.md
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@
---

Code
create_derived_task_ids_round(version = "v4.0.0", branch = "br-v4.0.0",
derived_task_ids = 1L)
create_derived_task_ids_round(version = "v4.0.0", derived_task_ids = 1L)
Condition
Error in `map()`:
i In index: 1.
Expand All @@ -419,3 +418,38 @@
x `derived_task_ids` value "random_task_id" is not valid `task_id` variable in the provided `model_tasks` object.
i Valid `task_id` variables are: "origin_date", "location", and "horizon"

# validating round_id patterns when round_id_from_var = TRUE works

Code
create_round(round_id_from_variable = TRUE, round_id = "round_id_var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! Values in `round_id` var "round_id_var" must contain either ISO formatted dates or alphanumeric characters separated by underscores ('_').
x In `model_tasks[[1]]$round_id_var$required`: "invalid-round-id-req"
x In `model_tasks[[1]]$round_id_var$optional`: "invalid-round-id-opt1" and "invalid-round-id-opt2"

---

Code
create_round(round_id_from_variable = TRUE, round_id = "round_id_var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! Values in `round_id` var "round_id_var" must contain either ISO formatted dates or alphanumeric characters separated by underscores ('_').
x In `model_tasks[[1]]$round_id_var$optional`: "invalid-round-id-opt1" and "invalid-round-id-opt2"
x In `model_tasks[[2]]$round_id_var$required`: "invalid-round-id-req1" and "invalid-round-id-req2"

# validating round_id pattern when round_id_from_var = FALSE works

Code
create_round(round_id_from_variable = FALSE, round_id = "round-id-var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! `round_id` must contain either ISO formatted date or alphanumeric characters separated by underscores ('_').
x "round-id-var" does not match expected pattern

Loading

0 comments on commit 725c22b

Please sign in to comment.