Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add round_id expected pattern match check #88

Merged
merged 14 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# hubAdmin (development version)

* Use options to set `schema_version` and `branch` arguments in `download_tasks_schema()` and the `create_*()` family of functions for creating config files programmatically. This allows for setting the schema version and branch globally for the session (#85).
* Make validation of `round_id` patterns explicit. This ties in with schema version v4.0.1 where the pattern the `round_id` property must match if `round_id_from_variable` is `false` is now specified as a regular expression in the schema. This check is also now implemented dynamically on values of the `round_id` variable if `round_id_from_variable` is `true` when validating tasks.json config files. Checks on `round_id` patterns are also now implemented in `create_round()` when creating rounds programmatically.

# hubAdmin 1.4.0

Expand Down
61 changes: 61 additions & 0 deletions R/create_round.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ create_round <- function(round_id_from_variable,
}
if (round_id_from_variable) {
check_round_id_variable(model_tasks, round_id)
check_round_id_pattern_vals(model_tasks, round_id)
} else {
check_round_id_pattern(round_id)
}

structure(
Expand Down Expand Up @@ -276,3 +279,61 @@ get_schema_round <- function(schema) {
"items", "properties"
)
}
# Check round_id pattern in `round_id` var values when
# round_id_from_variable = TRUE
check_round_id_pattern_vals <- function(model_tasks, round_id,
call = rlang::caller_env()) {
invalid_round_id_vals <- purrr::map(
model_tasks$model_tasks,
~ {
round_id_var_vals <- purrr::pluck(
.x, "task_ids", round_id
)
invalid_round_id_var_patterns(round_id_var_vals) |>
purrr::compact()
}
)
invalid_round_id_vals <- purrr::set_names(
invalid_round_id_vals,
seq_along(invalid_round_id_vals)
)

if (length(unlist(invalid_round_id_vals)) > 0L) {
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
invalid_vals_bullets <-
purrr::compact(invalid_round_id_vals) |>
purrr::imap(~ {
mt_idx <- .y
purrr::imap_chr(
.x,
~ {
cli::format_inline("In {.arg model_tasks[[{mt_idx}]]${round_id}${.y}}: {.val {.x}}")
}
)
}) |>
unlist() |>
purrr::set_names("x")

cli::cli_abort(
c(
"!" = "Values in {.var round_id} var {.val {round_id}} must contain either
ISO formatted dates or alphanumeric characters separated by underscores ('_').",
invalid_vals_bullets
),
call = call
)
}
}
# Check round_id pattern when round_id_from_variable = FALSE
check_round_id_pattern <- function(round_id,
call = rlang::caller_env()) {
if (!validate_round_id_pattern(round_id)) {
cli::cli_abort(
c(
"!" = "{.var round_id} must contain either ISO formatted date or
alphanumeric characters separated by underscores ('_').",
"x" = "{.val {round_id}} does not match expected pattern"
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
),
call = call
)
}
}
23 changes: 23 additions & 0 deletions R/utils-round_ids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
validate_round_id_pattern <- function(x) {
stringr::str_detect(
x,
"^(\\d{4}-\\d{2}-\\d{2})$|^[A-Za-z0-9_]+$"
)
}

invalid_round_id_var_patterns <- function(round_id_var_vals) {
purrr::map(
round_id_var_vals,
\(.x) {
if (is.null(.x)) {
return(NULL)
}
valid <- validate_round_id_pattern(.x)
invalid <- .x[!valid]
if (length(invalid) == 0L) {
return(NULL)
}
invalid
}
)
}
51 changes: 50 additions & 1 deletion R/validate-config-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,56 @@ validate_mt_property_unique_vals <- function(model_task_grp,
}
}

# Check that modeling task round ids match the expected round ID patterns when
# round_id_from_variable = TRUE
validate_mt_round_id_pattern <- function(model_task_grp,
model_task_i,
round_i,
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
schema,
round_id_from_variable,
round_id_var) {
if (!round_id_from_variable) {
return(NULL)
}
round_id_var_vals <- purrr::pluck(
model_task_grp, "task_ids", round_id_var
)
invalid_vals <- invalid_round_id_var_patterns(round_id_var_vals)

if (any(lengths(invalid_vals) > 0L)) {
# Collapse invalid values into a single string
invalid_vals_msg <- purrr::compact(invalid_vals) |>
purrr::map_chr(
~ glue::glue_collapse(glue::glue("'{.x}'"), ", ", last = " and ")
)

error_row <- tibble::tibble(
instancePath = paste(
glue::glue(
get_error_path(schema, "/task_ids", "instance")
),
round_id_var, names(invalid_vals_msg),
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
sep = "/"
),
schemaPath = paste(
get_error_path(
schema,
glue::glue("task_ids/{round_id_var}"),
"schema"
), names(invalid_vals_msg),
sep = "/"
),
keyword = "round_id variable pattern",
message = glue::glue(
"round_id variable '{round_id_var}' values must be either ISO formatted
dates or alphanumeric characters separated by '_'."
),
schema = "^([0-9]{4}-[0-9]{2}-[0-9]{2})$|^[A-Za-z0-9_]+$",
data = glue::glue("invalid values: {invalid_vals_msg}")
)
return(error_row)
}
}
## ROUND LEVEL VALIDATIONS ----
# Check that round id variables are consistent across modeling tasks
validate_round_ids_consistent <- function(round, round_i,
Expand Down Expand Up @@ -596,7 +646,6 @@ validate_round_derived_task_ids <- function(round, round_i, schema) {
out
}


## CONFIG LEVEL VALIDATIONS ----
# Validate that round IDs are unique across all rounds in config file
validate_round_ids_unique <- function(config_tasks, schema) {
Expand Down
14 changes: 14 additions & 0 deletions R/validate_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ perform_dynamic_config_validations <- function(validation) {
## Dynamic schema validation utilities ----
val_round <- function(round, round_i, schema) {
model_task_grps <- round[["model_tasks"]]
round_id_from_variable <- round[["round_id_from_variable"]]
round_id_var <- round[["round_id"]]

c(
purrr::imap(
Expand Down Expand Up @@ -233,6 +235,18 @@ val_round <- function(round, round_i, schema) {
schema = schema
)
),
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
purrr::imap(
model_task_grps,
\(.x, .y) {
validate_mt_round_id_pattern(
model_task_grp = .x, model_task_i = .y,
round_i = round_i,
schema = schema,
round_id_from_variable = round_id_from_variable,
round_id_var = round_id_var
)
}
elray1 marked this conversation as resolved.
Show resolved Hide resolved
),
list(
validate_round_ids_consistent(
round = round,
Expand Down
23 changes: 23 additions & 0 deletions R/view_config_val_errors.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,20 @@ clean_error_df <- function(errors_tbl) {
if (is.null(errors_tbl)) {
return(NULL)
}

# Move any custom error messages to the message column
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
if (!is.null(errors_tbl$parentSchema$errorMessage)) {
error_msg <- !is.na(errors_tbl$parentSchema$errorMessage)
errors_tbl$message[error_msg] <- errors_tbl$parentSchema$errorMessage[error_msg]
}

errors_tbl[c("dataPath", "parentSchema")] <- NULL
errors_tbl <- errors_tbl[!grepl("oneOf.+", errors_tbl$schemaPath), ]
# remove superfluous if error. The "then" error is what we are interested in
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved
errors_tbl <- errors_tbl[!errors_tbl$keyword == "if", ]
errors_tbl <- remove_superfluous_enum_rows(errors_tbl)


# Get rid of unnecessarily verbose data entry when a data column is a data.frame
if (inherits(errors_tbl$data, "data.frame")) {
errors_tbl$data <- ""
Expand Down Expand Up @@ -269,6 +279,16 @@ extract_params_to_data <- function(errors_tbl,
errors_tbl
}

escape_pattern_dollar <- function(error_df) {
is_pattern <- grepl("pattern", error_df[["keyword"]])
error_df[["schema"]][is_pattern] <- gsub(
"$", "&#36;",
error_df[["schema"]][is_pattern],
fixed = TRUE
)
error_df
}

render_errors_df <- function(error_df) {
schema_version <- attr(error_df, "schema_version")
schema_url <- attr(error_df, "schema_url")
Expand All @@ -286,6 +306,9 @@ render_errors_df <- function(error_df) {
error_df[["schemaPath"]] <- purrr::map_chr(error_df[["schemaPath"]], path_to_tree)
error_df[["instancePath"]] <- purrr::map_chr(error_df[["instancePath"]], path_to_tree)
error_df[["message"]] <- paste("\u274c", error_df[["message"]])
# Escape `$` characters to ensure regex pattern does not trigger equation
# formatting in markdown
error_df <- escape_pattern_dollar(error_df)


# Create table ----
Expand Down
38 changes: 36 additions & 2 deletions tests/testthat/_snaps/create_round.md
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,7 @@
---

Code
create_derived_task_ids_round(version = "v4.0.0", branch = "br-v4.0.0",
derived_task_ids = 1L)
create_derived_task_ids_round(version = "v4.0.0", derived_task_ids = 1L)
Condition
Error in `map()`:
i In index: 1.
Expand All @@ -419,3 +418,38 @@
x `derived_task_ids` value "random_task_id" is not valid `task_id` variable in the provided `model_tasks` object.
i Valid `task_id` variables are: "origin_date", "location", and "horizon"

# validating round_id patterns when round_id_from_var = TRUE works

Code
create_round(round_id_from_variable = TRUE, round_id = "round_id_var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! Values in `round_id` var "round_id_var" must contain either ISO formatted dates or alphanumeric characters separated by underscores ('_').
x In `model_tasks[[1]]$round_id_var$required`: "invalid-round-id-req"
x In `model_tasks[[1]]$round_id_var$optional`: "invalid-round-id-opt1" and "invalid-round-id-opt2"
annakrystalli marked this conversation as resolved.
Show resolved Hide resolved

---

Code
create_round(round_id_from_variable = TRUE, round_id = "round_id_var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! Values in `round_id` var "round_id_var" must contain either ISO formatted dates or alphanumeric characters separated by underscores ('_').
x In `model_tasks[[1]]$round_id_var$optional`: "invalid-round-id-opt1" and "invalid-round-id-opt2"
x In `model_tasks[[2]]$round_id_var$required`: "invalid-round-id-req1" and "invalid-round-id-req2"

# validating round_id pattern when round_id_from_var = FALSE works

Code
create_round(round_id_from_variable = FALSE, round_id = "round-id-var",
model_tasks = model_tasks, submissions_due = list(start = "2023-01-12", end = "2023-01-18"),
last_data_date = "2023-01-02")
Condition
Error in `create_round()`:
! `round_id` must contain either ISO formatted date or alphanumeric characters separated by underscores ('_').
x "round-id-var" does not match expected pattern

Loading
Loading