refactor custom function execution (#150)

* refactor get_caller_env_formals - make DRY code - add roxygen skeleton - split logical vectors into separate variables - insert explanatory comments * refactor exec_cfg_check() - add sentinel boolean variables - add roxygen skeleton - insert explanatory comments - unwrap calls that don't need to be wrapped * add catch for malformed config file * add explainer for caller vars * unnest missing file check; add class to error * unnest unspecified config file check and simplify logic * add explanatory comments * add tests for errors in custom configs * appeas lintr * DIAF lintr * Apply suggestions from code review Co-authored-by: Anna Krystalli <[email protected]> --------- Co-authored-by: Anna Krystalli <[email protected]>
hubverse-org · Nov 1, 2024 · 61107a9 · 61107a9
1 parent d832175
commit 61107a9
Show file tree

Hide file tree

Showing 4 changed files with 111 additions and 25 deletions.
diff --git a/R/exec_cfg_check.R b/R/exec_cfg_check.R
@@ -1,25 +1,48 @@
+#' Execute a check function from the validations configuration file
+#'
+#' @param check_name [character] the name of the check function
+#' @param validations_cfg [list] the the parsed `validations.yml` file
+#' @param caller_env [environment] the environment of the calling function.
+#'   This is usually generated from `rlang::caller_env()`
+#' @param caller_call [call] the call of the calling function.
+#'   This is usually generated from `rlang::caller_call()`
+#' @noRd
 exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call) {
   fn_cfg <- validations_cfg[[check_name]]
-  if (!is.null(fn_cfg[["pkg"]])) {
+  from_pkg <- !is.null(fn_cfg[["pkg"]])
+  from_src <- !is.null(fn_cfg[["source"]])
+  if (from_pkg) {
+    # if the function is from a package, assume the package is installed and
+    # extract it from that package.
     fn <- get(fn_cfg[["fn"]],
       envir = getNamespace(fn_cfg[["pkg"]])
     )
-  } else if (!is.null(fn_cfg[["source"]])) {
-    # TODO Validate source script.
+  } else if (from_src) {
+    # TODO: Validate source script.
+    # if it's a source script, we need to source the script locally to make
+    # the function available in local environment.
     hub_path <- rlang::env_get(env = caller_env, nm = "hub_path")
     src <- fs::path(hub_path, fn_cfg[["source"]])
     source(src, local = TRUE)
     fn <- get(fn_cfg[["fn"]])
+  } else {
+    path <- rlang::env_get(env = caller_env, nm = "validations_cfg_path") # nolint
+    msg <- c("Custom validation function {.var {check_name}}",
+             "must specify either a {.arg pkg} or {.arg script} in {.path {path}}")
+    cli::cli_abort(paste(msg, collapse = " "),
+      call = caller_call,
+      class = "custom_validation_cfg_malformed"
+    )
   }
 
+  # get the arguments from the caller environment
   caller_env_formals <- get_caller_env_formals(
-    fn, caller_env,
+    fn,
+    caller_env,
     cfg_args = fn_cfg[["args"]]
   )
-  args <- c(
-    caller_env_formals,
-    fn_cfg[["args"]]
-  )
+  # combine the arguments from the caller environment and the config
+  args <- c(caller_env_formals, fn_cfg[["args"]])
 
   res <- try(rlang::exec(fn, !!!args), silent = TRUE)
 
@@ -35,10 +58,36 @@ exec_cfg_check <- function(check_name, validations_cfg, caller_env, caller_call)
   res
 }
 
+#' Get non-overridden variables from the calling environment
+#'
+#' When executing custom functions, we need to extract variables from the
+#' validation function calling it, but we need to ensure two things:
+#'
+#' 1. match the variables with the arguments of the custom function
+#' 2. respect the variables overridden from the config file
+#'
+#' @param fn [function] the custom function
+#' @param caller_env [environment] the environment of the calling function.
+#'   This is usually generated from `rlang::caller_env()`
+#' @param caller_call [call] the call of the calling function.
+#'   This is usually generated from `rlang::caller_call()`
+#' @noRd
 get_caller_env_formals <- function(fn, caller_env, cfg_args) {
-  caller_env_fmls <- rlang::fn_fmls_names(fn)[
-    rlang::fn_fmls_names(fn) %in% rlang::env_names(caller_env) &
-      !rlang::fn_fmls_names(fn) %in% names(cfg_args)
-  ]
+  # find the arguments of the custom function
+  fn_arg_names <- rlang::fn_fmls_names(fn)
+
+  # variables available from the calling environment (e.g. hub_path, tbl, etc..)
+  available_vars <- rlang::env_names(caller_env)
+
+  # match the arguments from the calling environment,
+  # discarding the ones not needed
+  args_from_vars <- fn_arg_names %in% available_vars
+
+  # do not include arguments that are specified in the config.
+  not_config_args <- !fn_arg_names %in% names(cfg_args)
+  caller_env_fmls <- fn_arg_names[args_from_vars & not_config_args]
+
+  # extract these values from the calling environment,
+  # replacing with `NULL` if they are missing.
   rlang::env_get_list(caller_env, nms = caller_env_fmls, default = NULL)
 }
diff --git a/R/execute_custom_checks.R b/R/execute_custom_checks.R
@@ -1,38 +1,50 @@
 execute_custom_checks <- function(validations_cfg_path = NULL) {
+  # There is more than one function that will call this function. These two
+  # variables help us to pass the variables from that function to the custom
+  # functions.
+  #
+  # Having the calling function's environment gives us access to the variables
   caller_env <- rlang::caller_env()
+  # Knowing the calling function's name allows us to select the correct
+  # custom validation function.
   caller_call <- rlang::caller_call()
 
-  if (!is.null(validations_cfg_path)) {
-    if (!fs::file_exists(validations_cfg_path)) {
-      cli::cli_abort(
-        "Validations .yml file not found at {.path {validations_cfg_path}}",
-        call = caller_call
-      )
-    }
+  missing_file <- !is.null(validations_cfg_path) &&
+    !fs::file_exists(validations_cfg_path)
+  if (missing_file) {
+    cli::cli_abort(
+      "Validations .yml file not found at {.path {validations_cfg_path}}",
+      call = caller_call,
+      class = "custom_validation_yml_missing"
+    )
   }
 
+  # if the validations_cfg_path is not specified, we check if it exists in
+  # the hub.
   if (is.null(validations_cfg_path)) {
-    default_cfg_path <- fs::path(
+    validations_cfg_path <- fs::path(
       rlang::env_get(env = caller_env, nm = "hub_path"),
       "hub-config", "validations.yml"
     )
-    if (!fs::file_exists(default_cfg_path)) {
-      return(NULL)
-    } else {
-      validations_cfg_path <- default_cfg_path
-    }
+  }
+  # no need to perform checks if there is no config file
+  if (!fs::file_exists(validations_cfg_path)) {
+    return(NULL)
   }
 
+  # extract the correct function from the config file based on the round ID
   validations_cfg <- config::get(
     value = rlang::call_name(caller_call),
     config = rlang::env_get(env = caller_env, nm = "round_id"),
     file = validations_cfg_path
   )
 
+  # again, no need to perform checks if no checks exist
   if (is.null(validations_cfg)) {
     return(NULL)
   }
 
+  # Create the list to contain the validation output
   out <- vector("list", length(validations_cfg)) |>
     stats::setNames(names(validations_cfg))
 

diff --git a/tests/testthat/test-execute_custom_checks.R b/tests/testthat/test-execute_custom_checks.R
@@ -24,6 +24,23 @@ test_that("execute_custom_checks works", {
   )
 })
 
+
+test_that("bad configs throw the correct errors", {
+
+  missing_cfg <- testthat::test_path("testdata", "config", "does-not-exist.yml")
+  expect_error(
+    test_custom_checks_caller(validations_cfg_path = missing_cfg),
+    class = "custom_validation_yml_missing"
+  )
+
+  malformed_cfg <- testthat::test_path("testdata", "config", "validations-bad-cfg.yml")
+  expect_error(
+    test_custom_checks_caller(validations_cfg_path = malformed_cfg),
+    class = "custom_validation_cfg_malformed"
+  )
+})
+
+
 test_that("execute_custom_checks sourcing functions from scripts works", {
   tmp <- withr::local_tempdir()
   the_config <- testthat::test_path("testdata/config/validations-src.yml")

diff --git a/tests/testthat/testdata/config/validations-bad-cfg.yml b/tests/testthat/testdata/config/validations-bad-cfg.yml
@@ -0,0 +1,8 @@
+default:
+    test_custom_checks_caller:
+      horizon_timediff:
+        fn: "opt_check_tbl_horizon_timediff"
+        # bad config contains no pkg or source
+        args:
+          t0_colname: "forecast_date"
+          t1_colname: "target_end_date"