Add dynamic validation of sample output_type_id_params. Resolves #17

hubverse-org · Apr 29, 2024 · 625847e · 625847e
1 parent 965a5ea
commit 625847e
Show file tree

Hide file tree

Showing 7 changed files with 607 additions and 1 deletion.
diff --git a/R/validate-config-utils.R b/R/validate-config-utils.R
@@ -301,7 +301,112 @@ find_invalid_target_keys <- function(target_keys, model_task_grp) {
     stats::setNames(names(target_keys))
 }
 
+# Validate the range (minimum & maximum) of acceptable sample numbers for a
+# given modeling task group in a given round.
+#
+# Arguments:
+#   model_task_grp: list holding the config of a single modeling task group.
+#   model_task_i, round_i: not referenced directly in the body; presumably
+#     interpolated by glue::glue() into the path template returned by
+#     get_error_path() -- TODO confirm against get_error_path().
+#   schema: passed through to get_error_path().
+#
+# Returns NULL if the group has no sample output_type_id_params, if either
+# bound is missing, or if the check passes. Returns an error df row if
+# max_samples_per_task is less than min_samples_per_task.
+validate_mt_sample_range <- function(model_task_grp,
+                                     model_task_i,
+                                     round_i,
+                                     schema) {
+  sample_config <- purrr::pluck(
+    model_task_grp,
+    "output_type",
+    "sample",
+    "output_type_id_params"
+  )
+
+  if (is.null(sample_config)) {
+    return(NULL)
+  }
+
+  # `[[` matches names exactly, whereas `$` partial-matches on lists.
+  min_samples <- sample_config[["min_samples_per_task"]]
+  max_samples <- sample_config[["max_samples_per_task"]]
+
+  # A missing bound would make the comparison zero-length and `if ()` would
+  # throw. Missing properties are presumably reported by the JSON schema
+  # validation, so they are not treated as a range error here.
+  if (is.null(min_samples) || is.null(max_samples)) {
+    return(NULL)
+  }
+
+  # isTRUE() also keeps an NA comparison from aborting the validation run.
+  if (!isTRUE(max_samples < min_samples)) {
+    return(NULL)
+  }
+
+  data.frame(
+    instancePath = glue::glue(
+      get_error_path(
+        schema,
+        "output_type/sample/output_type_id_params",
+        "instance"
+      )
+    ),
+    schemaPath = paste(
+      get_error_path(schema, "sample", "schema"),
+      "output_type_id_params",
+      sep = "/"
+    ),
+    keyword = "Sample number range",
+    message = glue::glue(
+      "min_samples_per_task must be less or equal to max_samples_per_task."
+    ),
+    schema = "",
+    # Explicit "\n" instead of relying on glue's indentation trimming of a
+    # multi-line literal; the resulting string is the same.
+    data = glue::glue(
+      "min_samples_per_task: {min_samples};\nmax_samples_per_task: {max_samples}"
+    )
+  )
+}
+
+# Validate that compound_taskid_set values are valid task ids for a
+# given modeling task group in a given round.
+#
+# Arguments:
+#   model_task_grp: list holding the config of a single modeling task group.
+#   model_task_i, round_i: not referenced directly in the body; presumably
+#     interpolated by glue::glue() into the path template returned by
+#     get_error_path() -- TODO confirm against get_error_path().
+#   schema: passed through to get_error_path().
+#
+# Returns NULL if no compound_taskid_set is configured or if every value is
+# one of the ids returned by get_grp_task_ids(). Otherwise returns a single
+# error df row whose message lists all invalid values.
+validate_mt_sample_comp_tids <- function(model_task_grp,
+                                         model_task_i,
+                                         round_i,
+                                         schema) {
+  sample_config <- purrr::pluck(
+    model_task_grp,
+    "output_type",
+    "sample",
+    "output_type_id_params"
+  )
 
+  comp_tids <- sample_config[["compound_taskid_set"]]
+
+  if (is.null(comp_tids)) {
+    return(NULL)
+  }
+
+  # Look the group's task ids up once; they are needed for both the check
+  # and the reported data.
+  grp_task_ids <- get_grp_task_ids(model_task_grp)
+  invalid_comp_tids <- setdiff(comp_tids, grp_task_ids)
+
+  if (length(invalid_comp_tids) == 0L) {
+    return(NULL)
+  }
+
+  # Collapse before interpolating: a vector inside `{}` makes glue return one
+  # string per element, which data.frame() would recycle into several rows.
+  invalid_str <- glue::glue_collapse(invalid_comp_tids, sep = "', '")
+  comp_tids_str <- glue::glue_collapse(comp_tids, sep = ", ")
+  task_ids_str <- glue::glue_collapse(grp_task_ids, sep = ", ")
+
+  data.frame(
+    instancePath = glue::glue(
+      get_error_path(
+        schema,
+        "output_type/sample/output_type_id_params/compound_taskid_set",
+        "instance"
+      )
+    ),
+    schemaPath = paste(
+      get_error_path(schema, "sample", "schema"),
+      "output_type_id_params",
+      "compound_taskid_set",
+      sep = "/"
+    ),
+    keyword = "compound_taskid_set values",
+    message = glue::glue(
+      "compound_taskid_set value(s) '{invalid_str}' not valid task id(s)."
+    ),
+    schema = "",
+    data = glue::glue(
+      "compound_taskid_set values: {comp_tids_str};\ntask id values: {task_ids_str}"
+    )
+  )
+}
 
 validate_mt_property_unique_vals <- function(model_task_grp,
                                              model_task_i,
@@ -321,7 +426,7 @@ validate_mt_property_unique_vals <- function(model_task_grp,
   val_properties <- switch(property,
     task_ids = model_task_grp[["task_ids"]],
     output_type = model_task_grp[["output_type"]][
-      c("quantile", "cdf", "pmf", "sample")
+      c("quantile", "cdf", "pmf")
     ] %>%
       purrr::compact() %>%
       purrr::map(

diff --git a/R/validate_config.R b/R/validate_config.R
@@ -207,6 +207,22 @@ val_round <- function(round, round_i, schema) {
         schema = schema
       )
     ),
+    purrr::imap(
+      model_task_grps,
+      ~ validate_mt_sample_range(
+        model_task_grp = .x, model_task_i = .y,
+        round_i = round_i,
+        schema = schema
+      )
+    ),
+    purrr::imap(
+      model_task_grps,
+      ~ validate_mt_sample_comp_tids(
+        model_task_grp = .x, model_task_i = .y,
+        round_i = round_i,
+        schema = schema
+      )
+    ),
     list(
       validate_round_ids_consistent(
         round = round,

diff --git a/tests/testthat/_snaps/validate_config.md b/tests/testthat/_snaps/validate_config.md
@@ -1,3 +1,64 @@
+# Config for samples handled succesfully
+
+    Code
+      out
+    Output
+      [1] TRUE
+      attr(,"config_path")
+      [1] "testdata/tasks-samples-pass.json"
+      attr(,"schema_version")
+      [1] "v3.0.0"
+      attr(,"schema_url")
+      https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/br-v3.0.0/v3.0.0/tasks-schema.json
+
+# Config for samples fail correctly
+
+    Code
+      out
+    Output
+      [1] FALSE
+      attr(,"config_path")
+      [1] "testdata/tasks-samples-error-range.json"
+      attr(,"schema_version")
+      [1] "v3.0.0"
+      attr(,"schema_url")
+      https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/br-v3.0.0/v3.0.0/tasks-schema.json
+      attr(,"errors")
+                                                                     instancePath
+      1 /rounds/0/model_tasks/0/task_ids/output_type/sample/output_type_id_params
+                                                                                                                   schemaPath
+      1 #/properties/rounds/items/properties/model_tasks/items/properties/output_type/properties/sample/output_type_id_params
+                    keyword
+      1 Sample number range
+                                                                    message schema
+      1 min_samples_per_task must be less or equal to max_samples_per_task.       
+                                                       data
+      1 min_samples_per_task: 60;\nmax_samples_per_task: 40
+
+---
+
+    Code
+      out
+    Output
+      [1] FALSE
+      attr(,"config_path")
+      [1] "testdata/tasks-samples-error-task-ids.json"
+      attr(,"schema_version")
+      [1] "v3.0.0"
+      attr(,"schema_url")
+      https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/br-v3.0.0/v3.0.0/tasks-schema.json
+      attr(,"errors")
+                                                                                         instancePath
+      1 /rounds/0/model_tasks/0/task_ids/output_type/sample/output_type_id_params/compound_taskid_set
+                                                                                                                                       schemaPath
+      1 #/properties/rounds/items/properties/model_tasks/items/properties/output_type/properties/sample/output_type_id_params/compound_taskid_set
+                           keyword
+      1 compound_taskid_set values
+                                                                 message schema
+      1 compound_taskid_set value(s) 'origin_date' not valid task id(s).       
+                                                                                                                         data
+      1 compound_taskid_set values: origin_date, location, horizon;\ntask id values: forecast_date, target, horizon, location
+
 # Config errors detected successfully
 
     Code

diff --git a/tests/testthat/test-validate_config.R b/tests/testthat/test-validate_config.R
@@ -23,6 +23,31 @@ test_that("Config validated successfully", {
   ))))
 })
 
+test_that("Config for samples handled succesfully", {
+  # Validate the passing samples fixture; informational messages are muted.
+  samples_config <- testthat::test_path("testdata", "tasks-samples-pass.json")
+  out <- suppressMessages(
+    validate_config(
+      config_path = samples_config,
+      branch = "br-v3.0.0",
+      schema_version = "latest"
+    )
+  )
+  # The snapshot records the expression text `out`, so the name is kept.
+  expect_snapshot(out)
+  expect_true(out)
+})
+test_that("Config for samples fail correctly", {
+  # Run validate_config() on a fixture from testdata/ with warnings muted.
+  validate_fixture <- function(file_name) {
+    suppressWarnings(
+      validate_config(
+        config_path = testthat::test_path("testdata", file_name),
+        branch = "br-v3.0.0",
+        schema_version = "latest"
+      )
+    )
+  }
+
+  # Fixture with min_samples_per_task (60) above max_samples_per_task (40).
+  # The snapshot records the expression text `out`, so the name is kept.
+  out <- validate_fixture("tasks-samples-error-range.json")
+  expect_snapshot(out)
+  expect_false(out)
+
+  # Fixture expected to fail the compound_taskid_set task id check.
+  out <- validate_fixture("tasks-samples-error-task-ids.json")
+  expect_snapshot(out)
+  expect_false(out)
+})
+
+
 
 test_that("Config errors detected successfully", {
   config_path <- testthat::test_path("testdata", "tasks-errors.json")

diff --git a/tests/testthat/testdata/tasks-samples-error-range.json b/tests/testthat/testdata/tasks-samples-error-range.json
@@ -0,0 +1,132 @@
+{
+    "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v3.0.0/tasks-schema.json",
+    "rounds": [{
+            "round_id_from_variable": true,
+            "round_id": "forecast_date",
+            "model_tasks": [{
+                "task_ids": {
+                    "forecast_date": {
+                        "required": null,
+                        "optional": [
+                            "2022-12-12", "2022-12-19", "2022-12-26", "2023-01-02", "2023-01-09",
+                            "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13",
+                            "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20",
+                            "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24",
+                            "2023-05-01", "2023-05-08", "2023-05-15"
+                        ]
+                    },
+                    "target": {
+                        "required": null,
+                        "optional": ["wk ahead inc flu hosp"]
+                    },
+                    "horizon": {
+                        "required": [2],
+                        "optional": [1]
+                    },
+                    "location": {
+                        "required": ["US"],
+                        "optional": [
+                            "01",
+                            "02"
+                        ]
+                    }
+                },
+                "output_type": {
+                    "sample": {
+                        "output_type_id_params": {
+                            "is_required": true,
+                            "type": "integer",
+                            "min_samples_per_task": 60,
+                            "max_samples_per_task": 40
+                        },
+                        "value": {
+                            "type": "integer",
+                            "minimum": 0
+                        }
+                    },
+                    "mean": {
+                        "output_type_id": {
+                            "required": null,
+                            "optional": ["NA"]
+                        },
+                        "value": {
+                            "type": "double",
+                            "minimum": 0
+                        }
+                    }
+                },
+                "target_metadata": [{
+                    "target_id": "wk ahead inc flu hosp",
+                    "target_name": "weekly influenza hospitalization incidence",
+                    "target_units": "rate per 100,000 population",
+                    "target_keys": {
+                        "target": ["wk ahead inc flu hosp"]
+                    },
+                    "target_type": "discrete",
+                    "description": "This target represents the counts of new hospitalizations per horizon week.",
+                    "is_step_ahead": true,
+                    "time_unit": "week"
+                }]
+            }, {
+                "task_ids": {
+                    "forecast_date": {
+                        "required": null,
+                        "optional": [
+                            "2022-12-12", "2022-12-19", "2022-12-26", "2023-01-02", "2023-01-09",
+                            "2023-01-16", "2023-01-23", "2023-01-30", "2023-02-06", "2023-02-13",
+                            "2023-02-20", "2023-02-27", "2023-03-06", "2023-03-13", "2023-03-20",
+                            "2023-03-27", "2023-04-03", "2023-04-10", "2023-04-17", "2023-04-24",
+                            "2023-05-01", "2023-05-08", "2023-05-15"
+                        ]
+                    },
+                    "target": {
+                        "required": null,
+                        "optional": ["wk flu hosp rate change"]
+                    },
+                    "horizon": {
+                        "required": [2],
+                        "optional": [1]
+                    },
+                    "location": {
+                        "required": ["US"],
+                        "optional": [
+                            "01",
+                            "02"
+                        ]
+                    }
+                },
+                "output_type": {
+                    "pmf": {
+                        "output_type_id": {
+                            "required": ["large_decrease", "decrease", "stable", "increase", "large_increase"],
+                            "optional": null
+                        },
+                        "value": {
+                            "type": "double",
+                            "minimum": 0,
+                            "maximum": 1
+                        }
+                    }
+                },
+                "target_metadata": [{
+                    "target_id": "wk flu hosp rate change",
+                    "target_name": "weekly influenza hospitalization rate change",
+                    "target_units": "rate per 100,000 population",
+                    "target_keys": {
+                        "target": ["wk flu hosp rate change"]
+                    },
+                    "target_type": "nominal",
+                    "description": "This target represents the change in the rate of new hospitalizations per week comparing the week ending two days prior to the forecast_date to the week ending h weeks after the forecast_date.",
+                    "is_step_ahead": true,
+                    "time_unit": "week"
+                }]
+            }],
+            "submissions_due": {
+                "relative_to": "forecast_date",
+                "start": -6,
+                "end": 2
+            }
+        }
+
+    ]
+}