diff --git a/DESCRIPTION b/DESCRIPTION index c31e94d3..f572c254 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hubValidations Title: Testing framework for hubverse hub validations -Version: 0.0.0.9004 +Version: 0.0.0.9005 Authors@R: c( person( given = "Anna", @@ -37,7 +37,7 @@ Imports: dplyr, fs, gh, - hubUtils (>= 0.0.0.9014), + hubUtils (>= 0.0.0.9016), jsonlite, jsonvalidate, lubridate, diff --git a/NAMESPACE b/NAMESPACE index 4fef6c64..eaa10bc7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ export(is_info) export(is_success) export(new_hub_validations) export(not_pass) +export(opt_check_metadata_team_max_model_n) export(opt_check_tbl_col_timediff) export(opt_check_tbl_counts_lt_popn) export(opt_check_tbl_horizon_timediff) diff --git a/NEWS.md b/NEWS.md index 4d8d5b38..1404f2ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# hubValidations 0.0.0.9005 + +* Improved handling of numeric output type IDs (including high precision floating points / values with trailing zeros), especially when the overall hub output type ID column is character. This previously led to a number of bugs and false validation failures (#58 & #54) which are addressed in this version. +* Bug fixes with respect to handling modelling tasks with no required task ID / output type combinations. +* Improved capture of error messages when a check execution error occurs. + # hubValidations 0.0.0.9004 This release contains a bug fix for reading in and validating CSV column types correctly. (#54) @@ -10,7 +16,7 @@ This release includes a number of bug fixes: # hubValidations 0.0.0.9002 -This release includes improvements desgined after the first round of sandbox testing on setting up the CDC FluSight hub. Improvements include: +This release includes improvements designed after the first round of sandbox testing on setting up the CDC FluSight hub. Improvements include: * Export `parse_file_name` function for parsing model output metadata from a model output file name. 
* Issue more specific and informative messaging when `check_tbl_values()` check fails. diff --git a/R/check_file_read.R b/R/check_file_read.R index ccfbb13e..aa685944 100644 --- a/R/check_file_read.R +++ b/R/check_file_read.R @@ -6,7 +6,21 @@ #' @export check_file_read <- function(file_path, hub_path = ".") { try_read <- try( - read_model_out_file(file_path, hub_path), + { + if (fs::path_ext(file_path) == "csv") { + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "hub" + ) + } else { + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "none" + ) + } + }, silent = TRUE ) check <- !inherits(try_read, "try-error") @@ -15,7 +29,7 @@ check_file_read <- function(file_path, hub_path = ".") { details <- NULL } else { details <- cli::format_inline( - attr(try_read, "condition")$message, "\n", + attr(try_read, "condition")$message, "\n", "Please check file path is correct and file can be read using {.fn read_model_out_file}" ) } diff --git a/R/check_tbl_rows_unique.R b/R/check_tbl_rows_unique.R index 8488089c..0e590228 100644 --- a/R/check_tbl_rows_unique.R +++ b/R/check_tbl_rows_unique.R @@ -3,6 +3,7 @@ #' Checks that combinations of task ID, output type and output type ID value #' combinations are unique, by checking that there are no duplicate rows across #' all `tbl` columns excluding the `value` column. +#' @inheritParams check_tbl_values #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return #' @export diff --git a/R/check_tbl_values.R b/R/check_tbl_values.R index 3c0e88e3..ebef5837 100644 --- a/R/check_tbl_values.R +++ b/R/check_tbl_values.R @@ -1,17 +1,16 @@ #' Check model output data tbl contains valid value combinations -#' +#' @param tbl a tibble/data.frame of the contents of the file being validated. Column types must **all be character**. 
#' @inherit check_tbl_colnames params #' @inherit check_tbl_colnames return #' @export check_tbl_values <- function(tbl, round_id, file_path, hub_path) { config_tasks <- hubUtils::read_config(hub_path, "tasks") - # Coerce both tbl and accepted vals to character for easier comparison of + # Coerce accepted vals to character for easier comparison of # values. Tried to use arrow tbls for comparisons as more efficient when # working with larger files but currently arrow does not match NAs as dplyr # does, returning false positives for mean & median rows which contain NA in # output type ID column. - tbl <- hubUtils::coerce_to_character(tbl) accepted_vals <- hubUtils::expand_model_out_val_grid( config_tasks = config_tasks, round_id = round_id, @@ -105,3 +104,40 @@ summarise_invalid_values <- function(valid_tbl, accepted_vals) { invalid_combs_idx = invalid_combs_idx ) } + + +get_numeric_output_type_ids <- function(file_path, hub_path) { + + get_file_round_config(file_path, hub_path)[["model_tasks"]] %>% + purrr::map(~ .x[["output_type"]]) %>% + unlist(recursive = FALSE) %>% + purrr::map(~ purrr::pluck(.x, "output_type_id")) %>% + purrr::map_lgl(~is.numeric(unlist(.x))) %>% + purrr::keep(isTRUE) %>% + names() %>% + unique() +} + + +coerce_num_output_type_ids <- function(tbl, file_path, hub_path) { + + num_output_types <- get_numeric_output_type_ids( + file_path = file_path, + hub_path = hub_path) + + if (any(tbl[["output_type"]] %in% num_output_types) && + inherits(tbl[["output_type_id"]], "character")) { + + type_coerce <- tbl[["output_type"]] %in% num_output_types + num_output_type_id <- suppressWarnings( + as.numeric(tbl$output_type_id[type_coerce]) + ) + # establish only valid coercions to distinguish between the potential for + # two cdf output types in the same round, one numeric and one character. 
+ valid <- !is.na(num_output_type_id) + tbl$output_type_id[type_coerce][valid] <- as.character( + num_output_type_id[valid] + ) + } + tbl +} diff --git a/R/check_tbl_values_required.R b/R/check_tbl_values_required.R index cd106eb7..b5979379 100644 --- a/R/check_tbl_values_required.R +++ b/R/check_tbl_values_required.R @@ -1,14 +1,13 @@ #' Check all required task ID/output type/output type ID value combinations present #' in model data. #' +#' @inheritParams check_tbl_values #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return #' @export check_tbl_values_required <- function(tbl, round_id, file_path, hub_path) { - config_tasks <- hubUtils::read_config(hub_path, "tasks") tbl[["value"]] <- NULL - tbl <- hubUtils::coerce_to_character(tbl) - + config_tasks <- hubUtils::read_config(hub_path, "tasks") req <- hubUtils::expand_model_out_val_grid( config_tasks, round_id = round_id, @@ -32,7 +31,7 @@ check_tbl_values_required <- function(tbl, round_id, file_path, hub_path) { ) missing_df <- purrr::pmap( - list(tbl, req, full), + combine_mt_inputs(tbl, req, full), check_modeling_task_values_required ) %>% purrr::list_rbind() @@ -323,3 +322,10 @@ split_na_req <- function(req) { req[na_idx[, "row"], ] %>% split(na_idx[, "col"]) } + +combine_mt_inputs <- function(tbl, req, full) { + keep_mt <- purrr::map_lgl(req, ~nrow(.x) > 0L) + list(tbl[keep_mt], + req[keep_mt], + full[keep_mt]) +} diff --git a/R/opt_check_metadata_team_max_model_n.R b/R/opt_check_metadata_team_max_model_n.R new file mode 100644 index 00000000..f10ff468 --- /dev/null +++ b/R/opt_check_metadata_team_max_model_n.R @@ -0,0 +1,39 @@ +#' Check that submitting team does not exceed maximum number of allowed models +#' per team +#' +#' @inherit check_metadata_file_exists params +#' @param n_max Integer. Number of maximum allowed models per team. +#' @inherit check_tbl_col_types return +#' @details +#' Should be deployed as part of `validate_model_metadata` optional checks. 
+#' +#' +#' @export +opt_check_metadata_team_max_model_n <- function(file_path, hub_path, n_max = 2L) { + + team_abbr <- parse_file_name( + file_path, + file_type = "model_metadata")$team_abbr + all_model_meta <- hubUtils::load_model_metadata(hub_path) + + team_models <- all_model_meta[["model_abbr"]][all_model_meta[["team_abbr"]] == team_abbr] + n_models <- length(team_models) + check <- isFALSE(n_models > n_max) + if (check) { + details <- NULL + } else { + details <- cli::format_inline( + "Team {.val {team_abbr}} has submitted valid metadata for + {.val {n_models}} model{?s}: + {.val {team_models}}.") + } + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = cli::format_inline( + "Maximum number of models per team ({.val {n_max}})"), + msg_attribute = "exceeded.", + msg_verbs = c("not", ""), + details = details) +} diff --git a/R/opt_check_tbl_col_timediff.R b/R/opt_check_tbl_col_timediff.R index 34856801..da92ecbc 100644 --- a/R/opt_check_tbl_col_timediff.R +++ b/R/opt_check_tbl_col_timediff.R @@ -3,6 +3,8 @@ #' @param t0_colname Character string. The name of the time zero date column. #' @param t1_colname Character string. The name of the time zero + 1 time step date column. #' @param timediff an object of class `lubridate` [`Period-class`] and length 1. +#' @details +#' Should be deployed as part of `validate_model_data` optional checks. #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return #' @export diff --git a/R/opt_check_tbl_counts_lt_popn.R b/R/opt_check_tbl_counts_lt_popn.R index 5e3a0bd3..e51c8555 100644 --- a/R/opt_check_tbl_counts_lt_popn.R +++ b/R/opt_check_tbl_counts_lt_popn.R @@ -16,6 +16,8 @@ #' #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return +#' @details +#' Should be deployed as part of `validate_model_data` optional checks. 
#' @export #' @examples #' hub_path <- system.file("testhubs/flusight", package = "hubValidations") diff --git a/R/opt_check_tbl_horizon_timediff.R b/R/opt_check_tbl_horizon_timediff.R index 8b75254b..4851cd1c 100644 --- a/R/opt_check_tbl_horizon_timediff.R +++ b/R/opt_check_tbl_horizon_timediff.R @@ -8,6 +8,8 @@ #' The period of a single horizon. Default to 1 week. #' @inherit check_tbl_colnames params #' @inherit check_tbl_col_types return +#' @details +#' Should be deployed as part of `validate_model_data` optional checks. #' @export opt_check_tbl_horizon_timediff <- function(tbl, file_path, hub_path, t0_colname, t1_colname, horizon_colname = "horizon", diff --git a/R/parse_file_name.R b/R/parse_file_name.R index 365e0719..a1fa31a4 100644 --- a/R/parse_file_name.R +++ b/R/parse_file_name.R @@ -2,9 +2,12 @@ #' #' @param file_path Character string. A model output file name. #' Can include parent directories which are ignored. +#' @param file_type Character string. Type of file name being parsed. One of `"model_output"` +#' or `"model_metadata"`. #' #' @return A list with the following elements: -#' - `round_id`: The round ID the model output is associated with. +#' - `round_id`: The round ID the model output is associated with (`NA` for +#' model metadata files.) #' - `team_abbr`: The team responsible for the model. #' - `model_abbr`: The name of the model. 
#' - `model_id`: The unique model ID derived from the concatenation of @@ -15,32 +18,40 @@ #' #' @examples #' parse_file_name("hub-baseline/2022-10-15-hub-baseline.csv") -parse_file_name <- function(file_path) { - checkmate::assert_string(file_path) - file_name <- tools::file_path_sans_ext(basename(file_path)) +parse_file_name <- function(file_path, file_type = c("model_output", "model_metadata")) { + file_type <- rlang::arg_match(file_type) + checkmate::assert_string(file_path) + file_name <- tools::file_path_sans_ext(basename(file_path)) - split_pattern <- stringr::regex( - "([[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2})|[a-z_0-9]+", - TRUE + split_pattern <- stringr::regex( + "([[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2})|[a-z_0-9]+", + TRUE + ) + split_res <- unlist( + stringr::str_extract_all( + file_name, + split_pattern ) - split_res <- unlist( - stringr::str_extract_all( - file_name, - split_pattern - ) - ) - if (length(split_res) != 3L) { - cli::cli_abort( - "Could not parse file name {.path {file_name}} for submission metadata. + ) + exp_n <- switch(file_type, + model_output = 3L, + model_metadata = 2L + ) + if (length(split_res) != exp_n) { + cli::cli_abort( + "Could not parse file name {.path {file_name}} for submission metadata. Please consult documentation for file name requirements for correct metadata parsing." 
- ) - } - list( - round_id = split_res[1], - team_abbr = split_res[2], - model_abbr = split_res[3], - model_id = paste(split_res[2], split_res[3], sep = "-"), - ext = fs::path_ext(file_path) ) + } + if (file_type == "model_metadata") { + split_res <- c(NA, split_res) + } + list( + round_id = split_res[1], + team_abbr = split_res[2], + model_abbr = split_res[3], + model_id = paste(split_res[2], split_res[3], sep = "-"), + ext = fs::path_ext(file_path) + ) } diff --git a/R/read_model_out_file.R b/R/read_model_out_file.R index d2b7cc7f..9494ded0 100644 --- a/R/read_model_out_file.R +++ b/R/read_model_out_file.R @@ -1,12 +1,15 @@ #' Read a model output file #' #' @inheritParams check_valid_round_id -#' @param use_hub_schema Logical. When reading in `csv` files, whether to use -#' the hub's schema to specify column data types. +#' @param coerce_types character. What to coerce column types to on read. +#' - `hub`: read in (`csv`) or coerce (`parquet`, `arrow`) to hub schema. +#' - `chr`: read in (`csv`) or coerce (`parquet`, `arrow`) all columns to character. +#' - `none`: No coercion. Use `arrow` `read_*` function defaults. #' @return a tibble of contents of the model output file. 
#' @export read_model_out_file <- function(file_path, hub_path = ".", - use_hub_schema = TRUE) { + coerce_types = c("hub", "chr", "none")) { + coerce_types <- rlang::arg_match(coerce_types) full_path <- abs_file_path(file_path, hub_path) if (!fs::file_exists(full_path)) { @@ -21,22 +24,67 @@ read_model_out_file <- function(file_path, hub_path = ".", {.val {valid_ext}} not {.val {file_ext}}") } - df <- switch(file_ext, + tbl <- switch(file_ext, csv = { - if (use_hub_schema) { - arrow::read_csv_arrow( - full_path, - col_types = hubUtils::create_hub_schema( - config_tasks = hubUtils::read_config(hub_path, "tasks"), - partitions = NULL - ) + schema <- NULL + coerce_on_read <- ifelse(coerce_types == "none", FALSE, TRUE) + if (coerce_on_read) { + schema <- create_model_out_schema( + hub_path, + col_types = coerce_types ) + } + arrow::read_csv_arrow( + full_path, + col_types = schema + ) + }, + parquet = { + if (coerce_types == "hub") { + arrow::read_parquet(full_path) %>% + hubUtils::coerce_to_hub_schema( + config_tasks = hubUtils::read_config(hub_path, "tasks") + ) + } else if (coerce_types == "chr") { + arrow::read_parquet(full_path) %>% + hubUtils::coerce_to_character() } else { - arrow::read_csv_arrow(full_path) + arrow::read_parquet(full_path) } }, - parquet = arrow::read_parquet(full_path), - arrow = arrow::read_feather(full_path) + arrow = { + if (coerce_types == "hub") { + arrow::read_feather(full_path) %>% + hubUtils::coerce_to_hub_schema( + config_tasks = hubUtils::read_config(hub_path, "tasks") + ) + } else if (coerce_types == "chr") { + arrow::read_feather(full_path) %>% + hubUtils::coerce_to_character() + } else { + arrow::read_feather(full_path) + } + } + ) + tibble::as_tibble(tbl) +} + +create_model_out_schema <- function(hub_path, + col_types = c("hub", "chr")) { + col_types <- rlang::arg_match(col_types) + schema <- hubUtils::create_hub_schema( + config_tasks = hubUtils::read_config(hub_path, "tasks"), + partitions = NULL + ) + + switch(col_types, + hub 
= schema, + chr = { + purrr::map( + names(schema), + ~ arrow::field(.x, type = arrow::utf8()) + ) %>% + arrow::schema() + } ) - tibble::as_tibble(df) } diff --git a/R/try_check.R b/R/try_check.R index ecf1b910..840dfdea 100644 --- a/R/try_check.R +++ b/R/try_check.R @@ -10,15 +10,9 @@ try_check <- function(expr, file_path) { check <- try(expr, silent = TRUE) if (inherits(check, "try-error")) { - message <- attr(check, "condition")$message - parent_msg <- attr(check, "condition")$parent$message - if (is.character(parent_msg)) { - parent_msg <- paste(parent_msg, collapse = " --> ") - msg <- paste(message, parent_msg, sep = " --> ") - } else { - msg <- message - } - msg <- clean_msg(msg) + msg <- as.character(check) %>% + cli::ansi_strip() %>% + clean_msg() return( capture_exec_error( diff --git a/R/validate_model_data.R b/R/validate_model_data.R index 14e59712..8b0d7c84 100644 --- a/R/validate_model_data.R +++ b/R/validate_model_data.R @@ -27,10 +27,22 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, return(checks) } - tbl <- read_model_out_file( - file_path = file_path, - hub_path = hub_path - ) + # If `csv` file, read in using hub schema. Otherwise use file + # schema for other file formats. 
+ if (fs::path_ext(file_path) == "csv") { + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "hub" + ) + } else { + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "none" + ) + } + # -- File round ID checks ---- # Will be skipped if round config round_id_from_var is FALSE and no round_id_col @@ -93,9 +105,14 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, ) # -- Row level checks ---- + tbl_chr <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) checks$valid_vals <- try_check( check_tbl_values( - tbl, + tbl_chr, round_id = round_id, file_path = file_path, hub_path = hub_path @@ -107,7 +124,7 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, checks$rows_unique <- try_check( check_tbl_rows_unique( - tbl, + tbl_chr, file_path = file_path, hub_path = hub_path ), file_path @@ -115,7 +132,7 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, checks$req_vals <- try_check( check_tbl_values_required( - tbl, + tbl_chr, round_id = round_id, file_path = file_path, hub_path = hub_path diff --git a/inst/testhubs/flusight/hub-config/model-metadata-schema.json b/inst/testhubs/flusight/hub-config/model-metadata-schema.json new file mode 100644 index 00000000..20088eec --- /dev/null +++ b/inst/testhubs/flusight/hub-config/model-metadata-schema.json @@ -0,0 +1,129 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Schema for Modeling Hub model metadata", + "description": "This is the schema for model metadata files, please refer to https://github.com/cdcepi/FluSight-forecast-hub/blob/main/model-metadata/README.md for more information.", + "type": "object", + "properties": { + "team_name": { + "description": "The name of the team submitting the model", + "type": "string" + }, + "team_abbr": { + "description": "Abbreviated name of the team 
submitting the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_name": { + "description": "The name of the model", + "type": "string" + }, + "model_abbr": { + "description": "Abbreviated name of the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_version": { + "description": "Identifier of the version of the model", + "type": "string" + }, + "model_contributors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "affiliation": { + "type": "string" + }, + "email": { + "type": "string", + "format": "email" + }, + "orcid": { + "type": "string", + "pattern": "^\\d{4}\\-\\d{4}\\-\\d{4}\\-[\\dX]{4}$" + } + }, + "additionalProperties": false, + "required": ["name", "affiliation", "email"] + } + }, + "website_url": { + "description": "Public facing website for the model", + "type": "string", + "format": "uri" + }, + "repo_url": { + "description": "Repository containing code for the model", + "type": "string", + "format": "uri" + }, + "license": { + "description": "License for use of model output data", + "type": "string", + "enum": [ + "CC0-1.0", + "CC-BY-4.0", + "CC-BY_SA-4.0", + "PPDL", + "ODC-by", + "ODbL", + "OGL-3.0" + ] + }, + "designated_model": { + "description": "Team-specified indicator for whether the model should be eligible for inclusion in a Hub ensemble and public visualization. A team may designate up to two models.", + "type": "boolean" + }, + "citation": { + "description": "One or more citations for this model", + "type": "string", + "examples": ["Gibson GC , Reich NG , Sheldon D. Real-time mechanistic bayesian forecasts of Covid-19 mortality. medRxiv. 2020. 
https://doi.org/10.1101/2020.12.22.20248736"] + }, + "team_funding": { + "description": "Any information about funding source for the team or members of the team.", + "type": "string", + "examples": ["National Institutes of General Medical Sciences (R01GM123456). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS."] + }, + "data_inputs": { + "description": "List or description of data inputs used by the model", + "type": "string" + }, + "methods": { + "description": "A brief (200 char.) description of the methods used by this model", + "type": "string", + "maxLength": 200 + }, + "methods_long": { + "description": "A full description of the methods used by this model. Among other details, this should include whether spatial correlation is considered and how the model accounts for uncertainty.", + "type": "string" + }, + "ensemble_of_models": { + "description": "Indicator for whether this model is an ensemble of any separate component models", + "type": "boolean" + }, + "ensemble_of_hub_models": { + "description": "Indicator for whether this model is an ensemble specifically of other models submitted to this Hub", + "type": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "team_name", + "team_abbr", + "model_name", + "model_abbr", + "model_contributors", + "license", + "designated_model", + "data_inputs", + "methods", + "methods_long", + "ensemble_of_models", + "ensemble_of_hub_models" + ] +} diff --git a/inst/testhubs/flusight/model-metadata/hub-baseline.yml b/inst/testhubs/flusight/model-metadata/hub-baseline.yml new file mode 100644 index 00000000..d7d1bb81 --- /dev/null +++ b/inst/testhubs/flusight/model-metadata/hub-baseline.yml @@ -0,0 +1,39 @@ +team_name: "Hub Admins" +team_abbr: "hub" +model_name: "Hub baseline model" +model_abbr: "baseline" +model_version: "1.0" +model_contributors: [ + { + "name": "Nutcha Wattanachit", + "affiliation": "UMass Amherst", + "email": 
"nwattanachit@schoolph.umass.edu" + }, + { + "name": "Aaron Gerding", + "affiliation": "UMass Amherst", + "email": "agerding@umass.edu" + }, + { + "name": "Nick Reich", + "affiliation": "UMass Amherst", + "email": "nick@umass.edu" + }, + { + "name": "Evan Ray", + "affiliation": "UMass Amherst", + "email": "elray@umass.edu" + } +] +website_url: "https://github.com/reichlab/flu-hosp-models-2021-2022" +license: "CC-BY-4.0" +citation: "citation" +team_funding: "funding" +include_viz: true +include_ensemble: true +include_eval: true +methods: "Equally weighted ensemble of simple time-series baseline models." +data_inputs: "Daily and weekly incident flu hospitalizations, queried through covidData" +methods_long: "Equally weighted ensemble of simple time-series baseline models. Each baseline model calculates first differences of incidence in recent weeks. These differences are sampled and then added to the most recently observed incidence. Variations on this method include (a) including the first differences and the negative of these differences to enforce symmetry, resulting in a flat-line forecast, (b) generating predictions by working on the daily scale and then aggregating to weekly predictions, or by working directly with weekly data; (c) varying the number of time-units in the past for computing the first differences (14 or 21 days, or 3 or 4 weeks) to focus on capturing recent trends, and (d) using the original time-series or a variance-stabilizing transformation of it, e.g. square-root. Additionally, the resulting predictive distributions are truncated so that any predicted samples computed to be less than zero are truncated to be zero." 
+ensemble_of_models: true +ensemble_of_hub_models: false diff --git a/inst/testhubs/flusight/model-metadata/hub-ensemble.yml b/inst/testhubs/flusight/model-metadata/hub-ensemble.yml new file mode 100644 index 00000000..fc7c9580 --- /dev/null +++ b/inst/testhubs/flusight/model-metadata/hub-ensemble.yml @@ -0,0 +1,39 @@ +team_name: "Hub admins" +team_abbr: "hub" +model_name: "Ensemble of baseline models with trends" +model_abbr: "ensemble" +model_version: "1.0" +model_contributors: [ + { + "name": "Nutcha Wattanachit", + "affiliation": "UMass Amherst", + "email": "nwattanachit@schoolph.umass.edu" + }, + { + "name": "Aaron Gerding", + "affiliation": "UMass Amherst", + "email": "agerding@umass.edu" + }, + { + "name": "Nick Reich", + "affiliation": "UMass Amherst", + "email": "nick@umass.edu" + }, + { + "name": "Evan Ray", + "affiliation": "UMass Amherst", + "email": "elray@umass.edu" + } +] +website_url: "https://github.com/reichlab/flu-hosp-models-2021-2022" +license: "CC-BY-4.0" +citation: "citation" +team_funding: "funding" +include_viz: true +include_ensemble: true +include_eval: true +methods: "Equally weighted ensemble of simple time-series baseline models." +data_inputs: "Daily and weekly incident flu hospitalizations, queried through covidData" +methods_long: "Equally weighted ensemble of simple time-series baseline models. Each baseline model calculates first differences of incidence in recent weeks. These differences are sampled and then added to the most recently observed incidence. 
Variations on this method include (a) including the first differences and the negative of these differences to enforce symmetry, resulting in a flat-line forecast, (b) generating predictions by working on the daily scale and then aggregating to weekly predictions, or by working directly with weekly data; (c) varying the number of time-units in the past for computing the first differences (14 or 21 days, or 3 or 4 weeks) to focus on capturing recent trends, and (d) using the original time-series or a variance-stabilizing transformation of it, e.g. square-root. Additionally, the resulting predictive distributions are truncated so that any predicted samples computed to be less than zero are truncated to be zero." +ensemble_of_models: true +ensemble_of_hub_models: false diff --git a/man/check_tbl_rows_unique.Rd b/man/check_tbl_rows_unique.Rd index a9fdd0e9..ce51c343 100644 --- a/man/check_tbl_rows_unique.Rd +++ b/man/check_tbl_rows_unique.Rd @@ -7,7 +7,7 @@ check_tbl_rows_unique(tbl, file_path, hub_path) } \arguments{ -\item{tbl}{a tibble/data.frame of the contents of the file being validated.} +\item{tbl}{a tibble/data.frame of the contents of the file being validated. Column types must \strong{all be character}.} \item{file_path}{character string. Path to the file being validated relative to the hub's model-output directory.} diff --git a/man/check_tbl_values.Rd b/man/check_tbl_values.Rd index cc652d83..3c664ae8 100644 --- a/man/check_tbl_values.Rd +++ b/man/check_tbl_values.Rd @@ -7,7 +7,7 @@ check_tbl_values(tbl, round_id, file_path, hub_path) } \arguments{ -\item{tbl}{a tibble/data.frame of the contents of the file being validated.} +\item{tbl}{a tibble/data.frame of the contents of the file being validated. Column types must \strong{all be character}.} \item{round_id}{character string. 
The round identifier.} diff --git a/man/check_tbl_values_required.Rd b/man/check_tbl_values_required.Rd index ae834079..587851f0 100644 --- a/man/check_tbl_values_required.Rd +++ b/man/check_tbl_values_required.Rd @@ -8,7 +8,7 @@ in model data.} check_tbl_values_required(tbl, round_id, file_path, hub_path) } \arguments{ -\item{tbl}{a tibble/data.frame of the contents of the file being validated.} +\item{tbl}{a tibble/data.frame of the contents of the file being validated. Column types must \strong{all be character}.} \item{round_id}{character string. The round identifier.} diff --git a/man/opt_check_metadata_team_max_model_n.Rd b/man/opt_check_metadata_team_max_model_n.Rd new file mode 100644 index 00000000..dbcf72d0 --- /dev/null +++ b/man/opt_check_metadata_team_max_model_n.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/opt_check_metadata_team_max_model_n.R +\name{opt_check_metadata_team_max_model_n} +\alias{opt_check_metadata_team_max_model_n} +\title{Check that submitting team does not exceed maximum number of allowed models +per team} +\usage{ +opt_check_metadata_team_max_model_n(file_path, hub_path, n_max = 2L) +} +\arguments{ +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-metadata directory.} + +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{n_max}{Integer. 
Number of maximum allowed models per team.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Check that submitting team does not exceed maximum number of allowed models +per team +} +\details{ +Should be deployed as part of \code{validate_model_metadata} optional checks. +} diff --git a/man/opt_check_tbl_col_timediff.Rd b/man/opt_check_tbl_col_timediff.Rd index 654755a0..70b9183b 100644 --- a/man/opt_check_tbl_col_timediff.Rd +++ b/man/opt_check_tbl_col_timediff.Rd @@ -47,3 +47,6 @@ Returned object also inherits from subclass \verb{}. \description{ Check time difference between values in two date columns equal a defined period. } +\details{ +Should be deployed as part of \code{validate_model_data} optional checks. +} diff --git a/man/opt_check_tbl_counts_lt_popn.Rd b/man/opt_check_tbl_counts_lt_popn.Rd index 59998fba..a7e4702a 100644 --- a/man/opt_check_tbl_counts_lt_popn.Rd +++ b/man/opt_check_tbl_counts_lt_popn.Rd @@ -59,6 +59,8 @@ Check that predicted values per location are less than total location population \details{ Should only be applied to rows containing count predictions. Use argument \code{targets} to filter \code{tbl} data to appropriate count target rows. + +Should be deployed as part of \code{validate_model_data} optional checks. } \examples{ hub_path <- system.file("testhubs/flusight", package = "hubValidations") diff --git a/man/opt_check_tbl_horizon_timediff.Rd b/man/opt_check_tbl_horizon_timediff.Rd index 6a26fed0..f419c3bf 100644 --- a/man/opt_check_tbl_horizon_timediff.Rd +++ b/man/opt_check_tbl_horizon_timediff.Rd @@ -52,3 +52,6 @@ Returned object also inherits from subclass \verb{}. \description{ Check time difference between values in two date columns equal a defined period. 
} +\details{ +Should be deployed as part of \code{validate_model_data} optional checks. +} diff --git a/man/parse_file_name.Rd b/man/parse_file_name.Rd index c5aa6473..73ec8a5a 100644 --- a/man/parse_file_name.Rd +++ b/man/parse_file_name.Rd @@ -4,16 +4,20 @@ \alias{parse_file_name} \title{Parse model output file metadata from file name} \usage{ -parse_file_name(file_path) +parse_file_name(file_path, file_type = c("model_output", "model_metadata")) } \arguments{ \item{file_path}{Character string. A model output file name. Can include parent directories which are ignored.} + +\item{file_type}{Character string. Type of file name being parsed. One of \code{"model_output"} +or \code{"model_metadata"}.} } \value{ A list with the following elements: \itemize{ -\item \code{round_id}: The round ID the model output is associated with. +\item \code{round_id}: The round ID the model output is associated with (\code{NA} for +model metadata files.) \item \code{team_abbr}: The team responsible for the model. \item \code{model_abbr}: The name of the model. \item \code{model_id}: The unique model ID derived from the concatenation of diff --git a/man/read_model_out_file.Rd b/man/read_model_out_file.Rd index d496b643..a91020e9 100644 --- a/man/read_model_out_file.Rd +++ b/man/read_model_out_file.Rd @@ -4,7 +4,11 @@ \alias{read_model_out_file} \title{Read a model output file} \usage{ -read_model_out_file(file_path, hub_path = ".", use_hub_schema = TRUE) +read_model_out_file( + file_path, + hub_path = ".", + coerce_types = c("hub", "chr", "none") +) } \arguments{ \item{file_path}{character string. Path to the file being validated relative to @@ -20,8 +24,12 @@ in the \code{arrow} package. The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} files within the \code{hub-config} directory.} -\item{use_hub_schema}{Logical. When reading in \code{csv} files, whether to use -the hub's schema to specify column data types.} +\item{coerce_types}{character. 
What to coerce column types to on read. +\itemize{ +\item \code{hub}: read in (\code{csv}) or coerce (\code{parquet}, \code{arrow}) to hub schema. +\item \code{chr}: read in (\code{csv}) or coerce (\code{parquet}, \code{arrow}) all columns to character. +\item \code{none}: No coercion. Use \code{arrow} \verb{read_*} function defaults. +}} } \value{ a tibble of contents of the model output file. diff --git a/tests/testthat/_snaps/opt_check_metadata_team_max_model_n.md b/tests/testthat/_snaps/opt_check_metadata_team_max_model_n.md new file mode 100644 index 00000000..9406fa43 --- /dev/null +++ b/tests/testthat/_snaps/opt_check_metadata_team_max_model_n.md @@ -0,0 +1,19 @@ +# opt_check_metadata_team_max_model_n works + + Code + opt_check_metadata_team_max_model_n(hub_path = hub_path, file_path = "hub-baseline.yml") + Output + <message/check_success> + Message: + Maximum number of models per team (2) not exceeded. + +--- + + Code + opt_check_metadata_team_max_model_n(hub_path = hub_path, file_path = "hub-baseline.yml", + n_max = 1L) + Output + <warning/check_failure> + Warning: + Maximum number of models per team (1) exceeded. Team "hub" has submitted valid metadata for 2 models: "baseline" and "ensemble". + diff --git a/tests/testthat/_snaps/parse_file_name.md b/tests/testthat/_snaps/parse_file_name.md index 95abd6bc..71e856f2 100644 --- a/tests/testthat/_snaps/parse_file_name.md +++ b/tests/testthat/_snaps/parse_file_name.md @@ -61,6 +61,35 @@ [1] "parquet" +--- + + Code + parse_file_name("hub-baseline.yml", file_type = "model_metadata") + Output + $round_id + [1] NA + + $team_abbr + [1] "hub" + + $model_abbr + [1] "baseline" + + $model_id + [1] "hub-baseline" + + $ext + [1] "yml" + + +--- + + Code + parse_file_name("hubBaseline.yml", file_type = "model_metadata") + Condition + Error in `parse_file_name()`: + ! Could not parse file name 'hubBaseline' for submission metadata. Please consult documentation for file name requirements for correct metadata parsing. 
+ # parse_file_name fails correctly Code diff --git a/tests/testthat/_snaps/read_model_out_file.md b/tests/testthat/_snaps/read_model_out_file.md index 83a6fd68..4f802b0b 100644 --- a/tests/testthat/_snaps/read_model_out_file.md +++ b/tests/testthat/_snaps/read_model_out_file.md @@ -17,8 +17,7 @@ Code str(read_model_out_file(file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - hub_path = system.file("testhubs/simple", package = "hubValidations"), - use_hub_schema = TRUE)) + hub_path = system.file("testhubs/simple", package = "hubValidations"))) Output tibble [47 x 7] (S3: tbl_df/tbl/data.frame) $ origin_date : Date[1:47], format: "2022-10-08" "2022-10-08" ... diff --git a/tests/testthat/_snaps/try_check.md b/tests/testthat/_snaps/try_check.md index 3addd69f..b0b448ad 100644 --- a/tests/testthat/_snaps/try_check.md +++ b/tests/testthat/_snaps/try_check.md @@ -14,7 +14,7 @@ Output Error: - ! EXEC ERROR: In index: 1. --> Assertion on 'hub_path' failed: Directory 'random_hub' does not exist. + ! EXEC ERROR: Error in purrr::map(configs, ~validate_config(hub_path = hub_path, config = .x, : i In index: 1. Caused by error in `validate_config()`: ! Assertion on 'hub_path' failed: Directory 'random_hub' does not exist. --- @@ -25,5 +25,5 @@ Output Error: - ! EXEC ERROR: Assertion on 't0_colname' failed: Must be element of set ['forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'], but is 'random_col1'. + ! EXEC ERROR: Error in opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "random_col1", : Assertion on 't0_colname' failed: Must be element of set ['forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'], but is 'random_col1'. 
diff --git a/tests/testthat/test-check_config_hub_valid.R b/tests/testthat/test-check_config_hub_valid.R index 513606be..15edf098 100644 --- a/tests/testthat/test-check_config_hub_valid.R +++ b/tests/testthat/test-check_config_hub_valid.R @@ -1,4 +1,5 @@ test_that("check_config_hub_valid works", { + skip_if_offline() expect_snapshot( check_config_hub_valid( hub_path = system.file("testhubs/simple", package = "hubValidations")) diff --git a/tests/testthat/test-check_tbl_col_types.R b/tests/testthat/test-check_tbl_col_types.R index aa538236..1fa74771 100644 --- a/tests/testthat/test-check_tbl_col_types.R +++ b/tests/testthat/test-check_tbl_col_types.R @@ -25,8 +25,7 @@ test_that("check_tbl_col_types works", { hub_path <- test_path("testdata/hub") file_path <- "hub-baseline/2023-04-24-hub-baseline.csv" tbl <- read_model_out_file(file_path, - hub_path, - use_hub_schema = TRUE + hub_path ) expect_snapshot( check_tbl_col_types(tbl, file_path, hub_path) diff --git a/tests/testthat/test-check_tbl_values.R b/tests/testthat/test-check_tbl_values.R index ef7aec41..102115cc 100644 --- a/tests/testthat/test-check_tbl_values.R +++ b/tests/testthat/test-check_tbl_values.R @@ -4,7 +4,8 @@ test_that("check_tbl_values works", { round_id <- "2022-10-08" tbl <- read_model_out_file( file_path = file_path, - hub_path = hub_path + hub_path = hub_path, + coerce_types = "chr" ) expect_snapshot( check_tbl_values( @@ -15,7 +16,7 @@ test_that("check_tbl_values works", { ) ) - tbl[1, "horizon"] <- 11L + tbl[1, "horizon"] <- "11" expect_snapshot( check_tbl_values( tbl = tbl, @@ -25,3 +26,148 @@ test_that("check_tbl_values works", { ) ) }) + + +test_that("check_tbl_values consistent across numeric & character output type id columns & does not ignore trailing zeros", { + + # Hub with both character & numeric output type ids & trailing zeros in + # numeric output type id + hub_path <- test_path("testdata/hub-chr") + # File with both character & numeric output type ids. 
+ # Contains Number that is coerced + # to 0.1 by `as.character` as well as by `arrow::cast`. + # Also contains trailing zeros. + file_path <- "UMass-gbq/2023-10-28-UMass-gbq.csv" + round_id <- "2023-10-28" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition" + ), + exact = TRUE + ) + + + # File with only numeric output type ids. + # Contains Number that is coerced + # to 0.1 by `as.character` as well as by `arrow::cast`. + file_path <- "UMass-gbq/2023-11-04-UMass-gbq.csv" + round_id <- "2023-11-04" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition" + ), + exact = TRUE + ) + + file_path <- "UMass-gbq/2023-11-11-UMass-gbq.csv" + # File with only numeric output type ids. + # Contains Number that is coerced + # to 0.1 by `as.character` but not by `arrow::cast` + round_id <- "2023-11-11" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition"), + exact = TRUE + ) + + # Hub with only numeric output type ids ---- + hub_path <- test_path("testdata/hub-num") + # File with only numeric output type ids. + # Contains Number that is coerced + # to 0.1 by `as.character` as well as by `arrow::cast`. 
+ file_path <- "UMass-gbq/2023-11-04-UMass-gbq.csv" + round_id <- "2023-11-04" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition" + ), + exact = TRUE + ) + + + # File with only numeric output type ids. + # Contains Number that is coerced + # to 0.1 by `as.character` as well as by `arrow::cast`. + # Also contains trailing zeros + file_path <- "UMass-gbq/2023-10-28-UMass-gbq.csv" + round_id <- "2023-10-28" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition" + ), + exact = TRUE + ) + + # File with only numeric output type ids. 
+ # Contains Number that is coerced + # to 0.1 by `as.character` but not by `arrow::cast` + file_path <- "UMass-gbq/2023-11-11-UMass-gbq.csv" + round_id <- "2023-11-11" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + expect_s3_class( + check_tbl_values( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_error", "hub_check", "rlang_error", "error", "condition"), + exact = TRUE + ) +}) diff --git a/tests/testthat/test-check_tbl_values_required.R b/tests/testthat/test-check_tbl_values_required.R index 4b65ca26..47997022 100644 --- a/tests/testthat/test-check_tbl_values_required.R +++ b/tests/testthat/test-check_tbl_values_required.R @@ -2,8 +2,10 @@ test_that("check_tbl_values_required works with 1 model task & completely opt co hub_path <- system.file("testhubs/simple", package = "hubValidations") file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" config_tasks <- hubUtils::read_config(hub_path, "tasks") - tbl <- hubValidations::read_model_out_file(file_path, hub_path) %>% - hubUtils::coerce_to_hub_schema(config_tasks) + tbl <- read_model_out_file(file_path, hub_path, + coerce_types = "chr") + tbl_hub <- read_model_out_file(file_path, hub_path, + coerce_types = "hub") round_id <- "2022-10-08" # Test all required but only optional location for optional output type @@ -21,7 +23,7 @@ test_that("check_tbl_values_required works with 1 model task & completely opt co expect_false("mean" %in% missing_req_block$missing$output_type) expect_equal( - missing_req_block$missing, tbl[1:23, names(tbl) != "value"] + missing_req_block$missing, tbl_hub[1:23, names(tbl_hub) != "value"] ) # Test missing required output_type_id for optional task ID @@ -35,7 +37,7 @@ test_that("check_tbl_values_required works with 1 model task & completely opt co expect_false("mean" %in% res_missing_otid$missing$output_type) expect_equal( - res_missing_otid$missing, tbl[24:26, names(tbl) != 
"value"] + res_missing_otid$missing, tbl_hub[24:26, names(tbl_hub) != "value"] ) }) @@ -45,9 +47,10 @@ test_that("check_tbl_values_required works with 2 separate model tasks & complet file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" round_id <- "2023-05-08" config_tasks <- hubUtils::read_config(hub_path, "tasks") - tbl <- hubValidations::read_model_out_file(file_path, hub_path) %>% - hubUtils::coerce_to_hub_schema(config_tasks) - + tbl <- read_model_out_file(file_path, hub_path, + coerce_types = "chr") + tbl_hub <- read_model_out_file(file_path, hub_path, + coerce_types = "hub") expect_snapshot( check_tbl_values_required(tbl, round_id, file_path, hub_path) ) @@ -57,7 +60,7 @@ test_that("check_tbl_values_required works with 2 separate model tasks & complet str(missing_required) ) expect_equal( - missing_required$missing, tbl[24:25, names(tbl) != "value"] + missing_required$missing, tbl_hub[24:25, names(tbl_hub) != "value"] ) missing_opt_otid <- check_tbl_values_required( @@ -68,14 +71,14 @@ test_that("check_tbl_values_required works with 2 separate model tasks & complet str(missing_opt_otid) ) expect_equal( - missing_opt_otid$missing, tbl[1:2, names(tbl) != "value"] + missing_opt_otid$missing, tbl_hub[1:2, names(tbl_hub) != "value"] ) pmf_row <- tbl[24, ] pmf_row$output_type <- "pmf" pmf_row$output_type_id <- "large_decrease" pmf_row$target <- "wk flu hosp rate change" - pmf_row$value <- 0.5 + pmf_row$value <- "0.5" missing_pmf <- check_tbl_values_required( pmf_row, round_id, @@ -102,7 +105,7 @@ test_that("check_tbl_values_required works with 2 separate model tasks & complet ) - pmf_row$horizon <- 1L + pmf_row$horizon <- "1" missing_horizon <- check_tbl_values_required( pmf_row, round_id, file_path, hub_path @@ -111,3 +114,66 @@ test_that("check_tbl_values_required works with 2 separate model tasks & complet str(missing_horizon) ) }) + +test_that( + "check_tbl_values_required correctly matches numeric output type IDs when output type ID col is 
character.", + { + hub_path <- test_path("testdata/hub-chr") + file_path <- "UMass-gbq/2023-10-28-UMass-gbq.csv" + round_id <- "2023-10-28" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + + check <- check_tbl_values_required( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ) + + expect_s3_class( + check, + c("check_failure", "hub_check", "rlang_warning", "warning", "condition") + ) + + # Expect that values for output type IDs "0.1000000000000000055511", + # and "0.150" (trailing zero retained yet lost on read of tasks config) + # are not correctly interpreted and are part of the missing output + # whereas "0.2" matches the values in config without the trailing zeros. + expect_length( + intersect( + c("0.1", "0.15", "0.2"), + check$missing$output_type_id + ), + 2L + ) + } +) + + + +test_that( + "check_tbl_values_required works when config contains non required modeling task.", + { + hub_path <- test_path("testdata/hub-it") + file_path <- "Tm-Md/2023-11-04-Tm-Md.csv" + round_id <- "2023-11-04" + tbl <- read_model_out_file( + file_path = file_path, + hub_path = hub_path, + coerce_types = "chr" + ) + expect_s3_class( + check_tbl_values_required( + tbl = tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), + c("check_success", "hub_check", "rlang_message", "message", "condition") + ) + } +) diff --git a/tests/testthat/test-opt_check_metadata_team_max_model_n.R b/tests/testthat/test-opt_check_metadata_team_max_model_n.R new file mode 100644 index 00000000..3ac60dc2 --- /dev/null +++ b/tests/testthat/test-opt_check_metadata_team_max_model_n.R @@ -0,0 +1,18 @@ +test_that("opt_check_metadata_team_max_model_n works", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + + expect_snapshot( + opt_check_metadata_team_max_model_n( + hub_path = hub_path, + file_path = "hub-baseline.yml" + ) + ) + + expect_snapshot( + 
opt_check_metadata_team_max_model_n( + hub_path = hub_path, + file_path = "hub-baseline.yml", + n_max = 1L + ) + ) +}) diff --git a/tests/testthat/test-parse_file_name.R b/tests/testthat/test-parse_file_name.R index 81075431..b3342c92 100644 --- a/tests/testthat/test-parse_file_name.R +++ b/tests/testthat/test-parse_file_name.R @@ -15,6 +15,21 @@ test_that("parse_file_name works", { ) ) + + expect_snapshot( + parse_file_name( + "hub-baseline.yml", + file_type = "model_metadata" + ) + ) + expect_snapshot( + parse_file_name( + "hubBaseline.yml", + file_type = "model_metadata" + ), + error = TRUE + ) + file_path <- "model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv" file_meta <- parse_file_name(file_path) expect_equal( @@ -31,6 +46,8 @@ test_that("parse_file_name works", { sep = "-") %>% paste(file_meta$ext, sep = ".") ) + + }) test_that("parse_file_name fails correctly", { diff --git a/tests/testthat/test-read_model_out_file.R b/tests/testthat/test-read_model_out_file.R index 91742ea0..4564cb4d 100644 --- a/tests/testthat/test-read_model_out_file.R +++ b/tests/testthat/test-read_model_out_file.R @@ -11,8 +11,7 @@ test_that("read_model_out_file works", { str( read_model_out_file( file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", - hub_path = system.file("testhubs/simple", package = "hubValidations"), - use_hub_schema = TRUE + hub_path = system.file("testhubs/simple", package = "hubValidations") ) ) ) diff --git a/tests/testthat/test-try_check.R b/tests/testthat/test-try_check.R index 675e3ecf..5df34bfb 100644 --- a/tests/testthat/test-try_check.R +++ b/tests/testthat/test-try_check.R @@ -1,6 +1,7 @@ test_that("try_check works", { hub_path <- system.file("testhubs/flusight", package = "hubValidations") + skip_if_offline() expect_snapshot( try_check( check_config_hub_valid(hub_path), diff --git a/tests/testthat/test-validate_pr.R b/tests/testthat/test-validate_pr.R index 6d771498..ddae6155 100644 --- a/tests/testthat/test-validate_pr.R +++ 
b/tests/testthat/test-validate_pr.R @@ -1,4 +1,5 @@ test_that("validate_pr works on valid PR", { + skip_if_offline() temp_hub <- fs::path(tempdir(), "valid_sb_hub") gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", @@ -18,6 +19,7 @@ test_that("validate_pr works on valid PR", { }) test_that("validate_pr works on invalid PR", { + skip_if_offline() temp_hub <- fs::path(tempdir(), "invalid_sb_hub") gert::git_clone(url = "https://github.com/Infectious-Disease-Modeling-Hubs/ci-testhub-simple", diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index 89eaaa13..e0a9d73b 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -1,4 +1,6 @@ test_that("validate_submission works", { + skip_if_offline() + hub_path <- system.file("testhubs/simple", package = "hubValidations") # File that passes validation @@ -91,6 +93,8 @@ test_that("validate_submission works", { }) test_that("validate_submission submission within window works", { + skip_if_offline() + hub_path <- system.file("testhubs/simple", package = "hubValidations") mockery::stub( @@ -109,6 +113,8 @@ test_that("validate_submission submission within window works", { }) test_that("validate_submission submission outside window fails correctly", { + skip_if_offline() + hub_path <- system.file("testhubs/simple", package = "hubValidations") mockery::stub( @@ -127,6 +133,8 @@ test_that("validate_submission submission outside window fails correctly", { }) test_that("validate_submission csv file read in and validated according to schema.", { + skip_if_offline() + expect_snapshot( str( validate_submission( @@ -139,6 +147,8 @@ test_that("validate_submission csv file read in and validated according to schem }) test_that("validate_submission fails when csv cannot be parsed according to schema.", { + skip_if_offline() + expect_s3_class( validate_submission( hub_path = test_path("testdata/hub"), 
diff --git a/tests/testthat/testdata/files/2023-11-11-UMass-gbq.parquet b/tests/testthat/testdata/files/2023-11-11-UMass-gbq.parquet new file mode 100644 index 00000000..d146ce8b Binary files /dev/null and b/tests/testthat/testdata/files/2023-11-11-UMass-gbq.parquet differ diff --git a/tests/testthat/testdata/hub-chr/hub-config/admin.json b/tests/testthat/testdata/hub-chr/hub-config/admin.json new file mode 100644 index 00000000..9dc93e50 --- /dev/null +++ b/tests/testthat/testdata/hub-chr/hub-config/admin.json @@ -0,0 +1,14 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json", + "name": "US CDC FluSight", + "maintainer": "US CDC", + "contact": { + "name": "Hub Admin Name", + "email": "hub-admin-email@example.com" + }, + "repository_host": "GitHub", + "repository_url": "https://github.com/cdcepi/FluSight-forecast-hub", + "file_format": ["csv"], + "timezone": "US/Eastern", + "model_output_dir": "model-output" +} diff --git a/tests/testthat/testdata/hub-chr/hub-config/tasks.json b/tests/testthat/testdata/hub-chr/hub-config/tasks.json new file mode 100644 index 00000000..a7acbfad --- /dev/null +++ b/tests/testthat/testdata/hub-chr/hub-config/tasks.json @@ -0,0 +1,299 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json", + "rounds": [ + { + "round_id_from_variable": true, + "round_id": "reference_date", + "model_tasks": [ + { + "task_ids": { + "reference_date": { + "required": null, + "optional": [ + "2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + 
"2024-04-27", "2024-05-04", "2024-05-11" + ] + }, + "target": { + "required": null, + "optional": ["wk flu hosp rate change"] + }, + "horizon": { + "required": null, + "optional": [-1, 0, 1, 2, 3] + }, + "location": { + "required": null, + "optional": [ + "US", + "01", + "02", + "04", + "05", + "06", + "08", + "09", + "10", + "11", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "53", + "54", + "55", + "56", + "72" + ] + }, + "target_end_date": { + "required": null, + "optional": [ + "2023-09-23", "2023-09-30", "2023-10-07", + "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11", "2024-05-18", + "2024-05-25", "2024-06-01" + ] + } + }, + "output_type": { + "pmf": { + "output_type_id": { + "required": [ + "large_decrease", + "decrease", + "stable", + "increase", + "large_increase" + ], + "optional": null + }, + "value": { + "type": "double", + "minimum": 0, + "maximum": 1 + } + } + }, + "target_metadata": [ + { + "target_id": "flu hosp rate change", + "target_name": "week ahead weekly influenza hospitalization rate change", + "target_units": "rate per 100,000 population", + "target_keys": { + "target": [ + "wk flu hosp rate change" + ] + }, + "target_type": "ordinal", + "description": "This target represents the change in the rate of new hospitalizations per week comparing the week ending on the reference_date to the week 
ending [horizon] weeks after the reference_date, on target_end_date.", + "is_step_ahead": true, + "time_unit": "week" + } + ] + }, + { + "task_ids": { + "reference_date": { + "required": null, + "optional": [ + "2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11" + ] + }, + "target": { + "required": null, + "optional": ["wk inc flu hosp"] + }, + "horizon": { + "required": null, + "optional": [-1, 0, 1, 2, 3] + }, + "location": { + "required": null, + "optional": [ + "US", + "01", + "02", + "04", + "05", + "06", + "08", + "09", + "10", + "11", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "53", + "54", + "55", + "56", + "72" + ] + }, + "target_end_date": { + "required": null, + "optional": [ + "2023-09-23", "2023-09-30", "2023-10-07", + "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11", "2024-05-18", + "2024-05-25", "2024-06-01" + ] + } + }, + "output_type": { + "quantile": { + "output_type_id": { + "required": [ + 0.010, 
+ 0.025, + 0.050, + 0.100, + 0.150, + 0.200, + 0.250, + 0.300, + 0.350, + 0.400, + 0.450, + 0.500, + 0.550, + 0.600, + 0.650, + 0.700, + 0.750, + 0.800, + 0.850, + 0.900, + 0.950, + 0.975, + 0.990 + ], + "optional": null + }, + "value": { + "type": "double", + "minimum": 0 + } + } + }, + "target_metadata": [ + { + "target_id": "wk inc flu hosp", + "target_name": "incident influenza hospitalizations", + "target_units": "count", + "target_keys": { + "target": [ + "wk inc flu hosp" + ] + }, + "target_type": "continuous", + "description": "This target represents the count of new hospitalizations in the week ending on the date [horizon] weeks after the reference_date, on the target_end_date.", + "is_step_ahead": true, + "time_unit": "week" + } + ] + } + ], + "submissions_due": { + "relative_to": "reference_date", + "start": -6, + "end": -3 + } + } + ] +} diff --git a/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv new file mode 100644 index 00000000..389903d1 --- /dev/null +++ b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv @@ -0,0 +1,9 @@ +"reference_date","horizon","target","location","target_end_date","output_type","output_type_id","value" +2023-10-28,0,"wk flu hosp rate change","01",2023-10-28,"pmf","large_increase",0.1 +2023-10-28,0,"wk flu hosp rate change","01",2023-10-28,"pmf","increase",0.2 +2023-10-28,0,"wk flu hosp rate change","01",2023-10-28,"pmf","stable",0.25 +2023-10-28,0,"wk flu hosp rate change","01",2023-10-28,"pmf","decrease",0.3 +2023-10-28,0,"wk flu hosp rate change","01",2023-10-28,"pmf","large_decrease",0.15 +2023-10-28,0,"wk inc flu hosp","01",2023-10-28,"quantile",0.1000000000000000055511,4 +2023-10-28,0,"wk inc flu hosp","01",2023-10-28,"quantile",0.150,5 +2023-10-28,0,"wk inc flu hosp","01",2023-10-28,"quantile",0.2,6 diff --git 
a/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv new file mode 100644 index 00000000..4ec6d503 --- /dev/null +++ b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv @@ -0,0 +1,2 @@ +"reference_date","horizon","target","location","target_end_date","output_type","output_type_id","value" +2023-11-04,0,"wk inc flu hosp","01",2023-11-04,"quantile",0.1000000000000000055511,4 diff --git a/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv new file mode 100644 index 00000000..23367040 --- /dev/null +++ b/tests/testthat/testdata/hub-chr/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv @@ -0,0 +1,2 @@ +"reference_date","horizon","target","location","target_end_date","output_type","output_type_id","value" +2023-11-11,0,"wk inc flu hosp","01",2023-11-11,"quantile",0.09999999999999997779554,4 diff --git a/tests/testthat/testdata/hub-it/hub-config/admin.json b/tests/testthat/testdata/hub-it/hub-config/admin.json new file mode 100644 index 00000000..2adb412a --- /dev/null +++ b/tests/testthat/testdata/hub-it/hub-config/admin.json @@ -0,0 +1,17 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json", + "name": "Influenza Forecasting Hub Europe", + "maintainer": "ISI Foundation", + "contact": { + "name": "Hub Admin Name", + "email": "hub-admin-email@example.com" + }, + "repository_url": "https://github.com/european-modelling-hubs/flu-forecast-hub", + "file_format": ["csv"], + "hub_models": [{ + "team_abbr": "ECDC", + "model_abbr": "TBD", + "model_type": "ensemble" + }], + "timezone": "Europe/Stockholm" +} diff --git a/tests/testthat/testdata/hub-it/hub-config/model-metadata-schema.json b/tests/testthat/testdata/hub-it/hub-config/model-metadata-schema.json 
new file mode 100644 index 00000000..0d8aa447 --- /dev/null +++ b/tests/testthat/testdata/hub-it/hub-config/model-metadata-schema.json @@ -0,0 +1,113 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Schema for Modeling Hub model metadata", + "description": "This is the schema for hub model metadata files, please refer to https://github.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/wiki/Metadata for more information.", + "type": "object", + "properties": { + "team_name": { + "description": "The name of the team submitting the model", + "type": "string", + "maxLength": 50 + }, + "team_abbr": { + "description": "Abbreviated name of the team submitting the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_name": { + "description": "The name of the model", + "type": "string", + "maxLength": 50 + }, + "model_abbr": { + "description": "Abbreviated name of the model", + "type": "string", + "pattern": "^[a-zA-Z0-9_+]+$", + "maxLength": 16 + }, + "model_contributors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "affiliation": { + "type": "string" + }, + "orcid": { + "type": "string", + "pattern": "^\\d{4}\\-\\d{4}\\-\\d{4}\\-[\\dX]{4}$" + }, + "email": { + "type": "string", + "format": "email" + }, + "twitter": { + "type": "string" + }, + "additionalProperties": false + } + } + }, + "team_model_designation": { + "type": "string", + "enum": ["primary", "secondary", "proposed", "other"] + }, + "model_version": { + "description": "Identifier of the version of the model", + "type": "string" + }, + "methods": { + "description": "A brief (200 char.) 
description of the methods used by this model", + "type": "string", + "maxLength": 200 + }, + "license": { + "description": "License for use of model output data", + "type": "string", + "enum": [ + "CC0-1.0", + "CC-BY-4.0", + "CC-BY_SA-4.0", + "PPDL", + "ODC-by", + "ODbL", + "OGL-3.0" + ] + }, + "team_funding": { + "description": "Any information about funding source for the team or members of the team.", + "type": "string" + }, + "website_url": { + "description": "Public facing website for the model", + "type": "string", + "format": "uri" + }, + "data_inputs": { + "description": "List or description of data inputs used by the model", + "type": "string" + }, + "citation": { + "description": "One or more citations for this model", + "type": "string" + }, + "methods_long": { + "description": "A full description of the methods used by this model.", + "type": "string" + } + }, + "additionalProperties": true, + "required": [ + "team_name", + "model_name", + "team_abbr", + "model_abbr", + "model_contributors", + "team_model_designation", + "methods" + ] +} diff --git a/tests/testthat/testdata/hub-it/hub-config/tasks.json b/tests/testthat/testdata/hub-it/hub-config/tasks.json new file mode 100644 index 00000000..7231aa88 --- /dev/null +++ b/tests/testthat/testdata/hub-it/hub-config/tasks.json @@ -0,0 +1,246 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json", + "rounds": [ + { + "round_id_from_variable": true, + "round_id": "origin_date", + "model_tasks": [ + { + "task_ids": { + "origin_date": { + "required": null, + "optional": [ + "2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + 
"2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11" + ] + }, + "target": { + "required": ["ILI incidence"], + "optional": null + }, + "target_end_date": { + "required": null, + "optional": [ + "2023-09-23", "2023-09-30", "2023-10-07", + "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11", "2024-05-18", + "2024-05-25", "2024-06-01" + ] + }, + "horizon": { + "required": null, + "optional": [1, 2, 3, 4] + }, + "location": { + "required": null, + "optional": [ + "AT", + "BE", + "BG", + "CH", + "CY", + "CZ", + "DE", + "DK", + "EE", + "ES", + "FI", + "FR", + "UK", + "GR", + "HR", + "HU", + "IE", + "IS", + "IT", + "LI", + "LT", + "LU", + "LV", + "MT", + "NL", + "NO", + "PL", + "PT", + "RO", + "SE", + "SI", + "SK" + ] + } + }, + "output_type": { + "quantile": { + "output_type_id": { + "required": [ + 0.010, + 0.025, + 0.050, + 0.100, + 0.150, + 0.200, + 0.250, + 0.300, + 0.350, + 0.400, + 0.450, + 0.500, + 0.550, + 0.600, + 0.650, + 0.700, + 0.750, + 0.800, + 0.850, + 0.900, + 0.950, + 0.975, + 0.990 + ], + "optional": null + }, + "value": { + "type": "double", + "minimum": 0 + } + } + }, + "target_metadata": [ + { + "target_id": "ILI incidence", + "target_name": "Weekly incidence for Influenza like illness", + "target_units": "cases per 100,000 population", + "target_keys": { + "target": "ILI incidence" + }, + "description": "This target represents the count of new ILI cases per 100,000 in the week ending on the date [horizon] weeks after the reference_date", + "target_type": "continuous", + "is_step_ahead": true, + "time_unit": "week" + 
} + ] + }, + { + "task_ids": { + "origin_date": { + "required": null, + "optional": [ + "2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11" + ] + }, + "target": { + "required": ["ILI incidence median"], + "optional": null + }, + "target_end_date": { + "required": null, + "optional": [ + "2023-09-23", "2023-09-30", "2023-10-07", + "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11", "2024-05-18", + "2024-05-25", "2024-06-01" + ] + }, + "horizon": { + "required": null, + "optional": [1, 2, 3, 4] + }, + "location": { + "required": null, + "optional": [ + "AT", + "BE", + "BG", + "CH", + "CY", + "CZ", + "DE", + "DK", + "EE", + "ES", + "FI", + "FR", + "UK", + "GR", + "HR", + "HU", + "IE", + "IS", + "IT", + "LI", + "LT", + "LU", + "LV", + "MT", + "NL", + "NO", + "PL", + "PT", + "RO", + "SE", + "SI", + "SK" + ] + } + }, + "output_type": { + "median": { + "output_type_id": { + "required": null, + "optional": ["NA"] + }, + "value": { + "type": "double", + "minimum": 0 + } + } + }, + "target_metadata": [ + { + "target_id": "ILI incidence median", + "target_name": "Weekly incidence for Influenza like illness", + "target_units": "cases per 100,000 population", + "target_keys": { + 
"target": "ILI incidence median" + }, + "description": "This target represents the count of new ILI cases per 100,000 in the week ending on the date [horizon] weeks after the reference_date", + "target_type": "continuous", + "is_step_ahead": true, + "time_unit": "week" + } + ] + } + ], + "submissions_due": { + "relative_to": "origin_date", + "start": -6, + "end": 0 + } + } + ] +} diff --git a/tests/testthat/testdata/hub-it/model-metadata/Tm-Md.yml b/tests/testthat/testdata/hub-it/model-metadata/Tm-Md.yml new file mode 100644 index 00000000..6660fdb2 --- /dev/null +++ b/tests/testthat/testdata/hub-it/model-metadata/Tm-Md.yml @@ -0,0 +1,14 @@ +team_name: Tm +model_name: Md +team_abbr: Tm +model_abbr: Md +model_contributors: + - name: Contributor 1 + affiliation: Affiliation 1 + email: user1@example.com + - name: Contributor 2 + affiliation: Affiliation 2 + email: user2@example.com +team_model_designation: primary +methods: A simple SIR model. +license: CC-BY-4.0 diff --git a/tests/testthat/testdata/hub-it/model-output/README.md b/tests/testthat/testdata/hub-it/model-output/README.md new file mode 100644 index 00000000..43146a7f --- /dev/null +++ b/tests/testthat/testdata/hub-it/model-output/README.md @@ -0,0 +1,3 @@ +# Model outputs folder + +This folder should contain a set of subdirectories, one for each model, that contains submitted model output files for that model. The structure of the directories and their contents should follow [the model output guidelines in our documentation](https://hubdocs.readthedocs.io/en/latest/format/model-output.html). 
diff --git a/tests/testthat/testdata/hub-it/model-output/Tm-Md/2023-11-04-Tm-Md.csv b/tests/testthat/testdata/hub-it/model-output/Tm-Md/2023-11-04-Tm-Md.csv new file mode 100644 index 00000000..c508bd26 --- /dev/null +++ b/tests/testthat/testdata/hub-it/model-output/Tm-Md/2023-11-04-Tm-Md.csv @@ -0,0 +1,289 @@ +origin_date,target,target_end_date,horizon,location,output_type,output_type_id,value +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.350,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.400,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.450,0.0 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.500,0.029357258968932362 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.550,1.1028429277464646 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.600,1.9897653596461344 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.650,3.195233137566309 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.700,4.091944600108246 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.750,5.066574962855521 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.800,6.091125868172943 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.850,7.530892678941803 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.900,9.216293392980175 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.950,11.718109389348063 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.975,13.588703326238031 +2023-11-04,ILI incidence,2023-11-11,1,IT,quantile,0.990,15.821737313936694 +2023-11-04,ILI 
incidence median,2023-11-11,1,IT,median,,0.029357258968932362 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.350,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.400,0.0 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.450,0.36478785327876123 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.500,1.3821681703818822 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.550,2.2663641769030005 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.600,3.058890361375467 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.650,4.00825453112969 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.700,5.1290251233874535 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.750,6.242572299079935 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.800,7.183467594478021 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.850,8.118591542840528 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.900,10.165970786367879 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.950,12.606152162278894 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.975,14.745793405653766 +2023-11-04,ILI incidence,2023-11-18,2,IT,quantile,0.990,17.13058699664763 +2023-11-04,ILI incidence median,2023-11-18,2,IT,median,,1.3821681703818822 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.100,0.0 +2023-11-04,ILI 
incidence,2023-11-25,3,IT,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.250,0.8872104479681555 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.300,1.6313249059859454 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.350,2.5456615136957064 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.400,3.4425795368240752 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.450,4.291804120733905 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.500,5.122966398671371 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.550,5.789914494998996 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.600,6.841075235545624 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.650,7.994582572009458 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.700,8.816716918156281 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.750,9.79475722980808 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.800,10.948294168061963 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.850,12.087070615443702 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.900,13.942553787152306 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.950,16.792213920563935 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.975,18.410187588229373 +2023-11-04,ILI incidence,2023-11-25,3,IT,quantile,0.990,20.58749002211248 +2023-11-04,ILI incidence median,2023-11-25,3,IT,median,,5.122966398671371 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.050,1.6403576700586624 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.100,4.392918187290246 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.150,5.759214792065849 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.200,7.095353655213531 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.250,8.61951975834146 +2023-11-04,ILI 
incidence,2023-12-02,4,IT,quantile,0.300,9.782587866397227 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.350,10.627494197815544 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.400,11.584024967230706 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.450,12.479959443270397 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.500,13.368567978290859 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.550,14.103381249987258 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.600,15.0161992888687 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.650,15.67165221034719 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.700,16.532875046478374 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.750,17.703354338429325 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.800,18.963765034705066 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.850,20.288260303412578 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.900,22.15983617915686 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.950,24.759813539474603 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.975,26.60325290383086 +2023-11-04,ILI incidence,2023-12-02,4,IT,quantile,0.990,29.0118799816005 +2023-11-04,ILI incidence median,2023-12-02,4,IT,median,,13.368567978290859 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.350,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.400,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.450,0.0 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.500,0.0 
+2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.550,0.5513689809674843 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.600,1.7595035896304996 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.650,2.5933343640103974 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.700,3.891651690257573 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.750,4.98639068128074 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.800,6.0547599431715255 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.850,7.210155109622276 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.900,9.538909314325496 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.950,12.18751515884576 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.975,14.527339153137785 +2023-11-04,ILI incidence,2023-11-11,1,FR,quantile,0.990,16.56587874801082 +2023-11-04,ILI incidence median,2023-11-11,1,FR,median,,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.350,0.0 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.400,0.3191145517655898 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.450,1.2514087079722875 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.500,2.1315825915097126 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.550,3.1084959622694566 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.600,4.043892246032586 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.650,5.093639138317161 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.700,6.053687778585977 +2023-11-04,ILI 
incidence,2023-11-18,2,FR,quantile,0.750,7.0654037099771445 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.800,8.319885062218265 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.850,10.043139823690527 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.900,11.803107616507221 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.950,14.327115817358882 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.975,15.993648946073941 +2023-11-04,ILI incidence,2023-11-18,2,FR,quantile,0.990,18.9323698647802 +2023-11-04,ILI incidence median,2023-11-18,2,FR,median,,2.1315825915097126 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.150,0.3568275008502194 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.200,1.7610435066274328 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.250,2.697399278548078 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.300,3.880722119019967 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.350,4.869321511818473 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.400,5.783964359050683 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.450,6.520158928382167 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.500,7.263954773043887 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.550,7.966179129747899 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.600,8.789156515441624 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.650,9.625124998754535 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.700,10.549485442252655 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.750,11.538300846143915 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.800,12.823697648810217 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.850,14.066687401204867 +2023-11-04,ILI 
incidence,2023-11-25,3,FR,quantile,0.900,15.731463368587924 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.950,18.445595933037872 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.975,19.959166485419498 +2023-11-04,ILI incidence,2023-11-25,3,FR,quantile,0.990,22.32436829439133 +2023-11-04,ILI incidence median,2023-11-25,3,FR,median,,7.263954773043887 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.025,0.9878162707850902 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.050,3.372428514256848 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.100,5.511273581791094 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.150,7.122779848395153 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.200,9.002638720045551 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.250,10.171568397428484 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.300,11.263204501908756 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.350,12.243747213351726 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.400,13.185400793117871 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.450,13.839419557279172 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.500,15.018539279101738 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.550,15.881851065281284 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.600,16.754082047217818 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.650,17.518762357050136 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.700,18.591216563411074 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.750,19.770972796764973 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.800,21.024745566047336 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.850,22.45878865303707 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.900,23.911884770834845 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.950,26.622900672196497 +2023-11-04,ILI 
incidence,2023-12-02,4,FR,quantile,0.975,28.776949595905126 +2023-11-04,ILI incidence,2023-12-02,4,FR,quantile,0.990,31.284213624791317 +2023-11-04,ILI incidence median,2023-12-02,4,FR,median,,15.018539279101738 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.025,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.350,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.400,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.450,0.0 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.500,0.3171374687037829 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.550,0.9878582851588306 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.600,1.871438880276944 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.650,2.629902392380684 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.700,3.637362289136109 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.750,4.676889582693112 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.800,5.77391169370684 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.850,7.3334751400420215 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.900,9.082664779232045 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.950,11.854470001034825 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.975,13.915645070164647 +2023-11-04,ILI incidence,2023-11-11,1,ES,quantile,0.990,15.27160420686785 +2023-11-04,ILI incidence median,2023-11-11,1,ES,median,,0.3171374687037829 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.010,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.025,0.0 
+2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.050,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.100,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.150,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.200,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.250,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.300,0.0 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.350,0.3419882266994862 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.400,1.2430372814006276 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.450,2.066471829969892 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.500,2.8820539300691927 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.550,3.791425526369985 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.600,4.705801810994205 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.650,5.732698757510665 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.700,6.852471246454816 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.750,7.774491647381478 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.800,8.880888601414934 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.850,10.08775430958972 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.900,11.708896853165676 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.950,14.733093322679881 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.975,16.874231193596305 +2023-11-04,ILI incidence,2023-11-18,2,ES,quantile,0.990,18.85917795420126 +2023-11-04,ILI incidence median,2023-11-18,2,ES,median,,2.8820539300691927 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.010,0.7663022266147527 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.025,3.5146811784017733 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.050,5.890324954795662 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.100,9.117723837967564 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.150,10.561875382580412 +2023-11-04,ILI 
incidence,2023-11-25,3,ES,quantile,0.200,11.789381006811906 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.250,12.999993953034938 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.300,13.933908915028539 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.350,14.898090853468652 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.400,15.736821420734254 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.450,16.668837294134466 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.500,17.6224903592929 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.550,18.396208026668013 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.600,19.256065297425273 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.650,20.350679238908498 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.700,21.3694346042684 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.750,22.31138130465997 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.800,23.46726583725081 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.850,24.92219427546655 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.900,26.926606264663207 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.950,28.986617622474046 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.975,30.951266794264235 +2023-11-04,ILI incidence,2023-11-25,3,ES,quantile,0.990,32.509746721793675 +2023-11-04,ILI incidence median,2023-11-25,3,ES,median,,17.6224903592929 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.010,27.57893031775253 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.025,30.622152097852883 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.050,32.56009913180381 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.100,34.698449274759575 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.150,36.22012812501539 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.200,37.87081452109703 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.250,39.00899819181687 +2023-11-04,ILI 
incidence,2023-12-02,4,ES,quantile,0.300,40.16441445216445 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.350,41.00136798745539 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.400,41.96912051328987 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.450,42.91590097697498 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.500,43.972536380212475 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.550,44.646231372292426 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.600,45.36868379542609 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.650,46.218267603630366 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.700,47.26922240351603 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.750,48.24713233798704 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.800,49.67872675846686 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.850,50.78665848515012 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.900,52.828492442105876 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.950,55.21238164918899 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.975,57.021246206153364 +2023-11-04,ILI incidence,2023-12-02,4,ES,quantile,0.990,60.241883984190714 +2023-11-04,ILI incidence median,2023-12-02,4,ES,median,,43.972536380212475 diff --git a/tests/testthat/testdata/hub-num/hub-config/admin.json b/tests/testthat/testdata/hub-num/hub-config/admin.json new file mode 100644 index 00000000..9dc93e50 --- /dev/null +++ b/tests/testthat/testdata/hub-num/hub-config/admin.json @@ -0,0 +1,14 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/admin-schema.json", + "name": "US CDC FluSight", + "maintainer": "US CDC", + "contact": { + "name": "Hub Admin Name", + "email": "hub-admin-email@example.com" + }, + "repository_host": "GitHub", + "repository_url": "https://github.com/cdcepi/FluSight-forecast-hub", + "file_format": ["csv"], + "timezone": "US/Eastern", + "model_output_dir": "model-output" +} 
diff --git a/tests/testthat/testdata/hub-num/hub-config/tasks.json b/tests/testthat/testdata/hub-num/hub-config/tasks.json new file mode 100644 index 00000000..b0e0c2d7 --- /dev/null +++ b/tests/testthat/testdata/hub-num/hub-config/tasks.json @@ -0,0 +1,166 @@ +{ + "schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v2.0.0/tasks-schema.json", + "rounds": [ + { + "round_id_from_variable": true, + "round_id": "reference_date", + "model_tasks": [ + { + "task_ids": { + "reference_date": { + "required": null, + "optional": [ + "2023-10-07", "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", "2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11" + ] + }, + "target": { + "required": null, + "optional": ["wk inc flu hosp"] + }, + "horizon": { + "required": null, + "optional": [-1, 0, 1, 2, 3] + }, + "location": { + "required": null, + "optional": [ + "US", + "01", + "02", + "04", + "05", + "06", + "08", + "09", + "10", + "11", + "12", + "13", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "53", + "54", + "55", + "56", + "72" + ] + }, + "target_end_date": { + "required": null, + "optional": [ + "2023-09-23", "2023-09-30", "2023-10-07", + "2023-10-14", "2023-10-21", "2023-10-28", "2023-11-04", + "2023-11-11", "2023-11-18", "2023-11-25", "2023-12-02", + "2023-12-09", "2023-12-16", "2023-12-23", "2023-12-30", + "2024-01-06", "2024-01-13", "2024-01-20", 
"2024-01-27", + "2024-02-03", "2024-02-10", "2024-02-17", "2024-02-24", + "2024-03-02", "2024-03-09", "2024-03-16", "2024-03-23", + "2024-03-30", "2024-04-06", "2024-04-13", "2024-04-20", + "2024-04-27", "2024-05-04", "2024-05-11", "2024-05-18", + "2024-05-25", "2024-06-01" + ] + } + }, + "output_type": { + "quantile": { + "output_type_id": { + "required": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.25, + 0.3, + 0.35, + 0.4, + 0.45, + 0.5, + 0.55, + 0.6, + 0.65, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 0.975, + 0.99 + ], + "optional": null + }, + "value": { + "type": "double", + "minimum": 0 + } + } + }, + "target_metadata": [ + { + "target_id": "wk inc flu hosp", + "target_name": "incident influenza hospitalizations", + "target_units": "count", + "target_keys": { + "target": [ + "wk inc flu hosp" + ] + }, + "target_type": "continuous", + "description": "This target represents the count of new hospitalizations in the week ending on the date [horizon] weeks after the reference_date, on the target_end_date.", + "is_step_ahead": true, + "time_unit": "week" + } + ] + } + ], + "submissions_due": { + "relative_to": "reference_date", + "start": -6, + "end": -3 + } + } + ] +} diff --git a/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv new file mode 100644 index 00000000..eb3ce681 --- /dev/null +++ b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-10-28-UMass-gbq.csv @@ -0,0 +1,4 @@ +reference_date,horizon,target,location,target_end_date,output_type,output_type_id,value +2023-10-28,0,wk inc flu hosp,1,2023-10-28,quantile,0.1000000000000000000000,4 +2023-10-28,0,wk inc flu hosp,1,2023-10-28,quantile,0.150,5 +2023-10-28,0,wk inc flu hosp,1,2023-10-28,quantile,0.2,6 \ No newline at end of file diff --git a/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv 
b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv new file mode 100644 index 00000000..4ec6d503 --- /dev/null +++ b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-04-UMass-gbq.csv @@ -0,0 +1,2 @@ +"reference_date","horizon","target","location","target_end_date","output_type","output_type_id","value" +2023-11-04,0,"wk inc flu hosp","01",2023-11-04,"quantile",0.1000000000000000055511,4 diff --git a/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv new file mode 100644 index 00000000..23367040 --- /dev/null +++ b/tests/testthat/testdata/hub-num/model-output/UMass-gbq/2023-11-11-UMass-gbq.csv @@ -0,0 +1,2 @@ +"reference_date","horizon","target","location","target_end_date","output_type","output_type_id","value" +2023-11-11,0,"wk inc flu hosp","01",2023-11-11,"quantile",0.09999999999999997779554,4