diff --git a/R/dist.R b/R/dist.R index 1b8ca80ad..03fb7d555 100644 --- a/R/dist.R +++ b/R/dist.R @@ -87,41 +87,56 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model, if (model == "exp") { # define support functions for exponential dist rdist <- function(n) { - rexp(n, params$rate) + rexp(n, params[["rate"]]) } pdist <- function(n) { - pexp(n, params$rate) / pexp(max_value, params$rate) + pexp(n, params[["rate"]]) / pexp(max_value, params[["rate"]]) } ddist <- function(n) { - (pexp(n + 1, params$rate) - - pexp(n, params$rate)) / - pexp(max_value + 1, params$rate) + (pexp(n + 1, params[["rate"]]) - + pexp(n, params[["rate"]])) / + pexp(max_value + 1, params[["rate"]]) } } else if (model == "gamma") { rdist <- function(n) { - rgamma(n, params$shape, params$scale) + rgamma(n = n, shape = params[["shape"]], rate = params[["rate"]]) } pdist <- function(n) { - pgamma(n, params$shape, params$scale) / - pgamma(max_value + 1, params$shape, params$scale) + pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]]) / + pgamma( + q = max_value + 1, shape = params[["shape"]], rate = params[["rate"]] + ) } ddist <- function(n) { - (pgamma(n + 1, params$shape, params$scale) - - pgamma(n, params$shape, params$scale)) / - pgamma(max_value + 1, params$shape, params$scale) + (pgamma(q = n + 1, shape = params[["shape"]], rate = params[["rate"]]) - + pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]])) / + pgamma(q = max_value + 1, params[["shape"]], rate = params[["rate"]]) } } else if (model == "lognormal") { rdist <- function(n) { - rlnorm(n, params$mean, params$sd) + rlnorm(n, params[["mean"]], params[["sd"]]) + } + pdist <- function(n) { + plnorm(n, params[["mean"]], params[["sd"]]) / + plnorm(max_value + 1, params[["mean"]], params[["sd"]]) + } + ddist <- function(n) { + (plnorm(n + 1, params[["mean"]], params[["sd"]]) - + plnorm(n, params[["mean"]], params[["sd"]])) / + plnorm(max_value + 1, params[["mean"]], params[["sd"]]) + } + } else if (model %in% "normal") { + rdist <- function(n) { + rnorm(n, params[["mean"]], params[["sd"]]) } pdist <- function(n) { - plnorm(n, params$mean, params$sd) / - plnorm(max_value + 1, params$mean, params$sd) + pnorm(n, params[["mean"]], params[["sd"]]) / + pnorm(max_value + 1, params[["mean"]], params[["sd"]]) } ddist <- function(n) { - (plnorm(n + 1, params$mean, params$sd) - - plnorm(n, params$mean, params$sd)) / - plnorm(max_value + 1, params$mean, params$sd) + (pnorm(n + 1, params[["mean"]], params[["sd"]]) - + pnorm(n, params[["mean"]], params[["sd"]])) / + pnorm(max_value + 1, params[["mean"]], params[["sd"]]) } } @@ -542,7 +557,6 @@ bootstrapped_dist_fit <- function(values, dist = "lognormal", return(out) } - if (bootstraps == 1) { dist_samples <- get_single_dist(values, samples = samples) } else { @@ -893,7 +907,7 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' @author Sebastian Funk #' @author Sam Abbott #' @importFrom rlang warn -#' @export +#' @keywords internal #' @examples #' # A fixed lognormal distribution with mean 5 and sd 1. #' dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") @@ -903,9 +917,12 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, #' distribution = "gamma" #' ) -dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0, - distribution = c("lognormal", "gamma"), max, - pmf = numeric(0), fixed = FALSE) { +dist_spec <- function(distribution = c( + "lognormal", "normal", "gamma", "fixed", "empty" + ), + params_mean = c(), params_sd = c(), + mean, sd, mean_sd = 0, sd_sd = 0, + max = Inf, pmf = numeric(0), fixed = FALSE) { ## deprecate previous behaviour warn( message = paste( @@ -926,98 +943,87 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0, "fix_dist()", "The argument will be removed completely in version 2.1.0." ) - mean_sd <- 0 - sd_sd <- 0 + } + ## check for deprecated parameters + if (!all(missing(mean), missing(sd), missing(mean_sd), missing(sd_sd)) && + (!missing(params_mean || !missing(params_sd)))) { + stop("Distributional should not be given as `mean`, `sd`, etc. ", + "in addition to `params_mean` or `params_sd`") + } + call <- match.call() + for (deprecated_arg in c("mean", "sd", "mean_sd", "sd_sd")) { + if (!is.null(call[[deprecated_arg]])) { + deprecate_warn( + "2.0.0", + paste0("dist_spec(", deprecated_arg, ")"), + "dist_spec(param)", + "The argument will be removed completely in version 2.1.0." + ) + params[[deprecated_arg]] <- get(deprecated_arg) + } } ## check if parametric or nonparametric if (length(pmf) > 0 && !all( - missing(mean), missing(sd), missing(mean_sd), - missing(sd_sd), missing(distribution), missing(max) + missing(distribution), missing(params_mean), missing(params_sd), + missing(mean), missing(sd), missing(mean_sd), missing(sd_sd) )) { stop("Distributional parameters or a pmf can be specified, but not both.") } - fixed <- mean_sd == 0 && sd_sd == 0 + distribution <- match.arg(distribution) - ## check parametric parameters make sense - if (!missing(mean)) { - if (sd == 0 && sd_sd == 0) { ## integer fixed - if (mean %% 1 != 0) { - stop( - "When a distribution is set to a constant ", - "(sd == 0 and sd_sd == 0) then the mean parameter ", - "must be an integer." - ) - } - max <- mean - if (mean_sd > 0) { - stop( - "When a distribution has sd == 0 and ", - "sd_sd == 0 then mean_sd must be 0, too." - ) - } + if (distribution == "fixed") { + ## if integer fixed then can write the PMF + if (as.integer(params_mean) == params_mean) { + max <- params_mean + parametric <- TRUE } else { - if (missing(max)) { - stop("Maximum of parametric distributions must be specified.") - } + parametric <- FALSE + } + if (length(params_sd) > 0 && any(params_sd) > 0) { + stop("Fixed parameters cannot have a nonzero standard deviation.") } + params_sd <- numeric(0) } else { - if (!all( - missing(sd), missing(mean_sd), - missing(sd_sd), missing(distribution), missing(max) - )) { - stop( - "If any distributional parameters are given then so must the mean." - ) + ## if PMF is given, set max + if (is.infinite(max) && length(pmf) > 0) { + max <- length(pmf) - 1 } + parametric <- all(params_sd == 0) && is.finite(max) } - - distribution <- match.arg(distribution) - if (fixed) { + if (parametric) { ## calculate pmf ret <- list( - mean_mean = numeric(0), - mean_sd = numeric(0), - sd_mean = numeric(0), - sd_sd = numeric(0), + params_mean = numeric(0), + params_sd = numeric(0), dist = character(0), - max = integer(0) + max = integer(0), + parametric = FALSE ) if (length(pmf) == 0) { - if (missing(mean)) { ## empty + if (distribution == "empty") { ## empty ret <- c(ret, list( n = 0, n_p = 0, n_np = 0, np_pmf = numeric(0), - fixed = integer(0) + parametric = logical(0) )) } else { ## parametric fixed - if (sd == 0) { ## delta - pmf <- c(rep(0, mean), 1) + if (distribution == "fixed") { ## delta + pmf <- c(rep(0, params_mean), 1) } else { - if (distribution == "lognormal") { - params <- lognorm_dist_def( - mean = mean, mean_sd = mean_sd, - sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 - ) - } else if (distribution == "gamma") { - params <- gamma_dist_def( - mean = mean, mean_sd = mean_sd, - sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 - ) - } + params <- params_mean + names(params) <- natural_params(distribution) pmf <- dist_skel( n = seq_len(max + 1) - 1, dist = TRUE, cum = FALSE, - model = distribution, params = params$params[[1]], max_value = max, + model = distribution, params = params, max_value = max, discrete = TRUE ) } } } else { ## nonparametric fixed - if (!missing(max) && (max + 1) < length(pmf)) { - pmf <- pmf[1:(max + 1)] - } + pmf <- pmf[1:(max + 1)] pmf <- pmf / sum(pmf) } if (length(pmf) > 0) { @@ -1025,28 +1031,26 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0, n = 1, n_p = 0, n_np = 1, - np_pmf = pmf, - fixed = 1L + np_pmf = pmf )) } } else { ret <- list( - mean_mean = mean, - mean_sd = mean_sd, - sd_mean = sd, - sd_sd = sd_sd, + params_mean = params_mean, + params_sd = params_sd, dist = distribution, max = max, n = 1, n_p = 1, n_np = 0, np_pmf = numeric(0), - fixed = 0L + parametric = TRUE ) } ret <- purrr::map(ret, array) sum_args <- grep("(^n$|^n_$)", names(ret)) ret$np_pmf_length <- length(ret$np_pmf) + ret$params_length <- length(ret$params_mean) ret[sum_args] <- purrr::map(ret[sum_args], sum) attr(ret, "class") <- c("list", "dist_spec") return(ret) @@ -1198,26 +1202,88 @@ c.dist_spec <- function(...) { #' # The mean of the sum of two distributions #' mean(lognormal + gamma) mean.dist_spec <- function(x, ...) { - ret <- rep(0, x$n) - ## nonparametric + if (x$n > 1) { + stop("Cannot calculate mean of composite distributions.") + } if (x$n_np > 0) { - init_id <- c(1, head(cumsum(x$np_pmf_length) + 1, n = -1)) - ret[x$fixed == 1L] <- vapply(seq_along(init_id), function(id) { - pmf <- x$np_pmf[seq(init_id[id], cumsum(x$np_pmf_length)[id])] - return(sum((seq_len(x$np_pmf_length[id]) - 1) * pmf)) - }, 0) + ## nonparametric + ret <- sum((seq_len(x$np_pmf_length) - 1) * x$np_pmf) + } else { + ## parametric + if (any(x$params_sd > 0)) { + stop("Cannot calculate standard deviation of uncertain distribution") + } + if (x$dist == "lognormal") { + ret <- exp(x$params_mean[[1]] + x$params_mean[[2]]**2 / 2) + } else if (x$dist == "gamma") { + ret <- x$params_mean[[1]] / x$params_mean[[2]] + } else if (x$dist == "normal") { + ret <- x$params_mean[[1]] + } else if (x$dist == "fixed") { + ret <- x$params_mean[[1]] + } else { + stop("Don't know how to calculate mean of ", x$dist, " distribution.") + } } - ## parametric - if (x$n_p > 0) { - ret[x$fixed == 0L] <- vapply(seq_along(which(x$fixed == 0L)), function(id) { - if (x$dist[id] == "lognormal") { - return(exp(x$mean_mean[id] + x$sd_mean[id] / 2)) - } else if (x$dist[id] == "gamma") { - return(x$mean_mean[id]) - } else { - stop("Unknown distribution: ", x$dist[id]) - } - }, 0) + return(ret) +} + +##' Returns the standard deviation of one or more delay distribution +##' +##' This works out the standard deviation of all the (parametric / +##' nonparametric) delay distributions combined in the passed [dist_spec()]. +##' +##' @param x The [dist_spec()] to use +##' @param ... Not used +##' @return A vector of standard deviations. +##' @author Sebastian Funk +##' @method sd dist_spec +##' @importFrom utils head +##' @export +#' @examples +#' # A fixed lognormal distribution with sd 5 and sd 1. +#' lognormal <- dist_spec( +#' sd = 5, sd = 1, max = 20, distribution = "lognormal" +#' ) +#' sd(lognormal) +#' +#' # An uncertain gamma distribution with sd 3 and sd 2 +#' gamma <- dist_spec( +#' sd = 3, sd = 2, sd_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) +#' sd(gamma) +#' +#' # The sd of the sum of two distributions +#' sd(lognormal + gamma) +sd.dist_spec <- function(x, ...) { + if (x$n > 1) { + stop("Cannot calculate standard deviation of composite distributions.") + } + if (x$n_np > 0) { + ## nonparametric + mean_pmf <- sum((seq_len(x$np_pmf_length) - 1) * x$np_pmf) + ret <- sum((seq_len(x$np_pmf_length) - 1)**2 * x$np_pmf) - mean_pmf^2 + } else { + ## parametric + if (any(x$params_sd > 0)) { + stop("Cannot calculate standard deviation of uncertain distribution") + } + if (x$dist == "lognormal") { + ret <- sqrt(exp(x$params_mean[2]**2) - 1) * + exp(x$params_mean[1] + 0.5 * x$params_mean[2]**2) + } else if (x$dist == "gamma") { + ret <- sqrt(x$params_mean[1] / x$params_mean[2]**2) + } else if (x$dist == "normal") { + ret <- x$params_mean[2] + } else if (x$dist == "fixed") { + ret <- 0 + } else { + stop( + "Don't know how to calculate standard deviation of ", x$dist, + " distribution." + ) + } } return(ret) } @@ -1251,42 +1317,63 @@ print.dist_spec <- function(x, ...) { cat("Empty `dist_spec` distribution.\n") return(invisible(NULL)) } else if (x$n > 1) { - cat("Combination of delay distributions:\n") + cat("Composite delay distribution:\n") } fixed_id <- 1 fixed_pos <- 1 variable_id <- 1 + variable_pos <- 1 for (i in 1:x$n) { cat(" ") if (!is.null(x$names) && nchar(x$names[i]) > 0) { cat(x$names[i], ": ", sep = "") } - if (x$fixed[i] == 0) { + if (x$parametric[i] > 0) { dist <- x$dist[variable_id] - cat( - "Uncertain ", dist, " distribution with (untruncated) ", - ifelse(dist == "lognormal", "log", ""), - "mean ", signif(x$mean_mean[variable_id], digits = 2), - " (SD ", signif(x$mean_sd[variable_id], digits = 2), ") and ", - ifelse(dist == "lognormal", "log", ""), - "SD ", signif(x$sd_mean[variable_id], 2), - " (SD ", signif(x$sd_sd[variable_id], 2), ")\n", - sep = "" - ) + cat(dist, " distribution", sep = "") + if (is.finite(x$max)) { + cat("(max: ", x$max, ")", sep = "") + } + cat(" with ", sep = "") + ## loop over natural parameters and print + for (id in seq(variable_pos, x$params_length[variable_pos])) { + if (id > variable_pos) { + if (id == x$params_length[variable_pos]) { + cat(" and ") + } else { + cat(", ") + } + } + if (x$params_sd[id] > 0) { + cat("uncertain ") + } + cat(natural_params(dist)[id]) + if (x$params_sd[id] > 0) { + cat( + " (mean = ", signif(x$params_mean[id], digits = 2), ", ", + "sd = ", signif(x$params_sd[id], digits = 2), ")", + sep = "" + ) + } else { + cat(" = ", signif(x$params_mean[id], digits = 2), sep = "") + } + } variable_id <- variable_id + 1 + variable_pos <- variable_pos + x$params_length[i] } else { cat( - "Fixed distribution with PMF [", + "distribution with PMF [", paste(signif( x$np_pmf[seq(fixed_pos, fixed_pos + x$np_pmf_length[fixed_id] - 1)], digits = 2 ), collapse = " "), - "]\n", + "]", sep = "" ) fixed_id <- fixed_id + 1 fixed_pos <- fixed_pos + x$np_pmf_length[i] } + cat(".\n") } cat("\n") } @@ -1394,7 +1481,7 @@ plot.dist_spec <- function(x, ...) { ##' @importFrom truncnorm rtruncnorm fix_dist <- function(x, strategy = c("mean", "sample")) { ## if x is fixed already we don't have to do anything - if (x$fixed) return(x) + if (x$mean_sd == 0 && x$sd_sd == 0) return(x) ## match startegy argument to options strategy <- match.arg(strategy) ## apply stragey depending on choice @@ -1424,3 +1511,128 @@ fix_dist <- function(x, strategy = c("mean", "sample")) { } return(x) } + +lognormal <- function(meanlog, sdlog, mean, sd, median, max = Inf) { + params <- as.list(environment()) + lower_bounds <- c(meanlog = -Inf, sdlog = 0, mean = 0, sd = 0, median = 0) + return(process_dist(params, lower_bounds, "lognormal")) +} + +gamma <- function(shape, rate, scale, mean, sd, max = Inf) { + params <- as.list(environment()) + lower_bounds <- c(shape = 0, rate = 0, scale = 0, mean = 0, sd = 0) + return(process_dist(params, lower_bounds, "gamma")) +} + +normal <- function(mean, sd, max = Inf) { + params <- as.list(environment()) + lower_bounds <- c(mean = -Inf, sd = 0) + return(process_dist(params, lower_bounds, "normal")) +} + +fixed <- function(value) { + params <- as.list(environment()) + params <- extract_params(params, "fixed") + if (is(params$value, "dist_spec")) { + return(params) + } else if (is.numeric(params$value)) { + fixed_value <- params$value + } + return(dist_spec(params_mean = fixed_value, distribution = "fixed")) +} + +pmf <- function(x) { + return(dist_spec(pmf = x)) +} + +natural_params <- function(distribution) { + if (distribution == "gamma") { + ret <- c("shape", "rate") + } else if (distribution == "lognormal") { + ret <- c("meanlog", "sdlog") + } else if (distribution == "normal") { + ret <- c("mean", "sd") + } else if (distribution == "fixed") { + ret <- "value" + } + return(ret) +} + +extract_params <- function(params, distribution) { + params <- params[!vapply(params, inherits, "name", FUN.VALUE = TRUE)] + n_params <- length(natural_params(distribution)) + if (length(params) != n_params) { + stop( + "Exactly ", n_params, " parameters of the ", distribution, + " distribution must be specified." + ) + } + return(params) +} + +process_dist <- function(params, lower_bounds, distribution) { + ## process min/max first + max <- params$max + params$max <- NULL + ## extract parameters and convert all to dist_spec + params <- extract_params(params, distribution) + params <- lapply(params, function(x) { + if (is(x, "dist_spec") && x$dist == "normal") { + if (any(x$param_sd > 0)) { + stop( + "Normal distribution indicating uncertainty cannot itself ", + "be uncertain." + ) + } + x + } else if (is.numeric(x)) { + fixed(x) + } else { + stop("Parameter ", x, " must be numeric or normally distributed.") + } + }) + ## sample parameters if they are uncertain + samples <- lapply(names(params), function(x) { + rtruncnorm( + n = 2000, a = lower_bounds[x], + mean = mean.dist_spec(params[[x]]), sd = sd.dist_spec(params[[x]]) + ) + }) + names(samples) <- names(params) + ## generate natural parameters + converted_params <- convert_to_natural(samples, distribution) + + dist <- dist_spec( + distribution = distribution, + params_mean = converted_params$params_mean, + params_sd = converted_params$params_sd, + max = max + ) + + ## now we have a distribution with natural parameters - return dist_spec + return(dist) +} + +convert_to_natural <- function(x, distribution) { + if (distribution == "gamma") { + if ("mean" %in% names(x) && "sd" %in% names(x)) { + x$shape <- x$mean**2 / x$sd**2 + x$rate <- x$shape / x$mean + } else if (!("rate" %in% names(x)) && ("scale" %in% names(x))) { + x$rate <- 1 / x$scale + } + } else if (distribution == "lognormal" && + "mean" %in% names(x) && "sd" %in% names(x)) { + x$meanlog <- convert_to_logmean(x$mean, x$sd) + x$sdlog <- convert_to_logsd(x$mean, x$sd) + } + params <- list( + params_mean = unname(vapply(natural_params(distribution), function(param) { + mean(x[[param]]) + }, numeric(1))), + params_sd = unname(vapply(natural_params(distribution), function(param) { + sd(x[[param]]) + }, numeric(1))) + ) + return(params) +} diff --git a/R/opts.R b/R/opts.R index 8e2101ad5..ae1d1c394 100644 --- a/R/opts.R +++ b/R/opts.R @@ -124,7 +124,7 @@ generation_time_opts <- function(dist = dist_spec(mean = 1), ..., #' #' # Multiple delays (in this case twice the same) #' delay_opts(delay + delay) -delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { +delay_opts <- function(dist = dist_spec("empty"), ..., fixed = FALSE) { dot_options <- list(...) if (!is(dist, "dist_spec")) { ## could be old syntax if (is.list(dist)) { @@ -166,8 +166,9 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { #' estimate these distributions. #' #' @param dist A delay distribution or series of delay distributions reflecting -#' the truncation generated using [dist_spec()] or [estimate_truncation()]. -#' Default is an empty call to [dist_spec()], i.e. no truncation +#' the truncation generated using [dist_spec()] or [estimate_truncation()]. +#' Default is an call to [dist_spec()], with `distribution = "empty", i.e. no +#' truncation #' @return A list summarising the input truncation distribution. #' #' @author Sam Abbott @@ -180,7 +181,7 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) { #' #' # truncation dist #' trunc_opts(dist = dist_spec(mean = 3, sd = 2, max = 10)) -trunc_opts <- function(dist = dist_spec()) { +trunc_opts <- function(dist = dist_spec("empty")) { if (!is(dist, "dist_spec")) { if (is.list(dist)) { dist <- do.call(dist_spec, dist) diff --git a/inst/stan/data/delays.stan b/inst/stan/data/delays.stan index 0c624ffbd..7b55a66aa 100644 --- a/inst/stan/data/delays.stan +++ b/inst/stan/data/delays.stan @@ -7,11 +7,17 @@ array[delay_n_p] real delay_sd_sd; // prior sd of sd of delay distribution array[delay_n_p] int delay_max; // maximum delay distribution array[delay_n_p] int delay_dist; // 0 = lognormal; 1 = gamma + int delay_np_pmf_length; // number of nonparametric pmf elements vector[delay_np_pmf_length] delay_np_pmf; // ragged array of fixed PMFs array[delay_n_np + 1] int delay_np_pmf_groups; // links to ragged array - array[delay_n_p] int delay_weight; + int delay_params_length; // number of parameters across all parametric delay distributions + vector[delay_params_length] delay_params_mean; // ragged array of mean parameters for parametric delay distributions + vector[delay_params_length] delay_params_sd; // ragged array of sd of parameters for parametric delay distributions + array[delay_n_p + 1] int delay_params_groups; // links to ragged array + + array[delay_n_p] int delay_weight; // delay weights int delay_types; // number of delay types array[delay_n] int delay_types_p; // whether delay types are parametric array[delay_n] int delay_types_id; // whether delay types are parametric diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index ce0ed932d..d94a4a8ba 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -18,6 +18,6 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { withr::defer(future::plan("sequential"), teardown_env()) ## process warning once as previous behaviour has been deprecated -empty <- suppressWarnings(dist_spec()) +empty <- suppressWarnings(dist_spec("empty")) diff --git a/tests/testthat/test-dist_spec.R b/tests/testthat/test-dist_spec.R index 00a4343de..9d6ebd225 100644 --- a/tests/testthat/test-dist_spec.R +++ b/tests/testthat/test-dist_spec.R @@ -185,7 +185,7 @@ test_that("print.dist_spec correctly prints the parameters of the uncertain logn }) test_that("print.dist_spec correctly prints the parameters of an empty distribution", { - empty <- dist_spec() + empty <- dist_spec("empty") expect_output(print(empty), "Empty `dist_spec` distribution.") })