From 3c273ba62ab2da75ca72221949f10d1fe6d31716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Tue, 6 Feb 2024 12:41:00 +0200 Subject: [PATCH 1/5] Added randomisation for loo_pit of discrete data. --- R/loo-functions.R | 27 +++++++++++++++++++++++--- tests/testthat/loo_pit.RDS | Bin 64 -> 72 bytes tests/testthat/loo_pit_discrete.RDS | Bin 0 -> 71 bytes tests/testthat/test-default-methods.R | 16 +++++++++++++-- 4 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/loo_pit_discrete.RDS diff --git a/R/loo-functions.R b/R/loo-functions.R index 056ea54..1e30cba 100644 --- a/R/loo-functions.R +++ b/R/loo-functions.R @@ -60,11 +60,32 @@ loo_pit.default <- function(object, y, lw, ...) { # internal ---------------------------------------------------------------- .loo_pit <- function(y, yrep, lw) { - vapply(seq_len(ncol(yrep)), function(j) { - sel <- yrep[, j] <= y[j] - .exp_log_sum_exp(lw[sel, j]) + if (is.null(lw) || !all(is.finite(lw))) { + stop("lw needs to be not null and finite.") + } + pits <- vapply(seq_len(ncol(yrep)), function(j) { + sel_min <- yrep[, j] < y[j] + pit <- .exp_log_sum_exp(lw[sel_min, j]) + sel_sup <- yrep[, j] == y[j] + if (any(sel_sup)) { + # randomized PIT for discrete y (see, e.g., Czado, C., Gneiting, T., + # Held, L.: Predictive model assessment for count data. + # Biometrics 65(4), 1254–1261 (2009).) + pit_sup <- pit + .exp_log_sum_exp(lw[sel_sup, j]) + pit <- stats::runif(1, pit, pit_sup) + } + pit }, FUN.VALUE = 1) + if (any(pits > 1)) { + warning(cat( + "Some PIT values larger than 1! Largest: ", + max(pits), + "\nRounding PIT > 1 to 1." + )) + } + pmin(1, pits) } + .exp_log_sum_exp <- function(x) { m <- suppressWarnings(max(x)) exp(m + log(sum(exp(x - m)))) diff --git a/tests/testthat/loo_pit.RDS b/tests/testthat/loo_pit.RDS index b135a0772c353feebbd93abbc6d8ab1272de390a..878f5409e2712c3de6eba7c3694dcb45b11b8b67 100644 GIT binary patch literal 72 zcmb2|=3oE==I#ec2?+^l35jVb32CfGk`d0%cS>{{cm%%bqm|$mU8lK>!0>b Q=k_jkhWtlX@AQBg0ol74jsO4v diff --git a/tests/testthat/loo_pit_discrete.RDS b/tests/testthat/loo_pit_discrete.RDS new file mode 100644 index 0000000000000000000000000000000000000000..4b8daf58226aedcd820278b72603fedd004e6f41 GIT binary patch literal 71 zcmb2|=3oE==I#ec2?+^l35jVb32CfGk`d0%cS>{{c Date: Tue, 6 Feb 2024 12:51:25 +0200 Subject: [PATCH 2/5] Updated loo_pit change to NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 14be5a6..b425c09 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ Items for next release go here +Extended `loo_pit()` for discrete data. + # rstantools 2.4.0 * Update to match CRAN's patched version by @jgabry in #114 From 75d7999e3ac5ecfd37992e3d9a3774c2b2cdb924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 15 Feb 2024 12:27:11 +0200 Subject: [PATCH 3/5] removed dependency on the matrixStats package, and changed the reference data save format. --- tests/testthat/loo_pit.RDS | Bin 72 -> 65 bytes tests/testthat/loo_pit_discrete.RDS | Bin 71 -> 64 bytes tests/testthat/test-default-methods.R | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/loo_pit.RDS b/tests/testthat/loo_pit.RDS index 878f5409e2712c3de6eba7c3694dcb45b11b8b67..2b9c1fc934ed3b143f201957801a86d580bec295 100644 GIT binary patch literal 65 zcmb2|=3oE==I#ec2?+^F35jVb2}x{5l0O<|-28V+`1{iA@{>3Aul=)fYgg&Qzq;!~ SpFjG3pOGOoyWCh0s1pEBS{z~k literal 72 zcmb2|=3oE==I#ec2?+^l35jVb32CfGk`d0%cS>{{c{{c Date: Thu, 15 Feb 2024 14:12:01 +0200 Subject: [PATCH 4/5] Updated documentation of loo_pit to include refernce for discrete randomisation and comment about fixing random seed. --- R/loo-functions.R | 5 ++++- man-roxygen/reference-randomised-pit.R | 5 +++++ man/loo-prediction.Rd | 10 ++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 man-roxygen/reference-randomised-pit.R diff --git a/R/loo-functions.R b/R/loo-functions.R index 1e30cba..184dd51 100644 --- a/R/loo-functions.R +++ b/R/loo-functions.R @@ -10,12 +10,15 @@ #' @return `loo_predict()`, `loo_linpred()`, and `loo_pit()` #' (probability integral transform) methods should return a vector with length #' equal to the number of observations in the data. +#' For discrete observations, probability integral transform is randomised to +#' ensure theoretical uniformity. Fix random seed for reproducible results +#' with discrete data. For more details, see Czado et al. (2009). #' `loo_predictive_interval()` methods should return a two-column matrix #' formatted in the same way as for [predictive_interval()]. #' #' @template seealso-rstanarm-pkg #' @template seealso-vignettes -#' +#' @template reference-randomised-pit #' @rdname loo-prediction #' @export diff --git a/man-roxygen/reference-randomised-pit.R b/man-roxygen/reference-randomised-pit.R new file mode 100644 index 0000000..8e9a48a --- /dev/null +++ b/man-roxygen/reference-randomised-pit.R @@ -0,0 +1,5 @@ +#' @references Czado, C., Gneiting, T., and Held, L. (2009). +#' Predictive Model Assessment for Count Data. +#' *Biometrics*. 65(4), 1254-1261. +#' doi:10.1111/j.1541-0420.2009.01191.x. +#' Journal version: diff --git a/man/loo-prediction.Rd b/man/loo-prediction.Rd index 9655a59..dda7866 100644 --- a/man/loo-prediction.Rd +++ b/man/loo-prediction.Rd @@ -35,12 +35,22 @@ the same length as the number of columns in the matrix used as \code{object}.} \code{loo_predict()}, \code{loo_linpred()}, and \code{loo_pit()} (probability integral transform) methods should return a vector with length equal to the number of observations in the data. +For discrete observations, probability integral transform is randomised to +ensure theoretical uniformity. Fix random seed for reproducible results +with discrete data. For more details, see Czado et al. (2009). \code{loo_predictive_interval()} methods should return a two-column matrix formatted in the same way as for \code{\link[=predictive_interval]{predictive_interval()}}. } \description{ See the methods in the \pkg{rstanarm} package for examples. } +\references{ +Czado, C., Gneiting, T., and Held, L. (2009). +Predictive Model Assessment for Count Data. +\emph{Biometrics}. 65(4), 1254-1261. +doi:10.1111/j.1541-0420.2009.01191.x. +Journal version: \url{https://doi.org/10.1111/j.1541-0420.2009.01191.x} +} \seealso{ \itemize{ \item The \pkg{rstanarm} package (\href{https://mc-stan.org/rstanarm/}{mc-stan.org/rstanarm}) From 46b08c9cad9a88b1f9fd0a226eaebe06794d0335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 15 Feb 2024 14:14:09 +0200 Subject: [PATCH 5/5] Removed a bug where a line starting with > was parsed as a block quote. --- man-roxygen/details-license.R | 4 ++-- man/rstan_create_package.Rd | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/man-roxygen/details-license.R b/man-roxygen/details-license.R index e118853..009b90f 100644 --- a/man-roxygen/details-license.R +++ b/man-roxygen/details-license.R @@ -1,6 +1,6 @@ #' @details In order to enable Stan functionality, \pkg{\link{rstantools}} -#' copies some files to your package. Since these files are licensed as GPL -#' >= 3, the same license applies to your package should you choose to +#' copies some files to your package. Since these files are licensed as +#' GPL >= 3, the same license applies to your package should you choose to #' distribute it. Even if you don't use \pkg{\link{rstantools}} to create #' your package, it is likely that you will be linking to \pkg{\link{Rcpp}} to #' export the Stan C++ `stanmodel` objects to \R. Since diff --git a/man/rstan_create_package.Rd b/man/rstan_create_package.Rd index c4184ad..c8b048e 100644 --- a/man/rstan_create_package.Rd +++ b/man/rstan_create_package.Rd @@ -114,7 +114,13 @@ must be manually configured by running \code{\link[=rstan_config]{rstan_config() \code{stanmodel} files in \code{inst/stan} are added, removed, or modified. In order to enable Stan functionality, \pkg{\link{rstantools}} -copies some files to your package. Since these files are licensed as GPL= 3, the same license applies to your package should you choose todistribute it. Even if you don't use \pkg{\link{rstantools}} to createyour package, it is likely that you will be linking to \pkg{\link{Rcpp}} toexport the Stan C++ stanmodel objects to \R. Since\pkg{\link{Rcpp}} is released under GPL >= 2, the same license would applyto your package upon distribution. +copies some files to your package. Since these files are licensed as +GPL >= 3, the same license applies to your package should you choose to +distribute it. Even if you don't use \pkg{\link{rstantools}} to create +your package, it is likely that you will be linking to \pkg{\link{Rcpp}} to +export the Stan C++ \code{stanmodel} objects to \R. Since +\pkg{\link{Rcpp}} is released under GPL >= 2, the same license would apply +to your package upon distribution. Authors willing to license their Stan programs of general interest under the GPL are invited to contribute their \code{.stan} files and