epiforecasts · sbfnk · Mar 25, 2024 · Nov 10, 2023 · Nov 16, 2023 · Nov 17, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,10 @@
 # EpiNow2 1.4.9000
 
-## Breaking changes
+## Major changes
+
+* Delay discretisation is now based on a two-day censoring window (with uniform probability in between), based on recommendations in [Park et al, medRxiv, 2024](https://doi.org/10.1101/2024.01.12.24301247). By @sbfnk in #518 and reviewed by @jamesmbaazam.
+
+## OTher breaking changes
 
 * The functions `get_dist`, `get_generation_time`, `get_incubation_period` have been deprecated and replaced with examples. By @sbfnk in #481 and reviewed by @seabbs.
 * The utility function `update_list()` has been deprecated in favour of `utils::modifyList()` because it comes with an installation of R. By @jamesmbaazam in #491 and reviewed by @seabbs.

diff --git a/R/dist_spec.R b/R/dist_spec.R
@@ -20,10 +20,12 @@
 #'
 #' @param discrete Logical,  defaults to `FALSE`. Should the probability
 #'   distribution be discretised. In this case each entry of the probability
-#'   mass function corresponds to the 1-length interval ending at the entry,
-#'   i.e. the probability mass function is a vector where the first entry
-#'   corresponds to the integral over the (0,1] interval of the continuous
-#'   distribution, the second entry corresponds to the (1,2] interval etc.
+#'   mass function corresponds to the 2-length interval ending at the entry
+#'   except for the first interval that covers (0, 1).  That is, the probability
+#'   mass function is a vector where the first entry corresponds to the integral
+#'   over the (0,1] interval of the continuous distribution, the second entry
+#'   corresponds to the (0,2] interval, the third entry corresponds to the (1,
+#'   3] interval etc.
 #'
 #' @param params A list of parameters values (by name) required for each model.
 #' For the exponential model this is a rate parameter and for the gamma model
@@ -84,74 +86,34 @@
 #' )
 dist_skel <- function(n, dist = FALSE, cum = TRUE, model,
                       discrete = FALSE, params, max_value = 120) {
+  ## define unnormalised support function
   if (model == "exp") {
-    # define support functions for exponential dist
-    rdist <- function(n) {
-      rexp(n, params[["rate"]])
-    }
-    pdist <- function(n) {
-      pexp(n, params[["rate"]]) / pexp(max_value, params[["rate"]])
-    }
-    ddist <- function(n) {
-      (pexp(n + 1, params[["rate"]]) -
-        pexp(n, params[["rate"]])) /
-        pexp(max_value + 1, params[["rate"]])
+    updist <- function(n) {
+      pexp(n, params[["rate"]])
     }
   } else if (model == "gamma") {
-    rdist <- function(n) {
-      rgamma(n = n, shape = params[["shape"]], rate = params[["rate"]])
-    }
-    pdist <- function(n) {
-      pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]]) /
-        pgamma(
-          q = max_value + 1, shape = params[["shape"]], rate = params[["rate"]]
-        )
-    }
-    ddist <- function(n) {
-      (pgamma(q = n + 1, shape = params[["shape"]], rate = params[["rate"]]) -
-        pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]])) /
-        pgamma(q = max_value + 1, params[["shape"]], rate = params[["rate"]])
+    updist <- function(n) {
+      pgamma(n, params[["shape"]], params[["rate"]])
     }
   } else if (model == "lognormal") {
-    rdist <- function(n) {
-      rlnorm(n, params[["meanlog"]], params[["sdlog"]])
-    }
-    pdist <- function(n) {
-      plnorm(n, params[["meanlog"]], params[["sdlog"]]) /
-        plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]])
-    }
-    ddist <- function(n) {
-      (plnorm(n + 1, params[["meanlog"]], params[["sdlog"]]) -
-        plnorm(n, params[["meanlog"]], params[["sdlog"]])) /
-        plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]])
+    updist <- function(n) {
+      plnorm(n, params[["meanlog"]], params[["sdlog"]])
     }
   } else if (model == "normal") {
-    rdist <- function(n) {
-      rnorm(n, params[["mean"]], params[["sd"]])
-    }
-    pdist <- function(n) {
-      pnorm(n, params[["mean"]], params[["sd"]]) /
-        pnorm(max_value + 1, params[["mean"]], params[["sd"]])
-    }
-    ddist <- function(n) {
-      (pnorm(n + 1, params[["mean"]], params[["sd"]]) -
-        pnorm(n, params[["mean"]], params[["sd"]])) /
-        pnorm(max_value + 1, params[["mean"]], params[["sd"]])
+    updist <- function(n) {
+      pnorm(n, params[["mean"]], params[["sd"]])
     }
   } else if (model == "fixed") {
-    rdist <- function(n) {
-      rep(params[["value"]], n)
-    }
-    pdist <- function(n) {
+    updist <- function(n) {
       as.integer(n > params[["value"]])
     }
-    ddist <- function(n) {
-      as.integer(n == params[["value"]])
-    }
   }
 
   if (discrete) {
-    cmf <- c(0, pdist(seq_len(max_value + 1)))
+    cmf <- c(0, updist(1),
+      updist(seq_len(max_value)) + updist(seq_len(max_value) + 1)
+    ) /
+      (updist(max_value) + updist(max_value + 1))
     pmf <- diff(cmf)
     rdist <- function(n) {
       sample(
@@ -164,6 +126,26 @@
     ddist <- function(n) {
       pmf[n + 1]
     }
+  } else {
+    pdist <- function(n) {
+      updist(n) / updist(max_value + 1)
+    }
+    ddist <- function(n) {
+      pdist(n + 1) - pdist(n)
+    }
+    if (model == "exp") {
+      rdist <- function(n) {
+        rexp(n, params[["rate"]])
+      }
+    } else if (model == "gamma") {
+      rdist <- function(n) {
+        rgamma(n, params[["shape"]], params[["rate"]])
+      }
+    } else if (model == "lognormal") {
+      rdist <- function(n) {
+        rlnorm(n, params[["meanlog"]], params[["sdlog"]])
+      }
+    }
   }
 
   # define internal sampling function

diff --git a/inst/stan/functions/pmfs.stan b/inst/stan/functions/pmfs.stan
@@ -17,10 +17,17 @@ vector discretised_pmf(vector params, int n, int dist) {
     reject("Unknown distribution function provided.");
   }
   // discretise
-  lpmf[1] = upper_lcdf[1];
-  lpmf[2:n] = log_diff_exp(upper_lcdf[2:n], upper_lcdf[1:(n-1)]);
-  // normalize
-  lpmf = lpmf - upper_lcdf[n];
+  if (n > 1) {
+    lpmf[1] = upper_lcdf[1];
+    lpmf[2] = upper_lcdf[2];
+    if (n > 2) {
+      lpmf[3:n] = log_diff_exp(upper_lcdf[3:n], upper_lcdf[1:(n - 2)]);
+    }
+    // normalize
+    lpmf = lpmf - log_sum_exp(upper_lcdf[(n - 1):n]);
+  } else {
+    lpmf[1] = 0;
+  }
   return(exp(lpmf));
 }
 

diff --git a/man/dist_skel.Rd b/man/dist_skel.Rd
diff --git a/tests/testthat/_snaps/simulate-infections.md b/tests/testthat/_snaps/simulate-infections.md
@@ -36,33 +36,33 @@
 
               variable       date    value
                 <char>     <Date>    <num>
-     1:     infections 2023-01-01 214.1121
-     2:     infections 2023-01-02 225.6793
-     3:     infections 2023-01-03 237.8341
-     4:     infections 2023-01-04 250.6285
-     5:     infections 2023-01-05 264.1041
-     6:     infections 2023-01-06 278.3004
-     7:     infections 2023-01-07 293.2573
-     8:     infections 2023-01-08 206.0109
-     9:     infections 2023-01-09 197.6193
-    10:     infections 2023-01-10 188.2225
-    11:     infections 2023-01-11 178.7223
-    12:     infections 2023-01-12 169.4362
-    13:     infections 2023-01-13 160.4852
-    14:     infections 2023-01-14 151.9122
-    15: reported_cases 2023-01-01 157.0000
-    16: reported_cases 2023-01-02 146.0000
-    17: reported_cases 2023-01-03 164.0000
-    18: reported_cases 2023-01-04 136.0000
-    19: reported_cases 2023-01-05 601.0000
-    20: reported_cases 2023-01-06 263.0000
-    21: reported_cases 2023-01-07 443.0000
-    22: reported_cases 2023-01-08 173.0000
-    23: reported_cases 2023-01-09 201.0000
-    24: reported_cases 2023-01-10  15.0000
-    25: reported_cases 2023-01-11 229.0000
-    26: reported_cases 2023-01-12 151.0000
-    27: reported_cases 2023-01-13 232.0000
-    28: reported_cases 2023-01-14  82.0000
+     1:     infections 2023-01-01 212.0302
+     2:     infections 2023-01-02 223.0571
+     3:     infections 2023-01-03 234.4941
+     4:     infections 2023-01-04 246.4754
+     5:     infections 2023-01-05 259.0516
+     6:     infections 2023-01-06 272.2608
+     7:     infections 2023-01-07 286.1383
+     8:     infections 2023-01-08 200.4796
+     9:     infections 2023-01-09 194.5554
+    10:     infections 2023-01-10 186.0554
+    11:     infections 2023-01-11 177.1761
+    12:     infections 2023-01-12 168.4014
+    13:     infections 2023-01-13 159.8970
+    14:     infections 2023-01-14 151.7212
+    15: reported_cases 2023-01-01 152.0000
+    16: reported_cases 2023-01-02 142.0000
+    17: reported_cases 2023-01-03 158.0000
+    18: reported_cases 2023-01-04 131.0000
+    19: reported_cases 2023-01-05 577.0000
+    20: reported_cases 2023-01-06 252.0000
+    21: reported_cases 2023-01-07 424.0000
+    22: reported_cases 2023-01-08 169.0000
+    23: reported_cases 2023-01-09 233.0000
+    24: reported_cases 2023-01-10 285.0000
+    25: reported_cases 2023-01-11 191.0000
+    26: reported_cases 2023-01-12 154.0000
+    27: reported_cases 2023-01-13 236.0000
+    28: reported_cases 2023-01-14  84.0000
               variable       date    value
 
diff --git a/tests/testthat/_snaps/simulate-secondary.md b/tests/testthat/_snaps/simulate-secondary.md
@@ -18,15 +18,15 @@
 
           date.date secondary
              <Date>     <num>
-      1: 2020-02-22         2
-      2: 2020-02-23        18
-      3: 2020-02-24        83
-      4: 2020-02-25        31
-      5: 2020-02-26       185
+      1: 2020-02-22         1
+      2: 2020-02-23        12
+      3: 2020-02-24        54
+      4: 2020-02-25        27
+      5: 2020-02-26       157
      ---                     
-    126: 2020-06-26       163
-    127: 2020-06-27       375
-    128: 2020-06-28       124
-    129: 2020-06-29       172
-    130: 2020-06-30       194
+    126: 2020-06-26       315
+    127: 2020-06-27       157
+    128: 2020-06-28       237
+    129: 2020-06-29       259
+    130: 2020-06-30       234
 
diff --git a/tests/testthat/test-delays.R b/tests/testthat/test-delays.R
@@ -33,7 +33,7 @@ test_that("generation times can be specified in different ways", {
       ),
       params = delay_params
     ), digits = 2),
-    c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00, 1.00)
+    c(0.01, 0.08, 0.20, 0.32, 0.40, 1.00, 1.00)
   )
 })
 
@@ -52,7 +52,7 @@ test_that("delay parameters can be specified in different ways", {
       ),
       params = delay_params
     ), digits = 2), n = -2),
-    c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00)
+    c(0.01, 0.08, 0.20, 0.32, 0.40, 1.00)
   )
 })
 
@@ -64,7 +64,7 @@ test_that("truncation parameters can be specified in different ways", {
       ),
       params = delay_params
     ), digits = 2), n = -2),
-    c(1.00, 0.02, 0.11, 0.22, 0.30, 0.35)
+    c(1.00, 0.01, 0.08, 0.20, 0.32, 0.40)
   )
 })
 

diff --git a/tests/testthat/test-dist_spec.R b/tests/testthat/test-dist_spec.R
@@ -8,7 +8,7 @@ test_that("dist_spec returns correct output for fixed lognormal distribution", {
     as.vector(round(result[[1]]$pmf, 2)),
     c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03,
       0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09,
-      0.10, 0.10, 0.11, 0.12)
+      0.10, 0.11, 0.11, 0.12)
   )
 })
 
@@ -43,7 +43,7 @@ test_that("dist_spec returns correct output for fixed distribution", {
     as.vector(round(result[[1]]$pmf, 2)),
     c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03,
       0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09,
-      0.10, 0.10, 0.11, 0.12)
+      0.10, 0.11, 0.11, 0.12)
   )
 })
 
@@ -111,10 +111,8 @@ test_that("Testing `apply_tolerance` function applied to a convolution", {
 
 test_that("summary functions return correct output for fixed lognormal distribution", {
   dist <- discretise(LogNormal(mean = 3, sd = 1, max = 19))
-  ## here we can see the bias from
-  # using this kind of discretisation approach
-  expect_equal(EpiNow2:::mean.dist_spec(dist), 2.49, tolerance = 0.01)
-  expect_equal(EpiNow2:::sd_dist(dist), 1.09, tolerance = 0.01)
+  expect_equal(EpiNow2:::mean.dist_spec(dist), 3.0, tolerance = 0.01)
+  expect_equal(EpiNow2:::sd_dist(dist), 1.34, tolerance = 0.01)
   expect_equal(EpiNow2:::max.dist_spec(dist), 19L)
 })
 
@@ -147,7 +145,7 @@ test_that("sd_dist returns NA when applied to uncertain distributions", {
 test_that("print.dist_spec correctly prints the parameters of the fixed lognormal", {
   dist <- discretise(LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19))
 
-  expect_output(print(dist), "- nonparametric distribution\\n  PMF: \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]")
+  expect_output(print(dist),"- nonparametric distribution\\n  PMF: \\[0\\.00068 0\\.027 0\\.11 0\\.18 0\\.19 0\\.16 0\\.11 0\\.078 0\\.052 0\\.035 0\\.023 0\\.015 0\\.0099 0\\.0065 0\\.0044 0\\.003 0\\.002 0\\.0014 0\\.00095 0\\.00066\\]" )
 })
 
 test_that("print.dist_spec correctly prints the parameters of the uncertain gamma", {
@@ -170,7 +168,6 @@ test_that("print.dist_spec correctly prints the parameters of a combination of d
   dist1 <- LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19)
   dist2 <- Gamma(shape =  Normal(3, 0.5), rate = Normal(2, 0.5), max = 19)
   combined <- dist1 + dist2
-
   expect_output(print(combined), "Composite distribution:\\n- lognormal distribution \\(max: 19\\):\\n  meanlog:\\n    1\\.5\\n  sdlog:\\n    0\\.5\\n- gamma distribution \\(max: 19\\):\\n  shape:\\n    - normal distribution:\\n      mean:\\n        3\\n      sd:\\n        0\\.5\\n  rate:\\n    - normal distribution:\\n      mean:\\n        2\\n      sd:\\n        0\\.5")
 })
 
@@ -227,7 +224,7 @@ test_that("delay distributions can be specified in different ways", {
   )
   expect_equal(
     round(discretise(LogNormal(mean = 4, sd = 1, max = 10))[[1]]$pmf, 2),
-    c(0.00, 0.00, 0.14, 0.40, 0.30, 0.11, 0.03, 0.01, 0.00, 0.00, 0.00)
+    c(0.00, 0.00, 0.07, 0.27, 0.35, 0.21, 0.07, 0.02, 0.00, 0.00, 0.00)
   )
   expect_equal(
     unname(as.numeric(Gamma(mean = 4, sd = 1)[[1]]$parameters)),
@@ -236,7 +233,7 @@ test_that("delay distributions can be specified in different ways", {
   )
   expect_equal(
     round(discretise(Gamma(mean = 4, sd = 1, max = 7))[[1]]$pmf, 2),
-    c(0.00, 0.01, 0.15, 0.38, 0.31, 0.12, 0.03, 0.00)
+    c(0.00, 0.00, 0.08, 0.26, 0.35, 0.22, 0.08, 0.02)
   )
   expect_equal(
     unname(as.numeric(
@@ -259,7 +256,7 @@ test_that("delay distributions can be specified in different ways", {
   )
   expect_equal(
     round(discretise(Normal(mean = 4, sd = 1, max = 5))[[1]]$pmf, 2),
-    c(0.00, 0.02, 0.14, 0.35, 0.35, 0.14)
+    c(0.00, 0.01, 0.09, 0.26, 0.38, 0.26)
   )
   expect_equal(discretise(Fixed(value = 3))[[1]]$pmf, c(0, 0, 0, 1))
   expect_equal(Fixed(value = 3.5)[[1]]$parameters$value, 3.5)