Skip to content

Commit

Permalink
Discretise with two-day censoring windows (#518)
Browse files Browse the repository at this point in the history
* discretise with two-day window in stan code

* discretise with two-day window in `dist_skel`

* update tests

* add news item

* add @jamesmbaazam as reviewer

* highlight update as major change
  • Loading branch information
sbfnk authored Mar 25, 2024
1 parent adf34dd commit 10b2c6a
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 131 deletions.
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# EpiNow2 1.4.9000

## Breaking changes
## Major changes

* Delay discretisation is now based on a two-day censoring window (with uniform probability in between), based on recommendations in [Park et al, medRxiv, 2024](https://doi.org/10.1101/2024.01.12.24301247). By @sbfnk in #518 and reviewed by @jamesmbaazam.

## OTher breaking changes

* The functions `get_dist`, `get_generation_time`, `get_incubation_period` have been deprecated and replaced with examples. By @sbfnk in #481 and reviewed by @seabbs.
* The utility function `update_list()` has been deprecated in favour of `utils::modifyList()` because it comes with an installation of R. By @jamesmbaazam in #491 and reviewed by @seabbs.
Expand Down
98 changes: 40 additions & 58 deletions R/dist_spec.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
#'
#' @param discrete Logical, defaults to `FALSE`. Should the probability
#' distribution be discretised. In this case each entry of the probability
#' mass function corresponds to the 1-length interval ending at the entry,
#' i.e. the probability mass function is a vector where the first entry
#' corresponds to the integral over the (0,1] interval of the continuous
#' distribution, the second entry corresponds to the (1,2] interval etc.
#' mass function corresponds to the 2-length interval ending at the entry
#' except for the first interval that covers (0, 1). That is, the probability
#' mass function is a vector where the first entry corresponds to the integral
#' over the (0,1] interval of the continuous distribution, the second entry
#' corresponds to the (0,2] interval, the third entry corresponds to the (1,
#' 3] interval etc.
#'
#' @param params A list of parameters values (by name) required for each model.
#' For the exponential model this is a rate parameter and for the gamma model
Expand Down Expand Up @@ -84,74 +86,34 @@
#' )
dist_skel <- function(n, dist = FALSE, cum = TRUE, model,
discrete = FALSE, params, max_value = 120) {
## define unnormalised support function
if (model == "exp") {
# define support functions for exponential dist
rdist <- function(n) {
rexp(n, params[["rate"]])
}
pdist <- function(n) {
pexp(n, params[["rate"]]) / pexp(max_value, params[["rate"]])
}
ddist <- function(n) {
(pexp(n + 1, params[["rate"]]) -
pexp(n, params[["rate"]])) /
pexp(max_value + 1, params[["rate"]])
updist <- function(n) {
pexp(n, params[["rate"]])
}
} else if (model == "gamma") {
rdist <- function(n) {
rgamma(n = n, shape = params[["shape"]], rate = params[["rate"]])
}
pdist <- function(n) {
pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]]) /
pgamma(
q = max_value + 1, shape = params[["shape"]], rate = params[["rate"]]
)
}
ddist <- function(n) {
(pgamma(q = n + 1, shape = params[["shape"]], rate = params[["rate"]]) -
pgamma(q = n, shape = params[["shape"]], rate = params[["rate"]])) /
pgamma(q = max_value + 1, params[["shape"]], rate = params[["rate"]])
updist <- function(n) {
pgamma(n, params[["shape"]], params[["rate"]])
}
} else if (model == "lognormal") {
rdist <- function(n) {
rlnorm(n, params[["meanlog"]], params[["sdlog"]])
}
pdist <- function(n) {
plnorm(n, params[["meanlog"]], params[["sdlog"]]) /
plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]])
}
ddist <- function(n) {
(plnorm(n + 1, params[["meanlog"]], params[["sdlog"]]) -
plnorm(n, params[["meanlog"]], params[["sdlog"]])) /
plnorm(max_value + 1, params[["meanlog"]], params[["sdlog"]])
updist <- function(n) {
plnorm(n, params[["meanlog"]], params[["sdlog"]])
}
} else if (model == "normal") {
rdist <- function(n) {
rnorm(n, params[["mean"]], params[["sd"]])
}
pdist <- function(n) {
pnorm(n, params[["mean"]], params[["sd"]]) /
pnorm(max_value + 1, params[["mean"]], params[["sd"]])
}
ddist <- function(n) {
(pnorm(n + 1, params[["mean"]], params[["sd"]]) -
pnorm(n, params[["mean"]], params[["sd"]])) /
pnorm(max_value + 1, params[["mean"]], params[["sd"]])
updist <- function(n) {
pnorm(n, params[["mean"]], params[["sd"]])
}
} else if (model == "fixed") {
rdist <- function(n) {
rep(params[["value"]], n)
}
pdist <- function(n) {
updist <- function(n) {
as.integer(n > params[["value"]])
}
ddist <- function(n) {
as.integer(n == params[["value"]])
}
}

if (discrete) {
cmf <- c(0, pdist(seq_len(max_value + 1)))
cmf <- c(0, updist(1),
updist(seq_len(max_value)) + updist(seq_len(max_value) + 1)
) /
(updist(max_value) + updist(max_value + 1))
pmf <- diff(cmf)
rdist <- function(n) {
sample(
Expand All @@ -164,6 +126,26 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model,
ddist <- function(n) {
pmf[n + 1]
}
} else {
pdist <- function(n) {
updist(n) / updist(max_value + 1)
}
ddist <- function(n) {
pdist(n + 1) - pdist(n)
}
if (model == "exp") {
rdist <- function(n) {
rexp(n, params[["rate"]])
}
} else if (model == "gamma") {
rdist <- function(n) {
rgamma(n, params[["shape"]], params[["rate"]])
}
} else if (model == "lognormal") {
rdist <- function(n) {
rlnorm(n, params[["meanlog"]], params[["sdlog"]])
}
}
}

# define internal sampling function
Expand Down
15 changes: 11 additions & 4 deletions inst/stan/functions/pmfs.stan
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@ vector discretised_pmf(vector params, int n, int dist) {
reject("Unknown distribution function provided.");
}
// discretise
lpmf[1] = upper_lcdf[1];
lpmf[2:n] = log_diff_exp(upper_lcdf[2:n], upper_lcdf[1:(n-1)]);
// normalize
lpmf = lpmf - upper_lcdf[n];
if (n > 1) {
lpmf[1] = upper_lcdf[1];
lpmf[2] = upper_lcdf[2];
if (n > 2) {
lpmf[3:n] = log_diff_exp(upper_lcdf[3:n], upper_lcdf[1:(n - 2)]);
}
// normalize
lpmf = lpmf - log_sum_exp(upper_lcdf[(n - 1):n]);
} else {
lpmf[1] = 0;
}
return(exp(lpmf));
}

Expand Down
10 changes: 6 additions & 4 deletions man/dist_skel.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 28 additions & 28 deletions tests/testthat/_snaps/simulate-infections.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,33 +36,33 @@

variable date value
<char> <Date> <num>
1: infections 2023-01-01 214.1121
2: infections 2023-01-02 225.6793
3: infections 2023-01-03 237.8341
4: infections 2023-01-04 250.6285
5: infections 2023-01-05 264.1041
6: infections 2023-01-06 278.3004
7: infections 2023-01-07 293.2573
8: infections 2023-01-08 206.0109
9: infections 2023-01-09 197.6193
10: infections 2023-01-10 188.2225
11: infections 2023-01-11 178.7223
12: infections 2023-01-12 169.4362
13: infections 2023-01-13 160.4852
14: infections 2023-01-14 151.9122
15: reported_cases 2023-01-01 157.0000
16: reported_cases 2023-01-02 146.0000
17: reported_cases 2023-01-03 164.0000
18: reported_cases 2023-01-04 136.0000
19: reported_cases 2023-01-05 601.0000
20: reported_cases 2023-01-06 263.0000
21: reported_cases 2023-01-07 443.0000
22: reported_cases 2023-01-08 173.0000
23: reported_cases 2023-01-09 201.0000
24: reported_cases 2023-01-10 15.0000
25: reported_cases 2023-01-11 229.0000
26: reported_cases 2023-01-12 151.0000
27: reported_cases 2023-01-13 232.0000
28: reported_cases 2023-01-14 82.0000
1: infections 2023-01-01 212.0302
2: infections 2023-01-02 223.0571
3: infections 2023-01-03 234.4941
4: infections 2023-01-04 246.4754
5: infections 2023-01-05 259.0516
6: infections 2023-01-06 272.2608
7: infections 2023-01-07 286.1383
8: infections 2023-01-08 200.4796
9: infections 2023-01-09 194.5554
10: infections 2023-01-10 186.0554
11: infections 2023-01-11 177.1761
12: infections 2023-01-12 168.4014
13: infections 2023-01-13 159.8970
14: infections 2023-01-14 151.7212
15: reported_cases 2023-01-01 152.0000
16: reported_cases 2023-01-02 142.0000
17: reported_cases 2023-01-03 158.0000
18: reported_cases 2023-01-04 131.0000
19: reported_cases 2023-01-05 577.0000
20: reported_cases 2023-01-06 252.0000
21: reported_cases 2023-01-07 424.0000
22: reported_cases 2023-01-08 169.0000
23: reported_cases 2023-01-09 233.0000
24: reported_cases 2023-01-10 285.0000
25: reported_cases 2023-01-11 191.0000
26: reported_cases 2023-01-12 154.0000
27: reported_cases 2023-01-13 236.0000
28: reported_cases 2023-01-14 84.0000
variable date value

20 changes: 10 additions & 10 deletions tests/testthat/_snaps/simulate-secondary.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@

date.date secondary
<Date> <num>
1: 2020-02-22 2
2: 2020-02-23 18
3: 2020-02-24 83
4: 2020-02-25 31
5: 2020-02-26 185
1: 2020-02-22 1
2: 2020-02-23 12
3: 2020-02-24 54
4: 2020-02-25 27
5: 2020-02-26 157
---
126: 2020-06-26 163
127: 2020-06-27 375
128: 2020-06-28 124
129: 2020-06-29 172
130: 2020-06-30 194
126: 2020-06-26 315
127: 2020-06-27 157
128: 2020-06-28 237
129: 2020-06-29 259
130: 2020-06-30 234

6 changes: 3 additions & 3 deletions tests/testthat/test-delays.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("generation times can be specified in different ways", {
),
params = delay_params
), digits = 2),
c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00, 1.00)
c(0.01, 0.08, 0.20, 0.32, 0.40, 1.00, 1.00)
)
})

Expand All @@ -52,7 +52,7 @@ test_that("delay parameters can be specified in different ways", {
),
params = delay_params
), digits = 2), n = -2),
c(0.02, 0.11, 0.22, 0.30, 0.35, 1.00)
c(0.01, 0.08, 0.20, 0.32, 0.40, 1.00)
)
})

Expand All @@ -64,7 +64,7 @@ test_that("truncation parameters can be specified in different ways", {
),
params = delay_params
), digits = 2), n = -2),
c(1.00, 0.02, 0.11, 0.22, 0.30, 0.35)
c(1.00, 0.01, 0.08, 0.20, 0.32, 0.40)
)
})

Expand Down
19 changes: 8 additions & 11 deletions tests/testthat/test-dist_spec.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test_that("dist_spec returns correct output for fixed lognormal distribution", {
as.vector(round(result[[1]]$pmf, 2)),
c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03,
0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09,
0.10, 0.10, 0.11, 0.12)
0.10, 0.11, 0.11, 0.12)
)
})

Expand Down Expand Up @@ -43,7 +43,7 @@ test_that("dist_spec returns correct output for fixed distribution", {
as.vector(round(result[[1]]$pmf, 2)),
c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03,
0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09,
0.10, 0.10, 0.11, 0.12)
0.10, 0.11, 0.11, 0.12)
)
})

Expand Down Expand Up @@ -111,10 +111,8 @@ test_that("Testing `apply_tolerance` function applied to a convolution", {

test_that("summary functions return correct output for fixed lognormal distribution", {
dist <- discretise(LogNormal(mean = 3, sd = 1, max = 19))
## here we can see the bias from
# using this kind of discretisation approach
expect_equal(EpiNow2:::mean.dist_spec(dist), 2.49, tolerance = 0.01)
expect_equal(EpiNow2:::sd_dist(dist), 1.09, tolerance = 0.01)
expect_equal(EpiNow2:::mean.dist_spec(dist), 3.0, tolerance = 0.01)
expect_equal(EpiNow2:::sd_dist(dist), 1.34, tolerance = 0.01)
expect_equal(EpiNow2:::max.dist_spec(dist), 19L)
})

Expand Down Expand Up @@ -147,7 +145,7 @@ test_that("sd_dist returns NA when applied to uncertain distributions", {
test_that("print.dist_spec correctly prints the parameters of the fixed lognormal", {
dist <- discretise(LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19))

expect_output(print(dist), "- nonparametric distribution\\n PMF: \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]")
expect_output(print(dist),"- nonparametric distribution\\n PMF: \\[0\\.00068 0\\.027 0\\.11 0\\.18 0\\.19 0\\.16 0\\.11 0\\.078 0\\.052 0\\.035 0\\.023 0\\.015 0\\.0099 0\\.0065 0\\.0044 0\\.003 0\\.002 0\\.0014 0\\.00095 0\\.00066\\]" )
})

test_that("print.dist_spec correctly prints the parameters of the uncertain gamma", {
Expand All @@ -170,7 +168,6 @@ test_that("print.dist_spec correctly prints the parameters of a combination of d
dist1 <- LogNormal(meanlog = 1.5, sdlog = 0.5, max = 19)
dist2 <- Gamma(shape = Normal(3, 0.5), rate = Normal(2, 0.5), max = 19)
combined <- dist1 + dist2

expect_output(print(combined), "Composite distribution:\\n- lognormal distribution \\(max: 19\\):\\n meanlog:\\n 1\\.5\\n sdlog:\\n 0\\.5\\n- gamma distribution \\(max: 19\\):\\n shape:\\n - normal distribution:\\n mean:\\n 3\\n sd:\\n 0\\.5\\n rate:\\n - normal distribution:\\n mean:\\n 2\\n sd:\\n 0\\.5")
})

Expand Down Expand Up @@ -227,7 +224,7 @@ test_that("delay distributions can be specified in different ways", {
)
expect_equal(
round(discretise(LogNormal(mean = 4, sd = 1, max = 10))[[1]]$pmf, 2),
c(0.00, 0.00, 0.14, 0.40, 0.30, 0.11, 0.03, 0.01, 0.00, 0.00, 0.00)
c(0.00, 0.00, 0.07, 0.27, 0.35, 0.21, 0.07, 0.02, 0.00, 0.00, 0.00)
)
expect_equal(
unname(as.numeric(Gamma(mean = 4, sd = 1)[[1]]$parameters)),
Expand All @@ -236,7 +233,7 @@ test_that("delay distributions can be specified in different ways", {
)
expect_equal(
round(discretise(Gamma(mean = 4, sd = 1, max = 7))[[1]]$pmf, 2),
c(0.00, 0.01, 0.15, 0.38, 0.31, 0.12, 0.03, 0.00)
c(0.00, 0.00, 0.08, 0.26, 0.35, 0.22, 0.08, 0.02)
)
expect_equal(
unname(as.numeric(
Expand All @@ -259,7 +256,7 @@ test_that("delay distributions can be specified in different ways", {
)
expect_equal(
round(discretise(Normal(mean = 4, sd = 1, max = 5))[[1]]$pmf, 2),
c(0.00, 0.02, 0.14, 0.35, 0.35, 0.14)
c(0.00, 0.01, 0.09, 0.26, 0.38, 0.26)
)
expect_equal(discretise(Fixed(value = 3))[[1]]$pmf, c(0, 0, 0, 1))
expect_equal(Fixed(value = 3.5)[[1]]$parameters$value, 3.5)
Expand Down
Loading

0 comments on commit 10b2c6a

Please sign in to comment.