diff --git a/NAMESPACE b/NAMESPACE
index 8b17878e3..0b7f2ef01 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -61,6 +61,7 @@ export(extract_CrIs)
 export(extract_inits)
 export(extract_samples)
 export(extract_stan_param)
+export(fill_missing)
 export(fix_dist)
 export(fix_parameters)
 export(forecast_infections)
@@ -137,6 +138,7 @@ importFrom(cli,col_blue)
 importFrom(cli,col_red)
 importFrom(data.table,":=")
 importFrom(data.table,.N)
+importFrom(data.table,CJ)
 importFrom(data.table,as.data.table)
 importFrom(data.table,copy)
 importFrom(data.table,data.table)
diff --git a/NEWS.md b/NEWS.md
index ed92b8f02..009ba40fc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,13 +1,26 @@
 # EpiNow2 (development version)
 
-## Documentation
+## Model changes
 
-- Brought the docs on `alpha_sd` up to date with the code change from prior PR #853. By @zsusswein in #862 and reviewed by @jamesmbaazam.
+- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. By @sbfnk in #851 and reviewed by @seabbs and @jamesmbaazam..
 
-## Model changes
+  ```r
+  # Deprecated
+  data |>
+    estimate_infections(obs_opts(na = "accumulate"))
 
-- A bug was fixed where the initial growth was never estimated (i.e. the prior mean was always zero). By @sbfnk in #853 and reviewed by @seabbs.
-- A bug was fixed where an internal function for applying a default cdf cutoff failed due to a difference a vector length issue. By @jamesmbaazam in #858 and reviewed by @sbfnk.
+  # Recommended workflow e.g. for weekly incidence data
+  data |>
+    fill_missing(missing = "accumulate", initial_accumulate = 7) |>
+    estimate_infections()
+  ```
+  
+  - A bug was fixed where the initial growth was never estimated (i.e. the prior mean was always zero). By @sbfnk in #853 and reviewed by @seabbs.
+  - A bug was fixed where an internal function for applying a default cdf cutoff failed due to a difference a vector length issue. By @jamesmbaazam in #858 and reviewed by @sbfnk.
+
+## Documentation
+
+- Brought the docs on `alpha_sd` up to date with the code change from prior PR #853. By @zsusswein in #862 and reviewed by @jamesmbaazam.
 
 # EpiNow2 1.6.1
 
diff --git a/R/checks.R b/R/checks.R
index 575700173..fb44140d5 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -15,14 +15,14 @@
 #' "estimate_infections", "estimate_truncation", or "estimate_secondary".
 #' This is used to determine which checks to perform on the data input.
 #' @importFrom checkmate assert_data_frame assert_date assert_names
-#' assert_numeric
+#'   assert_numeric
+#' @importFrom purrr walk
 #' @importFrom rlang arg_match
 #' @return Called for its side effects.
 #' @keywords internal
 check_reports_valid <- function(data,
                                 model = c(
                                   "estimate_infections",
-                                  "estimate_truncation",
                                   "estimate_secondary"
                                 )) {
   # Check that the case time series (reports) is a data frame
@@ -30,14 +30,13 @@ check_reports_valid <- function(data,
   # Perform checks depending on the model to the data is meant to be used with
   model <- arg_match(model)
 
+  assert_date(data$date, any.missing = FALSE)
   if (model == "estimate_secondary") {
     # Check that data has the right column names
     assert_names(
       names(data),
       must.include = c("date", "primary", "secondary")
     )
-    # Check that the data data.frame has the right column types
-    assert_date(data$date, any.missing = FALSE)
     assert_numeric(data$primary, lower = 0)
     assert_numeric(data$secondary, lower = 0)
   } else {
@@ -46,10 +45,10 @@ check_reports_valid <- function(data,
       names(data),
       must.include = c("date", "confirm")
     )
-    # Check that the data data.frame has the right column types
-    assert_date(data$date, any.missing = FALSE)
     assert_numeric(data$confirm, lower = 0)
   }
+  assert_logical(data$accumulate, null.ok = TRUE)
+  return(invisible(data))
 }
 
 #' Validate probability distribution for passing to stan
@@ -224,7 +223,7 @@ test_data_complete <- function(data, cols_to_check) {
 
 #' Cross-check treatment of `NA` in obs_opts() against input data
 #'
-#' @description `r lifecycle::badge("experimental")`
+#' @description `r lifecycle::badge("deprecated")`
 #'
 #' This function checks the input data for implicit and/or explicit missingness
 #' and checks if the user specified `na = "missing"` in [obs_opts()].
diff --git a/R/create.R b/R/create.R
index 1309fd508..2ffcfaf62 100644
--- a/R/create.R
+++ b/R/create.R
@@ -77,6 +77,10 @@ create_clean_reported_cases <- function(data, horizon = 0,
   }
   reported_cases[is.na(confirm), confirm := fill]
   reported_cases[, "average_7_day" := NULL]
+  ## set accumulate to FALSE in added rows
+  if ("accumulate" %in% colnames(reported_cases)) {
+    reported_cases[is.na(accumulate), accumulate := FALSE]
+  }
   return(reported_cases)
 }
 
@@ -484,7 +488,6 @@ create_obs_model <- function(obs = obs_opts(), dates) {
     obs_scale = as.integer(obs$scale$sd > 0 || obs$scale$mean != 1),
     obs_scale_mean = obs$scale$mean,
     obs_scale_sd = obs$scale$sd,
-    accumulate = obs$accumulate,
     likelihood = as.numeric(obs$likelihood),
     return_likelihood = as.numeric(obs$return_likelihood)
   )
@@ -525,14 +528,16 @@ create_obs_model <- function(obs = obs_opts(), dates) {
 create_stan_data <- function(data, seeding_time,
                              rt, gp, obs, horizon,
                              backcalc, shifted_cases) {
-
   cases <- data[(seeding_time + 1):(.N - horizon)]
   complete_cases <- create_complete_cases(cases)
   cases <- cases$confirm
+  accumulate <- data[-(1:seeding_time)]$accumulate
 
   stan_data <- list(
     cases = complete_cases$confirm,
     cases_time = complete_cases$lookup,
+    any_accumulate = as.integer(any(accumulate)),
+    accumulate = as.integer(accumulate),
     lt = nrow(complete_cases),
     shifted_cases = shifted_cases,
     t = length(data$date),
diff --git a/R/estimate_infections.R b/R/estimate_infections.R
index 7af5b2f1a..8c1f457c9 100644
--- a/R/estimate_infections.R
+++ b/R/estimate_infections.R
@@ -17,8 +17,14 @@
 #' for an example of using `estimate_infections` within the `epinow` wrapper to
 #' estimate Rt for Covid-19 in a country from the ECDC data source.
 #'
-#' @param data A `<data.frame>` of confirmed cases (confirm) by date
-#' (date). `confirm` must be numeric and `date` must be in date format.
+#' @param data A `<data.frame>` of confirmed cases (confirm) by date (date).
+#' `confirm` must be numeric and `date` must be in date format. Optionally
+#' this can also have a logical `accumulate` column which indicates whether
+#' data should be added to the next data point. This is useful when modelling
+#' e.g. weekly incidence data. See also the [fill_missing()] function which
+#' helps add the `accumulate` column with the desired properties when dealing
+#' with non-daily data. If any accumulation is done this happens after
+#' truncation as specified by the `truncation` argument.
 #'
 #' @param reported_cases Deprecated; use `data` instead.
 #'
@@ -176,10 +182,11 @@ estimate_infections <- function(data,
     data = dirty_reported_cases,
     cols_to_check = c("date", "confirm")
   )
+  # Fill missing dates
+  reported_cases <- default_fill_missing_obs(data, obs, "confirm")
   # Create clean and complete cases
-  # Order cases
   reported_cases <- create_clean_reported_cases(
-    data, horizon,
+    reported_cases, horizon,
     filter_leading_zeros = filter_leading_zeros,
     zero_threshold = zero_threshold
   )
@@ -197,9 +204,9 @@ estimate_infections <- function(data,
         min(reported_cases$date) - 1,
         by = "days"
       ),
-      confirm = 0, breakpoint = 0
+      confirm = 0, accumulate = FALSE, breakpoint = 0
     ),
-    reported_cases
+    reported_cases[, .(date, confirm, accumulate, breakpoint)]
   ))
 
   shifted_cases <- create_shifted_cases(
diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
index 1af53fbb5..9c24eafe2 100644
--- a/R/estimate_secondary.R
+++ b/R/estimate_secondary.R
@@ -26,7 +26,13 @@
 #' respectively on the log scale).
 #'
 #' @param data A `<data.frame>` containing the `date` of report and both
-#' `primary` and `secondary` reports.
+#' `primary` and `secondary` reports. Optionally this can also have a logical
+#' `accumulate` column which indicates whether data should be added to the
+#' next data point. This is useful when modelling e.g. weekly incidence data.
+#' See also the [fill_missing()] function which helps add the `accumulate`
+#' column with the desired properties when dealing with non-daily data. If any
+#' accumulation is done this happens after truncation as specified by the
+#' `truncation` argument.
 #'
 #' @param reports Deprecated; use `data` instead.
 #'
@@ -190,7 +196,10 @@ estimate_secondary <- function(data,
     data = reports,
     cols_to_check = c("date", "primary", "secondary")
   )
-  secondary_reports_dirty <- reports[, list(date, confirm = secondary)]
+  reports <- default_fill_missing_obs(reports, obs, "secondary")
+
+  secondary_reports_dirty <-
+    reports[, list(date, confirm = secondary, accumulate)]
   secondary_reports <- create_clean_reported_cases(
     secondary_reports_dirty,
     filter_leading_zeros = filter_leading_zeros,
@@ -225,7 +234,9 @@ estimate_secondary <- function(data,
     obs_time = complete_secondary[lookup > burn_in]$lookup - burn_in,
     lt = sum(complete_secondary$lookup > burn_in),
     burn_in = burn_in,
-    seeding_time = 0
+    seeding_time = 0,
+    any_accumulate = as.integer(any(reports$accumulate > 0)),
+    accumulate = as.integer(reports$accumulate)
   )
   # secondary model options
   stan_data <- c(stan_data, secondary)
diff --git a/R/estimate_truncation.R b/R/estimate_truncation.R
index 55efc164f..0601bd073 100644
--- a/R/estimate_truncation.R
+++ b/R/estimate_truncation.R
@@ -145,7 +145,7 @@ estimate_truncation <- function(data,
     )
   }
    # Validate inputs
-  walk(data, check_reports_valid, model = "estimate_truncation")
+  walk(data, check_reports_valid, model = "estimate_infections")
   assert_class(truncation, "dist_spec")
   assert_class(model, "stanfit", null.ok = TRUE)
   assert_numeric(CrIs, lower = 0, upper = 1)
diff --git a/R/opts.R b/R/opts.R
index 2627b14aa..d56a31a32 100644
--- a/R/opts.R
+++ b/R/opts.R
@@ -594,15 +594,7 @@ gp_opts <- function(basis_prop = 0.2,
 #'   scale) or a list with numeric elements mean (`mean`) and standard deviation
 #'   (`sd`) defining a normally distributed scaling factor. Defaults to 1, i.e.
 #'   no scaling.
-#' @param na Character. Options are "missing" (the default) and "accumulate".
-#'   This determines how NA values in the data are interpreted. If set to
-#'   "missing", any NA values in the observation data set will be interpreted as
-#'   missing and skipped in the likelihood. If set to "accumulate", modelled
-#'   observations will be accumulated and added to the next non-NA data point.
-#'   This can be used to model incidence data that is reported at less than
-#'   daily intervals. If set to "accumulate", the first data point is not
-#'   included in the likelihood but used only to reset modelled observations to
-#'   zero.
+#' @param na Deprecated; use the [fill_missing()] function instead
 #' @param likelihood Logical, defaults to `TRUE`. Should the likelihood be
 #'   included in the model.
 #' @param return_likelihood Logical, defaults to `FALSE`. Should the likelihood
@@ -631,6 +623,21 @@ obs_opts <- function(family = c("negbin", "poisson"),
                      return_likelihood = FALSE) {
   # NB: This has to be checked first before the na argument is touched anywhere.
   na_default_used <- missing(na)
+  if (!na_default_used) {
+    lifecycle::deprecate_warn(
+      "1.7.0",
+      "obs_opts(na)",
+      "fill_missing()",
+      details =  c(
+        paste0(
+          "If NA values are not to be treated as missing use the ",
+          "`fill_missing()` function instead."
+        ),
+        "This argument will be removed in the next release of EpiNow2."
+      )
+    )
+
+  }
   na <- arg_match(na)
   if (na == "accumulate") {
     #nolint start: duplicate_argument_linter
@@ -641,8 +648,8 @@ obs_opts <- function(family = c("negbin", "poisson"),
         "i" = "This means that the first data point is not included in the
       likelihood but used only to reset modelled observations to zero.",
         "i" = "{col_red('If the first data point should be included in the
-        likelihood this can be achieved by adding a data point of arbitrary
-        value before the first data point.')}"
+        likelihood this can be achieved by using the `fill_missing()` function
+        with a non-zero `initial_missing` argument.')}"
       ),
       .frequency = "regularly",
       .frequency_id = "obs_opts"
diff --git a/R/preprocessing.R b/R/preprocessing.R
new file mode 100644
index 000000000..a43171bad
--- /dev/null
+++ b/R/preprocessing.R
@@ -0,0 +1,179 @@
+##' Fill missing data in a data set to prepare it for use within the package
+##'
+##' @description `r lifecycle::badge("experimental")`
+##' This function ensures that all days between the first and last date in the
+##'   data are present. It adds an `accumulate` column that indicates whether
+##'   modelled observations should be accumulated onto a later data point.
+##'   point. This is useful for modelling data that is reported less frequently
+##'   than daily, e.g. weekly incidence data, as well as other reporting
+##'   artifacts such as delayed weekedn reporting. The function can also be used
+##'   to fill in missing observations with zeros.
+##'
+##' @param data Data frame with a `date` column. The other columns depend on the
+##'   model that the data are to be used, e.g. [estimate_infections()] or
+##'   [estimate_secondary()]. See the documentation there for the expected
+##'   format. The data must not already have an `accumulate` function, otherwise
+##'   the function will fail with an error.
+##' @param missing_dates Character. Options are "ignore" (the default),
+##'   "accumulate" and "zero". This determines how missing dates in the data are
+##'   interpreted.  If set to "ignore", any missing dates in the observation
+##'   data will be interpreted as missing and skipped in the likelihood. If set
+##'   to "accumulate", modelled observations on dates that are missing in the
+##'   data will be accumulated and added to the next non-missing data point.
+##'   This can be used to model incidence data that is reported less frequently
+##'   than daily. In that case, the first data point is not included in the
+##'   likelihood (unless `initial_accumulate` is set to a value greater than
+##'   one) but used only to reset modelled observations to zero. If "zero" then
+##'   all observations on missing dates will be assumed to be of value 0.
+##' @param missing_obs Character. How to process dates that exist in the data
+##'   but have observations with NA values. The options available are the same
+##'   ones as for the `missing_dates` argument.
+##' @param initial_accumulate Integer. The number of initial dates to accumulate
+##'   if `missing_dates` or `missing_obs` is set to `"accumulate"`. This
+##'   argument needs ot have a minimum of 1. If it is set to 1 then no
+##'   accumulation is happening on the first data point. If it is greater than 1
+##'   then dates are added to the beginning of the data set to get be able to
+##'   have a sufficient number of modelled observations accumulated onto the
+##'   first data point. For modelling weekly incidence data this should be set
+##'   to 7. If accumulating and the first data point is not NA and this is
+##'   argument is not set, then that data point will be removed with a warning.
+##' @param obs_column Character (default: "confirm"). If given, only the column
+##'   specified here will be used for checking missingness. This is useful if
+##'   using a data set that has multiple columns of hwich one of them
+##'   corresponds to observations that are to be processed here.
+##' @param by Character vector. Name(s) of any additional column(s) where
+##'   missing data should be processed separately for each value in the column.
+##'   This is useful when using data representing e.g. multiple geographies. If
+##'   NULL (default) no such grouping is done.
+##' @return a data.table with an `accumulate` column that indicates whether
+##'   values are accumulated (see the documentation of the `data` argument in
+##'   [estimate_infections()])
+##' @importFrom rlang arg_match
+##' @importFrom data.table as.data.table CJ
+##' @importFrom checkmate assert_data_frame assert_names assert_date
+##'   assert_character assert_integerish
+##' @export
+##' @examples
+##' cases <- data.table::copy(example_confirmed)
+##' ## calculate weekly sum
+##' cases[, confirm := data.table::frollsum(confirm, 7)]
+##' ## limit to dates once a week
+##' cases <- cases[seq(7, nrow(cases), 7)]
+##' ## set the second observation to missing
+##' cases[2, confirm := NA]
+##' ## fill missing data
+##' fill_missing(cases, missing_dates = "accumulate", initial_accumulate = 7)
+fill_missing <- function(data,
+                         missing_dates = c("ignore", "accumulate", "zero"),
+                         missing_obs = c("ignore", "accumulate", "zero"),
+                         initial_accumulate,
+                         obs_column = "confirm",
+                         by = NULL) {
+  assert_data_frame(data)
+  assert_character(missing_dates)
+  assert_character(missing_obs)
+  assert_character(obs_column)
+  assert_character(by, null.ok = TRUE)
+  if (!missing(initial_accumulate)) {
+    assert_integerish(initial_accumulate, lower = 1)
+  }
+  assert_names(
+    colnames(data),
+    must.include = c("date", by, obs_column),
+    disjunct.from = "accumulate"
+  )
+  assert_date(data$date, any.missing = FALSE)
+
+  data <- as.data.table(data)
+
+  missing_dates <- arg_match(missing_dates)
+  missing_obs <- arg_match(missing_obs)
+
+  ## first, processing missing dates
+  initial_add <- ifelse(missing(initial_accumulate), 1, initial_accumulate)
+
+  cols <- list(
+    date = seq(min(data$date) - initial_add + 1, max(data$date), by = "day")
+  )
+  if (!is.null(by))  {
+    for (by_col in by) {
+      cols[[by_col]] <- unique(data[[by_col]])
+    }
+  }
+
+  complete <- do.call(CJ, cols)
+
+  ## mark dates that are present in the data
+  data[, ..present := TRUE]
+  data <- merge(data, complete, by = c("date", by), all.y = TRUE)
+
+  data[, accumulate := FALSE]
+  for (col in obs_column) {
+    if (missing_dates == "accumulate") {
+      data[is.na(..present), accumulate := TRUE]
+    } else if (missing_dates == "zero") {
+      data[is.na(..present), paste(col) := 0]
+    }
+  }
+  if (missing(initial_accumulate) &&
+      isFALSE(data$accumulate[1]) &&
+      any(data$accumulate)) {
+    #nolint start: duplicate_argument_linter
+    cli_warn(
+      c(
+        "!" =
+          "Initial data point not marked as accumulated but some others are.",
+        "i" =
+          "This means that the first data point will not be included in the
+           likelihood.",
+        "i" =
+          "To change this behaviour, set `initial_accumulate`
+           (including setting it to 1 if the first data point is to be taken
+            as is, i.e. as the first daily data point)."
+      )
+    )
+    #nolint end
+    data <- data[date > min(date)]
+  }
+
+  ## second, processing missing observations
+  for (col in obs_column) {
+    if (missing_obs == "accumulate") {
+      data[is.na(get(col)) & !is.na(..present), accumulate := TRUE]
+    } else if (missing_obs == "zero") {
+      data[is.na(get(col)) & !is.na(..present), paste(col) := 0]
+    }
+  }
+
+  data[, ..present := NULL]
+
+  return(data[])
+}
+
+##' Temporary function to support the transition to full support of missing
+##' data.
+##'
+##' @description `r lifecycle::badge("deprecated")`
+##'
+##' @inheritParams create_stan_data
+##' @inheritParams fill_missing
+##' @return data set with missing dates filled in as na values
+##' @keywords internal
+default_fill_missing_obs <- function(data, obs, obs_column) {
+  if (!("accumulate" %in% colnames(data))) {
+    data_rows <- nrow(data)
+    data <- fill_missing(data = data, obs_column = obs_column)
+    if (nrow(data) > data_rows && !obs$accumulate) {
+      #nolint start: duplicate_argument_linter
+      cli_warn(
+        "!" = "Data contains missing dates.",
+        "i" = "Missing dates are interpreted as truly missing data.
+        In the future, this behaviour will be deprecated and the package
+        functions will expect complete data.",
+        "i" = "Complete data can be created using the `fill_missing` function."
+      )
+      #nolint end
+    }
+  }
+  return(data)
+}
diff --git a/R/utilities.R b/R/utilities.R
index 91867b062..5fb413fc5 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -441,6 +441,7 @@ globalVariables(
     "central_lower", "central_upper", "mean_sd", "sd_sd", "average_7_day",
     "..lowers", "..upper_CrI", "..uppers", "timing", "dataset", "last_confirm",
     "report_date", "secondary", "id", "conv", "meanlog", "primary", "scaled",
-    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution"
+    "scaling", "sdlog", "lookup", "new_draw", ".draw", "p", "distribution",
+    "accumulate", "..present"
   )
 )
diff --git a/inst/dev/accumulation.Rmd b/inst/dev/accumulation.Rmd
new file mode 100644
index 000000000..cb4c14f15
--- /dev/null
+++ b/inst/dev/accumulation.Rmd
@@ -0,0 +1,63 @@
+---
+output: github_document
+---
+
+```{r setup, echo = FALSE}
+library("knitr")
+```
+
+# Supporting missing data
+
+We want to support reporting patterns that include *accumulation* (i.e. batch reporting of data from multiple dates, for example weekly) and *missingness* (dates which are lacking reports) in incidence and prevalence data.
+
+## Proposed interface
+
+### Missing data
+
+Any dates between the minimum date and maximum date in the data that is either absent, or present with an `NA` value (currently called `confirm`) is interpreted as missing and ignored in the likelihood.
+All other data points are used in the likelihood.
+This matches the current default behaviour, introduced in version 1.5.0.
+
+### Accumulation
+
+If instead modelled values on these days should be accumulated onto the next reporting date, the passed `data.frame` must have an additional logical column, `accumulate`.
+If `accumulate` is TRUE then the modelled value of the observed latent variable on that day is added to any existing accumulation and stored for later observation.
+If `accumulate` is FALSE the modelled value is added to any stored accumulated variables before potentially being used in the likelihood on that day (if not `NA`).
+Subsequently the stored accumulated variable is reset to zero.
+
+### Example
+
+```{r results = "asis"}
+df <- data.frame(
+  date = as.Date(c("2024-10-23", "2024-10-24", "2024-10-26", "2024-10-27", "2024-10-28")),
+  confirm = c(NA, 10, NA, NA, 17),
+  accumulate = c(TRUE, FALSE, TRUE, TRUE, FALSE)
+)
+df |>
+  kable(align = "l")
+```
+
+The likelihood is evaluated on two days, 24 October and 28 October.
+On 24 October the data (10) is compared to (modelled value on 23 October) + (modelled value on 24 October).
+On 28 October the data (17) is compared to (modelled value on 26 October) + (modelled value on 27 October) + (modelled value on 28 October).
+
+## Helper functions
+
+A helper function, `fill_missing()` can be used to convert weekly data to the required format, i.e.
+
+```{r, results = "asis"}
+df <- data.frame(
+  date = as.Date(c("2024-10-24", "2024-10-31", "2024-11-07")),
+  confirm = c(10, 17, 11)
+)
+df |>
+  kable(align = "l")
+```
+
+can be converted with `fill_missing(missing_dates = "accumulate", initial_accumulate = 7)` to
+
+```{r, results = "asis"}
+df |>
+  fill_missing(missing_dates = "accumulate", initial_accumulate = 7) |>
+  kable(align = "l")
+```
diff --git a/inst/dev/accumulation.md b/inst/dev/accumulation.md
index b10c5a108..63956a22c 100644
--- a/inst/dev/accumulation.md
+++ b/inst/dev/accumulation.md
@@ -1,50 +1,89 @@
+
 # Supporting missing data
 
-We want to support reporting patterns that include *accumulation* (i.e. batch reporting of data from multiple dates, for example weekly) and *missingness* (dates which are lacking reports) in incidence and prevalence data.
+We want to support reporting patterns that include *accumulation*
+(i.e. batch reporting of data from multiple dates, for example weekly)
+and *missingness* (dates which are lacking reports) in incidence and
+prevalence data.
 
 ## Proposed interface
 
 ### Missing data
 
-Any dates between the minimum date and maximum date in the data that is either absent, or present with an `NA` value (currently called `confirm`) is interpreted as missing and ignored in the likelihood.
-All other data points are used in the likelihood.
-This matches the current default behaviour, introduced in version 1.5.0.
+Any dates between the minimum date and maximum date in the data that is
+either absent, or present with an `NA` value (currently called
+`confirm`) is interpreted as missing and ignored in the likelihood. All
+other data points are used in the likelihood. This matches the current
+default behaviour, introduced in version 1.5.0.
 
 ### Accumulation
 
-If instead modelled values on these days should be accumulated onto the next reporting date, the passed `data.frame` must have an additional logical column, `accumulate`.
-If `accumulate` is TRUE then the modelled value of the observed latent variable on that day is added to any existing accumulation and stored for later observation.
-If `accumulate` is FALSE the modelled value is added to any stored accumulated variables before potentially being used in the likelihood on that day (if not `NA`).
-Subsequently the stored accumulated variable is reset to zero.
+If instead modelled values on these days should be accumulated onto the
+next reporting date, the passed `data.frame` must have an additional
+logical column, `accumulate`. If `accumulate` is TRUE then the modelled
+value of the observed latent variable on that day is added to any
+existing accumulation and stored for later observation. If `accumulate`
+is FALSE the modelled value is added to any stored accumulated variables
+before potentially being used in the likelihood on that day (if not
+`NA`). Subsequently the stored accumulated variable is reset to zero.
 
 ### Example
 
+``` r
+df <- data.frame(
+  date = as.Date(c("2024-10-23", "2024-10-24", "2024-10-26", "2024-10-27", "2024-10-28")),
+  confirm = c(NA, 10, NA, NA, 17),
+  accumulate = c(TRUE, FALSE, TRUE, TRUE, FALSE)
+)
+df |>
+  kable(align = "l")
+```
+
 | date       | confirm | accumulate |
-|------------|---------|------------|
+|:-----------|:--------|:-----------|
 | 2024-10-23 | NA      | TRUE       |
 | 2024-10-24 | 10      | FALSE      |
 | 2024-10-26 | NA      | TRUE       |
 | 2024-10-27 | NA      | TRUE       |
 | 2024-10-28 | 17      | FALSE      |
 
-The likelihood is evaluated on two days, 24 October and 28 October.
-On 24 October the data (10) is compared to (modelled value on 23 October) + (modelled value on 24 October).
-On 28 October the data (17) is compared to (modelled value on 26 October) + (modelled value on 27 October) + (modelled value on 28 October).
+The likelihood is evaluated on two days, 24 October and 28 October. On
+24 October the data (10) is compared to (modelled value on 23 October) +
+(modelled value on 24 October). On 28 October the data (17) is compared
+to (modelled value on 26 October) + (modelled value on 27 October) +
+(modelled value on 28 October).
 
 ## Helper functions
 
-A helper function, `fill_missing_dates()` can be used to convert weekly data to the required format, i.e.
+A helper function, `fill_missing()` can be used to convert weekly data
+to the required format, i.e.
+
+``` r
+df <- data.frame(
+  date = as.Date(c("2024-10-24", "2024-10-31", "2024-11-07")),
+  confirm = c(10, 17, 11)
+)
+df |>
+  kable(align = "l")
+```
 
 | date       | confirm |
-|------------|---------|
+|:-----------|:--------|
 | 2024-10-24 | 10      |
 | 2024-10-31 | 17      |
 | 2024-11-07 | 11      |
 
-can be converted with `fill_missing_dates(missing = "accumulate", initial = 7)` to
+can be converted with
+`fill_missing(missing_dates = "accumulate", initial_accumulate = 7)` to
+
+``` r
+df |>
+  fill_missing(missing_dates = "accumulate", initial_accumulate = 7) |>
+  kable(align = "l" )
+```
 
 | date       | confirm | accumulate |
-|------------|---------|------------|
+|:-----------|:--------|:-----------|
 | 2024-10-18 | NA      | TRUE       |
 | 2024-10-19 | NA      | TRUE       |
 | 2024-10-20 | NA      | TRUE       |
@@ -65,4 +104,4 @@ can be converted with `fill_missing_dates(missing = "accumulate", initial = 7)`
 | 2024-11-04 | NA      | TRUE       |
 | 2024-11-05 | NA      | TRUE       |
 | 2024-11-06 | NA      | TRUE       |
-| 2024-11-07 | 11      | TRUE       |
+| 2024-11-07 | 11      | FALSE      |
diff --git a/inst/stan/data/observation_model.stan b/inst/stan/data/observation_model.stan
index 671004ef4..0ce9ef3bb 100644
--- a/inst/stan/data/observation_model.stan
+++ b/inst/stan/data/observation_model.stan
@@ -9,6 +9,5 @@
   real obs_weight;                   // weight given to observation in log density
   int likelihood;                    // Should the likelihood be included in the model
   int return_likelihood; // Should the likehood be returned by the model
-  int accumulate; // Should missing values be accumulated
   int<lower = 0> trunc_id; // id of truncation
   int<lower = 0> delay_id; // id of delay
diff --git a/inst/stan/data/observations.stan b/inst/stan/data/observations.stan
index 11fe8463c..1b41f54d6 100644
--- a/inst/stan/data/observations.stan
+++ b/inst/stan/data/observations.stan
@@ -5,4 +5,6 @@
   int future_time;                                  // time in future for Rt
   array[lt] int<lower = 0> cases; // observed cases
   array[lt] int cases_time; // time of observed cases
+  int any_accumulate; // Should any missing values be accumulated?
+  array[t - seeding_time] int accumulate;  // Should missing values be accumulated (by time)
   vector<lower = 0>[t] shifted_cases;               // prior infections (for backcalculation)
diff --git a/inst/stan/estimate_infections.stan b/inst/stan/estimate_infections.stan
index 303b6b0e4..7d7ea1018 100644
--- a/inst/stan/estimate_infections.stan
+++ b/inst/stan/estimate_infections.stan
@@ -156,6 +156,13 @@ transformed parameters {
   } else {
     obs_reports = reports[1:ot];
   }
+
+  // accumulate reports
+  if (any_accumulate) {
+     profile("accumulate") {
+       obs_reports = accumulate_reports(obs_reports, accumulate);
+    }
+  }
 }
 
 model {
@@ -199,7 +206,7 @@ model {
     profile("report lp") {
       report_lp(
         cases, cases_time, obs_reports, rep_phi, phi_mean, phi_sd, model_type,
-        obs_weight, accumulate
+        obs_weight
       );
     }
   }
diff --git a/inst/stan/estimate_secondary.stan b/inst/stan/estimate_secondary.stan
index 8f5081fb0..15836243c 100644
--- a/inst/stan/estimate_secondary.stan
+++ b/inst/stan/estimate_secondary.stan
@@ -13,6 +13,8 @@ data {
   array[lt] int obs_time;             // observed secondary data
   vector[t] primary;                 // observed primary data
   int burn_in;                       // time period to not use for fitting
+  int any_accumulate; // Should any missing values be accumulated?
+  array[t] int accumulate;  // Should missing values be accumulated (by time)
 #include data/secondary.stan
 #include data/delays.stan
 #include data/observation_model.stan
@@ -70,6 +72,7 @@ transformed parameters {
  if (week_effect > 1) {
    secondary = day_of_week_effect(secondary, day_of_week, day_of_week_simplex);
  }
+
  // truncate near time cases to observed reports
  if (trunc_id) {
     vector[delay_type_max[trunc_id]] trunc_rev_cmf = get_delay_rev_pmf(
@@ -80,6 +83,13 @@ transformed parameters {
     );
     secondary = truncate_obs(secondary, trunc_rev_cmf, 0);
  }
+
+ // accumulate reports
+ if (any_accumulate) {
+   profile("accumulate") {
+     secondary = accumulate_reports(secondary, accumulate);
+   }
+ }
 }
 
 model {
@@ -97,7 +107,7 @@ model {
   if (likelihood) {
     report_lp(
       obs[(burn_in + 1):t][obs_time], obs_time, secondary[(burn_in + 1):t],
-      rep_phi, phi_mean, phi_sd, model_type, 1, accumulate
+      rep_phi, phi_mean, phi_sd, model_type, 1
     );
   }
 }
diff --git a/inst/stan/functions/observation_model.stan b/inst/stan/functions/observation_model.stan
index aa10ccfda..9a96d958e 100644
--- a/inst/stan/functions/observation_model.stan
+++ b/inst/stan/functions/observation_model.stan
@@ -102,52 +102,54 @@ void truncation_lp(array[] real truncation_mean, array[] real truncation_sd,
  * @param phi_sd Real value for standard deviation of reporting overdispersion prior.
  * @param model_type Integer indicating the model type (0 for Poisson, >0 for Negative Binomial).
  * @param weight Real value for weighting the log density contribution.
- * @param accumulate Integer flag indicating whether to accumulate reports (1) or not (0).
+ * @param accumulate Array of integers indicating, for each time point, whether
+ * to accumulate reports (1) or not (0).
  */
 void report_lp(array[] int cases, array[] int cases_time, vector reports,
                array[] real rep_phi, real phi_mean, real phi_sd,
-               int model_type, real weight, int accumulate) {
-  int n = num_elements(cases_time) - accumulate; // number of observations
-  vector[n] obs_reports; // reports at observation time
-  array[n] int obs_cases; // observed cases at observation time
-  if (accumulate) {
-    int t = num_elements(reports);
-    int i = 0;
-    int current_obs = 0;
-    obs_reports = rep_vector(0, n);
-    while (i <= t && current_obs <= n) {
-      if (current_obs > 0) { // first observation gets ignored when accumulating
-        obs_reports[current_obs] += reports[i];
-      }
-      if (i == cases_time[current_obs + 1]) {
-        current_obs += 1;
-      }
-      i += 1;
-    }
-    obs_cases = cases[2:(n + 1)];
-  } else {
-    obs_reports = reports[cases_time];
-    obs_cases = cases;
-  }
+               int model_type, real weight) {
+  int n = num_elements(cases_time); // number of observations
+  vector[n] obs_reports = reports[cases_time]; // reports at observation time
   if (model_type) {
     real dispersion = inv_square(phi_sd > 0 ? rep_phi[model_type] : phi_mean);
     if (phi_sd > 0) {
       rep_phi[model_type] ~ normal(phi_mean, phi_sd) T[0,];
     }
     if (weight == 1) {
-      obs_cases ~ neg_binomial_2(obs_reports, dispersion);
+      cases ~ neg_binomial_2(obs_reports, dispersion);
     } else {
       target += neg_binomial_2_lpmf(
-        obs_cases | obs_reports, dispersion
+        cases | obs_reports, dispersion
       ) * weight;
     }
   } else {
     if (weight == 1) {
-      obs_cases ~ poisson(obs_reports);
+      cases ~ poisson(obs_reports);
     } else {
-      target += poisson_lpmf(obs_cases | obs_reports) * weight;
+      target += poisson_lpmf(cases | obs_reports) * weight;
+    }
+  }
+}
+
+/**
+ * Accumulate reports according to a binary flag at each time point
+ *
+ * This function accumulates reports according to a binary flag at each time point.
+ *
+ * @param reports Vector of expected reports.
+ * @param accumulate Array of integers indicating, for each time point, whether to accumulate or not.
+ *
+ * @return A vector of accumulated reports.
+ */
+vector accumulate_reports(vector reports, array[] int accumulate) {
+  int ot_h = num_elements(reports); // number of reporting time points modelled
+  vector[ot_h] accumulated_reports = reports;
+  for (i in 1:(ot_h - 1)) {
+    if (accumulate[i]) { // first observation gets ignored when accumulating
+      accumulated_reports[i + 1] += accumulated_reports[i];
     }
   }
+  return accumulated_reports;
 }
 
 /**
diff --git a/man/check_na_setting_against_data.Rd b/man/check_na_setting_against_data.Rd
index 54631c09c..0fc510489 100644
--- a/man/check_na_setting_against_data.Rd
+++ b/man/check_na_setting_against_data.Rd
@@ -17,7 +17,7 @@ check_na_setting_against_data(data, cols_to_check, obs)
 \code{\link[=obs_opts]{obs_opts()}}
 }
 \description{
-\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
 
 This function checks the input data for implicit and/or explicit missingness
 and checks if the user specified \code{na = "missing"} in \code{\link[=obs_opts]{obs_opts()}}.
diff --git a/man/check_reports_valid.Rd b/man/check_reports_valid.Rd
index de18dc20d..03e19a540 100644
--- a/man/check_reports_valid.Rd
+++ b/man/check_reports_valid.Rd
@@ -6,7 +6,7 @@
 \usage{
 check_reports_valid(
   data,
-  model = c("estimate_infections", "estimate_truncation", "estimate_secondary")
+  model = c("estimate_infections", "estimate_secondary")
 )
 }
 \arguments{
diff --git a/man/create_clean_reported_cases.Rd b/man/create_clean_reported_cases.Rd
index f782e588e..949880c81 100644
--- a/man/create_clean_reported_cases.Rd
+++ b/man/create_clean_reported_cases.Rd
@@ -14,8 +14,14 @@ create_clean_reported_cases(
 )
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{horizon}{Numeric, defaults to 7. Number of days into the future to
 forecast.}
diff --git a/man/create_shifted_cases.Rd b/man/create_shifted_cases.Rd
index 1a1dff5a6..a9d6fc048 100644
--- a/man/create_shifted_cases.Rd
+++ b/man/create_shifted_cases.Rd
@@ -7,8 +7,14 @@
 create_shifted_cases(data, shift, smoothing_window, horizon)
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{shift}{Numeric, mean delay shift to apply.}
 
diff --git a/man/create_stan_data.Rd b/man/create_stan_data.Rd
index ed80f379e..383f0844e 100644
--- a/man/create_stan_data.Rd
+++ b/man/create_stan_data.Rd
@@ -16,8 +16,14 @@ create_stan_data(
 )
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{seeding_time}{Integer; seeding time, usually obtained using
 \code{\link[=get_seeding_time]{get_seeding_time()}}.}
diff --git a/man/default_fill_missing_obs.Rd b/man/default_fill_missing_obs.Rd
new file mode 100644
index 000000000..81e6f60b5
--- /dev/null
+++ b/man/default_fill_missing_obs.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/preprocessing.R
+\name{default_fill_missing_obs}
+\alias{default_fill_missing_obs}
+\title{Temporary function to support the transition to full support of missing
+data.}
+\usage{
+default_fill_missing_obs(data, obs, obs_column)
+}
+\arguments{
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
+
+\item{obs}{A list of options as generated by \code{\link[=obs_opts]{obs_opts()}} defining the
+observation model. Defaults to \code{\link[=obs_opts]{obs_opts()}}.}
+
+\item{obs_column}{Character (default: "confirm"). If given, only the column
+specified here will be used for checking missingness. This is useful if
+using a data set that has multiple columns of hwich one of them
+corresponds to observations that are to be processed here.}
+}
+\value{
+data set with missing dates filled in as na values
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
+}
+\keyword{internal}
diff --git a/man/epinow.Rd b/man/epinow.Rd
index 6046dea56..2654dfe0b 100644
--- a/man/epinow.Rd
+++ b/man/epinow.Rd
@@ -30,8 +30,14 @@ epinow(
 )
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{generation_time}{A call to \code{\link[=gt_opts]{gt_opts()}} (or its alias
 \code{\link[=generation_time_opts]{generation_time_opts()}}) defining the generation time distribution used.
diff --git a/man/estimate_infections.Rd b/man/estimate_infections.Rd
index cab919f85..528a76edf 100644
--- a/man/estimate_infections.Rd
+++ b/man/estimate_infections.Rd
@@ -26,8 +26,14 @@ estimate_infections(
 )
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{generation_time}{A call to \code{\link[=gt_opts]{gt_opts()}} (or its alias
 \code{\link[=generation_time_opts]{generation_time_opts()}}) defining the generation time distribution used.
diff --git a/man/estimate_secondary.Rd b/man/estimate_secondary.Rd
index 06ff2e848..b985c121c 100644
--- a/man/estimate_secondary.Rd
+++ b/man/estimate_secondary.Rd
@@ -26,7 +26,13 @@ estimate_secondary(
 }
 \arguments{
 \item{data}{A \verb{<data.frame>} containing the \code{date} of report and both
-\code{primary} and \code{secondary} reports.}
+\code{primary} and \code{secondary} reports. Optionally this can also have a logical
+\code{accumulate} column which indicates whether data should be added to the
+next data point. This is useful when modelling e.g. weekly incidence data.
+See also the \code{\link[=fill_missing]{fill_missing()}} function which helps add the \code{accumulate}
+column with the desired properties when dealing with non-daily data. If any
+accumulation is done this happens after truncation as specified by the
+\code{truncation} argument.}
 
 \item{secondary}{A call to \code{\link[=secondary_opts]{secondary_opts()}} or a list containing the
 following  binary variables: cumulative, historic, primary_hist_additive,
diff --git a/man/fill_missing.Rd b/man/fill_missing.Rd
new file mode 100644
index 000000000..2120c6ddf
--- /dev/null
+++ b/man/fill_missing.Rd
@@ -0,0 +1,84 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/preprocessing.R
+\name{fill_missing}
+\alias{fill_missing}
+\title{Fill missing data in a data set to prepare it for use within the package}
+\usage{
+fill_missing(
+  data,
+  missing_dates = c("ignore", "accumulate", "zero"),
+  missing_obs = c("ignore", "accumulate", "zero"),
+  initial_accumulate,
+  obs_column = "confirm",
+  by = NULL
+)
+}
+\arguments{
+\item{data}{Data frame with a \code{date} column. The other columns depend on the
+model that the data are to be used, e.g. \code{\link[=estimate_infections]{estimate_infections()}} or
+\code{\link[=estimate_secondary]{estimate_secondary()}}. See the documentation there for the expected
+format. The data must not already have an \code{accumulate} function, otherwise
+the function will fail with an error.}
+
+\item{missing_dates}{Character. Options are "ignore" (the default),
+"accumulate" and "zero". This determines how missing dates in the data are
+interpreted.  If set to "ignore", any missing dates in the observation
+data will be interpreted as missing and skipped in the likelihood. If set
+to "accumulate", modelled observations on dates that are missing in the
+data will be accumulated and added to the next non-missing data point.
+This can be used to model incidence data that is reported less frequently
+than daily. In that case, the first data point is not included in the
+likelihood (unless \code{initial_accumulate} is set to a value greater than
+one) but used only to reset modelled observations to zero. If "zero" then
+all observations on missing dates will be assumed to be of value 0.}
+
+\item{missing_obs}{Character. How to process dates that exist in the data
+but have observations with NA values. The options available are the same
+ones as for the \code{missing_dates} argument.}
+
+\item{initial_accumulate}{Integer. The number of initial dates to accumulate
+if \code{missing_dates} or \code{missing_obs} is set to \code{"accumulate"}. This
+argument needs ot have a minimum of 1. If it is set to 1 then no
+accumulation is happening on the first data point. If it is greater than 1
+then dates are added to the beginning of the data set to get be able to
+have a sufficient number of modelled observations accumulated onto the
+first data point. For modelling weekly incidence data this should be set
+to 7. If accumulating and the first data point is not NA and this is
+argument is not set, then that data point will be removed with a warning.}
+
+\item{obs_column}{Character (default: "confirm"). If given, only the column
+specified here will be used for checking missingness. This is useful if
+using a data set that has multiple columns of hwich one of them
+corresponds to observations that are to be processed here.}
+
+\item{by}{Character vector. Name(s) of any additional column(s) where
+missing data should be processed separately for each value in the column.
+This is useful when using data representing e.g. multiple geographies. If
+NULL (default) no such grouping is done.}
+}
+\value{
+a data.table with an \code{accumulate} column that indicates whether
+values are accumulated (see the documentation of the \code{data} argument in
+\code{\link[=estimate_infections]{estimate_infections()}})
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
+This function ensures that all days between the first and last date in the
+data are present. It adds an \code{accumulate} column that indicates whether
+modelled observations should be accumulated onto a later data point.
+point. This is useful for modelling data that is reported less frequently
+than daily, e.g. weekly incidence data, as well as other reporting
+artifacts such as delayed weekedn reporting. The function can also be used
+to fill in missing observations with zeros.
+}
+\examples{
+cases <- data.table::copy(example_confirmed)
+## calculate weekly sum
+cases[, confirm := data.table::frollsum(confirm, 7)]
+## limit to dates once a week
+cases <- cases[seq(7, nrow(cases), 7)]
+## set the second observation to missing
+cases[2, confirm := NA]
+## fill missing data
+fill_missing(cases, missing_dates = "accumulate", initial_accumulate = 7)
+}
diff --git a/man/obs_opts.Rd b/man/obs_opts.Rd
index 36f1d9ed2..432f1c3af 100644
--- a/man/obs_opts.Rd
+++ b/man/obs_opts.Rd
@@ -44,15 +44,7 @@ scale) or a list with numeric elements mean (\code{mean}) and standard deviation
 (\code{sd}) defining a normally distributed scaling factor. Defaults to 1, i.e.
 no scaling.}
 
-\item{na}{Character. Options are "missing" (the default) and "accumulate".
-This determines how NA values in the data are interpreted. If set to
-"missing", any NA values in the observation data set will be interpreted as
-missing and skipped in the likelihood. If set to "accumulate", modelled
-observations will be accumulated and added to the next non-NA data point.
-This can be used to model incidence data that is reported at less than
-daily intervals. If set to "accumulate", the first data point is not
-included in the likelihood but used only to reset modelled observations to
-zero.}
+\item{na}{Deprecated; use the \code{\link[=fill_missing]{fill_missing()}} function instead}
 
 \item{likelihood}{Logical, defaults to \code{TRUE}. Should the likelihood be
 included in the model.}
diff --git a/man/save_input.Rd b/man/save_input.Rd
index 2f1ae2747..7afff45dc 100644
--- a/man/save_input.Rd
+++ b/man/save_input.Rd
@@ -7,8 +7,14 @@
 save_input(data, target_folder)
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 
 \item{target_folder}{Character string specifying where to save results (will
 create if not present).}
diff --git a/man/setup_dt.Rd b/man/setup_dt.Rd
index e3811555c..32a7347d4 100644
--- a/man/setup_dt.Rd
+++ b/man/setup_dt.Rd
@@ -7,8 +7,14 @@
 setup_dt(data)
 }
 \arguments{
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 }
 \value{
 A data table
diff --git a/man/update_horizon.Rd b/man/update_horizon.Rd
index 387646e0b..a86477339 100644
--- a/man/update_horizon.Rd
+++ b/man/update_horizon.Rd
@@ -13,8 +13,14 @@ forecast.}
 \item{target_date}{Date, defaults to maximum found in the data if not
 specified.}
 
-\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date
-(date). \code{confirm} must be numeric and \code{date} must be in date format.}
+\item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date).
+\code{confirm} must be numeric and \code{date} must be in date format. Optionally
+this can also have a logical \code{accumulate} column which indicates whether
+data should be added to the next data point. This is useful when modelling
+e.g. weekly incidence data. See also the \code{\link[=fill_missing]{fill_missing()}} function which
+helps add the \code{accumulate} column with the desired properties when dealing
+with non-daily data. If any accumulation is done this happens after
+truncation as specified by the \code{truncation} argument.}
 }
 \value{
 Numeric forecast horizon adjusted for the users intention
diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R
index 14d207cdb..68c5ce6a4 100644
--- a/tests/testthat/test-checks.R
+++ b/tests/testthat/test-checks.R
@@ -205,7 +205,7 @@ test_that("check_na_setting_against_data works as expected", {
   # expect no message
   expect_no_message(
     check_na_setting_against_data(
-      obs = obs_opts(na = "missing"),
+      obs = suppressWarnings(obs_opts(na = "missing")),
       data = copy(example_confirmed)[c(1, 3), confirm := NA],
       cols_to_check = c("date", "confirm")
     )
diff --git a/tests/testthat/test-create_obs_model.R b/tests/testthat/test-create_obs_model.R
index b704ca54d..a9115e261 100644
--- a/tests/testthat/test-create_obs_model.R
+++ b/tests/testthat/test-create_obs_model.R
@@ -3,10 +3,10 @@ dates <- seq(as.Date("2020-03-15"), by = "days", length.out = 15)
 
 test_that("create_obs_model works with default settings", {
   obs <- create_obs_model(dates = dates)
-  expect_equal(length(obs), 12)
+  expect_equal(length(obs), 11)
   expect_equal(names(obs), c(
     "model_type", "phi_mean", "phi_sd", "week_effect", "obs_weight",
-    "obs_scale", "obs_scale_mean", "obs_scale_sd", "accumulate",
+    "obs_scale", "obs_scale_mean", "obs_scale_sd",
     "likelihood", "return_likelihood", "day_of_week"
   ))
   expect_equal(obs$model_type, 1)
diff --git a/tests/testthat/test-estimate_infections.R b/tests/testthat/test-estimate_infections.R
index e089f887b..549cfeef0 100644
--- a/tests/testthat/test-estimate_infections.R
+++ b/tests/testthat/test-estimate_infections.R
@@ -65,7 +65,10 @@ test_that("estimate_infections successfully returns estimates when accumulating
   reported_cases_weekly[, confirm := frollsum(confirm, 7)]
   reported_cases_weekly <-
     reported_cases_weekly[seq(7, nrow(reported_cases_weekly), 7)]
-  test_estimate_infections(reported_cases_weekly, obs = obs_opts(na = "accumulate"))
+  reported_cases_weekly <- fill_missing(
+    reported_cases_weekly, missing_obs = "accumulate", initial_accumulate = 7
+  )
+  test_estimate_infections(reported_cases_weekly)
 })
 
 test_that("estimate_infections successfully returns estimates using no delays", {
diff --git a/tests/testthat/test-estimate_secondary.R b/tests/testthat/test-estimate_secondary.R
index 9b46e7641..dfb9277a6 100644
--- a/tests/testthat/test-estimate_secondary.R
+++ b/tests/testthat/test-estimate_secondary.R
@@ -73,7 +73,7 @@ test_that("estimate_secondary can return values from simulated data and plot
   expect_equal(
     names(inc$predictions),
     c(
-      "date", "primary", "secondary", "mean", "se_mean", "sd",
+      "date", "primary", "secondary", "accumulate", "mean", "se_mean", "sd",
       "lower_90", "lower_50", "lower_20", "median", "upper_20", "upper_50", "upper_90"
     )
   )
@@ -115,6 +115,9 @@ test_that("estimate_secondary successfully returns estimates when accumulating t
   cases_weekly <- merge(
     cases[, list(date, primary)], secondary_weekly, by = "date", all.x = TRUE
   )
+  cases_weekly <- fill_missing(
+    cases_weekly, missing_obs = "accumulate", obs_column = "secondary"
+  )
   inc_weekly <- estimate_secondary(cases_weekly,
     delays = delay_opts(
       LogNormal(
@@ -122,7 +125,7 @@ test_that("estimate_secondary successfully returns estimates when accumulating t
       )
     ),
     obs = obs_opts(
-      scale = list(mean = 0.4, sd = 0.05), week_effect = FALSE, na = "accumulate"
+      scale = list(mean = 0.4, sd = 0.05), week_effect = FALSE
     ), verbose = FALSE
   )
   expect_true(is.list(inc_weekly$data))
diff --git a/tests/testthat/test-obs_opts.R b/tests/testthat/test-obs_opts.R
index a9cb17db0..a90b3e6fe 100644
--- a/tests/testthat/test-obs_opts.R
+++ b/tests/testthat/test-obs_opts.R
@@ -18,14 +18,12 @@ test_that("obs_opts returns expected messages", {
   # NB: We change the local setting here to throw the message on demand, rather
   # than every 8 hours, for the sake of multiple runs of the test within
   # 8 hours.
-  rlang::local_options(rlib_message_verbosity = "verbose")
-  expect_message(
-    obs_opts(na = "accumulate"),
-    "modelled values that correspond to NA values"
-  )
+  suppressMessages(expect_deprecated(obs_opts(na = "accumulate")))
 })
 
 test_that("obs_opts behaves as expected for user specified na treatment", {
 # If user explicitly specifies NA as missing, then don't throw message
-  expect_false(obs_opts(na = "missing")$na_as_missing_default_used)
-})
\ No newline at end of file
+  expect_false(
+    suppressWarnings(obs_opts(na = "missing"))$na_as_missing_default_used
+  )
+})
diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R
new file mode 100644
index 000000000..1a9462d2b
--- /dev/null
+++ b/tests/testthat/test-preprocessing.R
@@ -0,0 +1,35 @@
+cases <- data.table::copy(example_confirmed)
+cases[, confirm := frollsum(confirm, 7)]
+cases <- cases[seq(7, nrow(cases), 7)]
+cases[2, confirm := NA]
+
+test_that("fill_missing works with NA and missing cases", {
+  filled <- fill_missing(
+    cases, missing_dates = "accumulate", initial_accumulate = 7
+  )
+  expect_equal(nrow(filled), nrow(cases) * 7)
+  expect_true(all(filled[!is.na(confirm), accumulate] == FALSE))
+  expect_equal(filled[1:7, accumulate], c(rep(TRUE, 6), FALSE))
+})
+
+test_that("fill_missing works with by columns", {
+  complete <- CJ(
+    date = cases$date,
+    country = c("A", "B"),
+    obs = c("Cases", "Deaths")
+  )
+  more_cases <- merge(cases, complete, by = "date")
+  filled <- fill_missing(
+    more_cases, missing_obs = "accumulate", initial_accumulate = 7,
+    by = c("country", "obs")
+  )
+  expect_equal(nrow(filled), nrow(more_cases) * 7)
+  expect_true(all(filled[!is.na(confirm), accumulate] == FALSE))
+})
+
+test_that("fill_missing warns about initial data points", {
+  expect_warning(
+    fill_missing(cases, missing_dates = "accumulate"),
+    "Initial data point not marked as accumulated"
+  )
+})