diff --git a/DESCRIPTION b/DESCRIPTION index c0345d5ec..ee591ae47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.11 +Version: 0.12.2.12 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -156,4 +156,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/see +Remotes: easystats/see, easystats/insight diff --git a/NEWS.md b/NEWS.md index fa8d7451d..7be029839 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,12 @@ * `check_dag()`, to check DAGs for correct adjustment sets. +## Changes + +* `check_heterogeneity_bias()` gets a `nested` argument. Furthermore, `by` can + specify more than one variable, meaning that nested or cross-classified + model designs can also be tested for heterogeneity bias. + # performance 0.12.2 Patch release, to ensure that _performance_ runs with older version of diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index 424bf8b5b..3c9b502ce 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -9,8 +9,24 @@ #' that should be checked. If `x` is a mixed model object, this argument #' will be ignored. #' @param by Character vector (or formula) with the name of the variable that -#' indicates the group- or cluster-ID. If `x` is a model object, this -#' argument will be ignored. +#' indicates the group- or cluster-ID. For cross-classified or nested designs, +#' `by` can also identify two or more variables as group- or cluster-IDs. If +#' the data is nested and should be treated as such, set `nested = TRUE`. Else, +#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified +#' design is assumed. If `x` is a model object, this argument will be ignored. 
+#' +#' For nested designs, `by` can be: +#' - a character vector with the name of the variable that indicates the +#' levels, ordered from *highest* level to *lowest* (e.g. +#' `by = c("L4", "L3", "L2")`). +#' - a character vector with variable names in the format `by = "L4/L3/L2"`, +#' where the levels are separated by `/`. +#' +#' See also section _De-meaning for cross-classified designs_ and +#' _De-meaning for nested designs_ below. +#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, +#' the data is treated as cross-classified. Only applies if `by` contains more +#' than one variable. #' @param group Deprecated. Use `by` instead. #' #' @seealso @@ -28,7 +44,7 @@ #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, nested = FALSE, group = NULL) { insight::check_if_installed("datawizard", minimum_version = "0.12.0") ## TODO: deprecate later @@ -54,8 +70,14 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) my_data <- x } - unique_groups <- .n_unique(my_data[[by]]) - combinations <- expand.grid(select, by) + # for nested designs? 
+ if (nested) { + # separate level-indicators with "/", as supported by datawizard + by <- paste(by, collapse = "/") + } + + # create all combinations that should be checked + combinations <- expand.grid(select, by[1]) result <- Map(function(predictor, id) { # demean predictor @@ -72,7 +94,7 @@ check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) } else { NULL } - }, as.character(combinations[[1]]), as.character(combinations[[2]])) + }, as.character(combinations[[1]]), by) out <- unlist(insight::compact_list(result), use.names = FALSE) diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 46f9f70a5..228d26510 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,7 +4,13 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL) +check_heterogeneity_bias( + x, + select = NULL, + by = NULL, + nested = FALSE, + group = NULL +) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -14,8 +20,27 @@ that should be checked. If \code{x} is a mixed model object, this argument will be ignored.} \item{by}{Character vector (or formula) with the name of the variable that -indicates the group- or cluster-ID. If \code{x} is a model object, this -argument will be ignored.} +indicates the group- or cluster-ID. For cross-classified or nested designs, +\code{by} can also identify two or more variables as group- or cluster-IDs. If +the data is nested and should be treated as such, set \code{nested = TRUE}. Else, +if \code{by} defines two or more variables and \code{nested = FALSE}, a cross-classified +design is assumed. If \code{x} is a model object, this argument will be ignored. + +For nested designs, \code{by} can be: +\itemize{ +\item a character vector with the name of the variable that indicates the +levels, ordered from \emph{highest} level to \emph{lowest} (e.g. 
+\code{by = c("L4", "L3", "L2")}). +\item a character vector with variable names in the format \code{by = "L4/L3/L2"}, +where the levels are separated by \code{/}. +} + +See also section \emph{De-meaning for cross-classified designs} and +\emph{De-meaning for nested designs} below.} + +\item{nested}{Logical, if \code{TRUE}, the data is treated as nested. If \code{FALSE}, +the data is treated as cross-classified. Only applies if \code{by} contains more +than one variable.} \item{group}{Deprecated. Use \code{by} instead.} }