Skip to content

Commit

Permalink
Changed meaning of 1st col NotMissing slots, NM.* slots
Browse files Browse the repository at this point in the history
- "Intercept" column of NotMissing removed in favor of "_non-null record_"
- This column need not be all 1's
- If a term has no missingness, this is indicated in NM.terms,
  NM.Covariates by a 0.

cf. #85 , #89
  • Loading branch information
benthestatistician committed Dec 6, 2017
1 parent a34e6ab commit 674f354
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 39 deletions.
66 changes: 40 additions & 26 deletions R/Design.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ setClassUnion("Contrasts", c("list", "NULL"))
##' @slot term.labels labels of terms of the originating model formula
##' @slot contrasts Contrasts, a list of contrasts or NULL, as returned by `model.matrix.default`
##' @slot NotMissing Matrix of numbers in [0,1] with as many rows as Covariates but only one more col than there are distinct covariate missingness patterns (at least 1, nothing missing). First col is entirely T or 1, like an intercept.
##' @slot NM.Covariates integer look-up table mapping Covariates columns to columns of NotMissing. (If nothing missing for that column, this is 1.)
##' @slot NM.terms integer look-up table mapping term labels to columns of NotMissing
##' @slot NM.Covariates integer look-up table mapping Covariates columns to columns of NotMissing. (If nothing missing for that column, this is 0.)
##' @slot NM.terms integer look-up table mapping term labels to columns of NotMissing (0 means nothing missing in that column)
##' @keywords internal
##'
setClass("DesignMatrix",
Expand Down Expand Up @@ -109,31 +109,45 @@ design_matrix <- function(object, data = environment(object), remove.intercept=T
complete.cases(covariates[,whichcols])
})
names(ccs.by.term) <- term.labels
null.record <- rowSums(as.data.frame(ccs.by.term))==0
if (any(null.record)) #in this case make sure it's NA in each covariate column
null.record[null.record] <- apply(is.na(covariates[null.record,,drop=FALSE]), 1, all)

terms.with.missings <- !sapply(ccs.by.term, all)

ccs.by.term <- c(list('_non-null record_'=!null.record),
ccs.by.term)
terms.with.missings <- c(TRUE, # in order always to have same leading entry
terms.with.missings)

nm.covs <- integer(ncol(covariates))
nm.terms <- integer(length(terms.with.missings))

if (any(terms.with.missings)) {
nmcols <- ccs.by.term[terms.with.missings]
nmcols.dupes <- duplicated(nmcols, fromLast=FALSE)
if (any(nmcols.dupes))
{
nmcols <- ccs.by.term[terms.with.missings]
nmcols.dupes <- duplicated(nmcols, fromLast=FALSE)
if (any(nmcols.dupes))
{
nm.terms[terms.with.missings][!nmcols.dupes] <- 1L:sum(!nmcols.dupes)
nm.terms[terms.with.missings][nmcols.dupes] <-
match(nmcols[nmcols.dupes], nmcols[!nmcols.dupes])
} else nm.terms[terms.with.missings] <- 1L:length(nmcols)
## At this point nm.terms is a look-up table with an entry for
## _non-null record_ and for each provided term. Entries are
## 0 if there are no null records, or no missings on the term in question;
## otherwise if there are null records and/or term missings, the
## entry gives the column of the matrix `notmissing`, formed a few lines
## down, that describes the relevant missingness pattern.
} else nm.terms[terms.with.missings] <- 1L:length(nmcols)

notmissing <- as.data.frame(nmcols[!nmcols.dupes])
names(notmissing) <- names(ccs.by.term)[terms.with.missings][!nmcols.dupes]
## Now form the look-up table associating columns of the covariates matrix
## with columns of matrix `notmissing` describing relevant missingness patterns.
## Columns associated with terms on which there was no missingness get a 0 here.
nm.terms <- nm.terms[-1L]
nm.covs[assign>0] <- nm.terms[assign[assign>0]]
notmissing <- as.matrix(as.data.frame(nmcols[!nmcols.dupes]))
} else {
notmissing <- matrix(FALSE, nrow(covariates), 0)
}

notmissing <- cbind(matrix(TRUE, nrow(covariates), 1), notmissing)
colnames(notmissing)[1] <- "Intercept"
nm.covs <- nm.covs + 1L
nm.terms <- nm.terms + 1L

notmissing <- as.matrix(notmissing)

new("DesignMatrix",
Covariates=covariates,
OriginalVariables=assign,
Expand Down Expand Up @@ -550,7 +564,7 @@ designToDescriptives <- function(design, covariate.scaling = NULL) {
covars <- cbind(covars, design@NotMissing[, NMcolperm])
vars <- c(colnames(design@Covariates), paste0("(", colnames(design@NotMissing)[NMcolperm], ")") )
colnames(covars) <- vars
covars.nmcols <- c(design@NM.Covariates, rep(1L, k.NM ) )
covars.nmcols <- c(pmax(1L, design@NM.Covariates), rep(1L, k.NM ) )
stratifications <- colnames(design@StrataFrame)

Uweights <- design@UnitWeights * design@NotMissing
Expand Down Expand Up @@ -686,14 +700,14 @@ aggregateDesigns <- function(design) {
dim(unit.weights) <- NULL

Uweights.tall <- design@UnitWeights * design@NotMissing
Covariates <- as.matrix(t(C) %*% ifelse(Uweights.tall[,design@NM.Covariates, drop=FALSE],
Covariates <- as.matrix(t(C) %*% ifelse(Uweights.tall[,pmax(1L,design@NM.Covariates), drop=FALSE],
design@Covariates *
Uweights.tall[,design@NM.Covariates, drop=FALSE],
Uweights.tall[,pmax(1L,design@NM.Covariates), drop=FALSE],
0)
)
Uweights <- as.matrix(t(C) %*% Uweights.tall)
Covariates <- ifelse(Uweights[,design@NM.Covariates, drop=FALSE] > 0,
Covariates/Uweights[,design@NM.Covariates, drop=FALSE],
Covariates <- ifelse(Uweights[,pmax(1L,design@NM.Covariates), drop=FALSE] > 0,
Covariates/Uweights[,pmax(1L,design@NM.Covariates), drop=FALSE],
0)
NotMissing <- ifelse(matrix(unit.weights>0, nrow(Uweights), ncol(Uweights)),
Uweights/unit.weights, 0)
Expand Down Expand Up @@ -777,11 +791,11 @@ alignDesignsByStrata <- function(design, post.align.transform = NULL) {
strata <- names(design@StrataMatrices)

Ewts <- design@UnitWeights * design@NotMissing
Covs <- ifelse(design@NotMissing[, design@NM.Covariates, drop = FALSE],
Covs <- ifelse(design@NotMissing[, pmax(1L,design@NM.Covariates), drop = FALSE],
design@Covariates[, , drop = FALSE], 0)
k.Covs <- ncol(Covs)
NMcolperm <- if ( (k.NM <- ncol(design@NotMissing)) >1) c(2L:k.NM, 1L) else 1
Covs <- cbind(Covs, design@NotMissing[,NMcolperm])
Covs <- cbind(Covs, 0+design@NotMissing[,NMcolperm])
vars <- c(colnames(design@Covariates), paste0("(", colnames(design@NotMissing)[NMcolperm], ")") )
covars.nmcols <- c(design@NM.Covariates, rep(1L, k.NM ) )
origvars <- match(colnames(design@NotMissing), design@TermLabels, nomatch=0L)
Expand Down Expand Up @@ -817,7 +831,7 @@ alignDesignsByStrata <- function(design, post.align.transform = NULL) {
suppressWarnings( #throws singularity warning if covar is linear in S
slm.wfit.csr( # see note in ./utils.R on why we use our own
S, covars[,jj], #`slm.wfit.csr` instead of `SparseM::slm.wfit`
weights=ewts[, covars.nmcols[jj], drop = TRUE])$residuals
weights=ewts[, max(1L, covars.nmcols[jj]), drop = TRUE])$residuals
)
## A value that was missing might have received an odd residual.
## Although such values don't themselves contribute anything, they'll affect a
Expand All @@ -842,7 +856,7 @@ alignDesignsByStrata <- function(design, post.align.transform = NULL) {
for (jj in 1L:k.Covs) {
covars.Sctr[,jj] <- suppressWarnings(
slm.wfit.csr(S, covars.Sctr.new[,jj],
weights=ewts[, covars.nmcols[jj], drop = TRUE])$residuals
weights=ewts[, max(1L, covars.nmcols[jj]), drop = TRUE])$residuals
)
}
}
Expand Down
45 changes: 32 additions & 13 deletions tests/testthat/test.Design.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("Missingness gets passed through in Covariates, recorded in NotMissing
datmf <- model.frame(z ~ x1 + x2 + fac, dat, na.action = na.pass)
simple2 <- RItools:::design_matrix(z ~ x1 + x2 + fac, data = datmf)
expect_equivalent(ncol(simple2@NotMissing), 3)
expect_equivalent(colnames(simple2@NotMissing), c("Intercept", "x1", "fac"))
expect_equivalent(colnames(simple2@NotMissing), c("_non-null record_", "x1", "fac"))

})
test_that("lookup tables OK, even w/ complex & multi-column terms",{
Expand All @@ -50,18 +50,18 @@ test_that("lookup tables OK, even w/ complex & multi-column terms",{
datmf <- model.frame(z ~ x1 + x2 + fac, dat, na.action = na.pass)
simple2 <- RItools:::design_matrix(z ~ x1 + x2 + fac, data=datmf)
expect_equal(simple2@OriginalVariables, 1:3)
expect_equal(simple2@TermLabels, c("x1", "x2", "fac"))
expect_equal(simple2@NM.Covariates, 1+c(1,0,2))
expect_equal(simple2@NM.terms, 1+c(1,0,2))
expect_equal(simple2@TermLabels, c( "x1", "x2", "fac"))
expect_equal(simple2@NM.Covariates, c(2,0,3))
expect_equal(simple2@NM.terms, c(2,0,3))

## check that complex term don't spell trouble in themselves
datmf <- model.frame(z ~ x1 + cut(x2, c(0,3,6)) + fac, data = dat,
na.action = na.pass)
simple3 <- RItools:::design_matrix(z ~ x1 + cut(x2, c(0,3,6)) + fac, data = datmf)
expect_equal(simple3@OriginalVariables, 1:3)
expect_equal(simple3@TermLabels, c("x1", "cut(x2, c(0, 3, 6))", "fac"))
expect_equal(simple3@NM.Covariates, 1+c(1,0,2))
expect_equal(simple3@NM.terms, 1+c(1,0,2))
expect_equal(simple3@NM.Covariates, c(2,0,3))
expect_equal(simple3@NM.terms, c(2,0,3))

## now try a complex term that actually expands to multiple columns
datmf <- model.frame(z ~ x1 + cut(x2, c(0,3,6)) + fac, data = dat,
Expand All @@ -70,8 +70,8 @@ test_that("lookup tables OK, even w/ complex & multi-column terms",{
contrasts=list("cut(x2, c(0, 3, 6))"=diag(2)))
expect_equal(simple4@OriginalVariables, c(1,2,2,3))
expect_equal(simple4@TermLabels, c("x1", "cut(x2, c(0, 3, 6))", "fac"))
expect_equal(simple4@NM.Covariates, 1+c(1,0,0,2))
expect_equal(simple4@NM.terms, 1+c(1,0,2))
expect_equal(simple4@NM.Covariates, c(2,0,0,3))
expect_equal(simple4@NM.terms, c(2,0,3))

## now put NAs in the multi-column complex term
datmf <- model.frame(z ~ x2 + cut(x1, c(0,3,6)) + fac, data = dat,
Expand All @@ -80,8 +80,8 @@ test_that("lookup tables OK, even w/ complex & multi-column terms",{
contrasts=list("cut(x1, c(0, 3, 6))"=diag(2)))
expect_equal(simple5@OriginalVariables, c(1,2,2,3))
expect_equal(simple5@TermLabels, c("x2", "cut(x1, c(0, 3, 6))", "fac"))
expect_equal(simple5@NM.Covariates, 1+c(0,1,1,2))
expect_equal(simple5@NM.terms, 1+c(0,1,2))
expect_equal(simple5@NM.Covariates, c(0,2,2,3))
expect_equal(simple5@NM.terms, c(0,2,3))

})

Expand Down Expand Up @@ -121,12 +121,31 @@ test_that("Duplicated missingness patterns handled appropriately",{
simple3 <- RItools:::design_matrix(z ~ x1 + I(x1^2), data = datmf)
expect_equal(simple3@OriginalVariables, 1:2)
expect_equal(simple3@TermLabels, c("x1", "I(x1^2)"))
expect_equal(ncol(simple3@NotMissing), 2)
expect_equal(simple3@NM.Covariates, 1 + c(1,1))
expect_equal(simple3@NM.terms, 1 + c(1,1))
expect_equal(ncol(simple3@NotMissing), 1)
expect_equal(simple3@NM.Covariates, c(1,1))
expect_equal(simple3@NM.terms, c(1,1))

})

test_that("All-fields missingness |-> NotMissing col '_non-null record_'",{

dat <- data.frame(strat=rep(letters[1:2], c(3,2)),
clus=factor(c(1,1,2:4)),
z=c(TRUE, rep(c(TRUE, FALSE), 2)),
x1=c(rep(NA, 3), 1:2),
x2=c(1:5),
fac=factor(c(NA, rep(1:2,2)))
)
dat$'(weights)' <- 1
datmf <- model.frame(z ~ x1 + I(x1^2) + fac, data=dat, na.action = na.pass)
simple6 <- RItools:::design_matrix(z ~ x1 + I(x1^2) + fac, data = datmf)
expect_equal(simple6@OriginalVariables, 1:3)
expect_equal(simple6@TermLabels, c("x1", "I(x1^2)", "fac"))
expect_equal(colnames(simple6@NotMissing)[1], "_non-null record_")
expect_equal(simple6@NM.Covariates, c(2,2,1))
expect_equal(simple6@NM.terms, c(2,2,1))

} )

context("DesignOptions objects")

Expand Down

0 comments on commit 674f354

Please sign in to comment.