Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SparseM based approach to issue 134 #136

Merged
merged 14 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import(methods)
import(stats)
import(svd)
import(xtable)
importFrom(SparseM,backsolve)
importFrom(SparseM,chol)
importFrom(SparseM,is.matrix.csr)
importFrom(dplyr,mutate)
importFrom(graphics,abline)
Expand Down
2 changes: 1 addition & 1 deletion R/Design.R
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,7 @@ alignDesignsByStrata <- function(a_stratification, design, post.align.transform
## Do this for the not-missing indicators as well as for the manifest variables.
covars <- cbind(Covs_w_touchups, 0+NM[,NMcolperm])
covars <- suppressWarnings(
slm.fit.csr.fixed(S, covars*non_null_record_wts)$residuals
slm_fit_csr(S, covars*non_null_record_wts)$residuals
)
colnames(covars) <- vars

Expand Down
123 changes: 97 additions & 26 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ formula.xbal <- function(x, ...) {
##' @return Result of \code{fun}.
withOptions <- function(optionsToChange, fun) {
oldOpts <- options()
on.exit(options(oldOpts))
options(optionsToChange)
tryCatch(fun(), finally = options(oldOpts))
# store the old values of the options, just for the options that were changed
oldOptValues <- oldOpts[names(optionsToChange)]
tryCatch(fun(), finally = options(oldOptValues))
}

## Our own version of these to handle the signif stars.
Expand Down Expand Up @@ -184,20 +185,20 @@ SparseMMFromFactor <- function(thefactor) {
)
}


## Variant of slm.fit.csr
##
## SparseM's slm.fit.csr has a bug for intercept only models
## (admittedly, these are generally a little silly to be done as a
## sparse matrix), but in order to avoid duplicate code, if
## everything is in a single strata, we use the intercept only model.
##
## @param x As slm.fit.csr
## @param y As slm.fit.csr
## @param ... As slm.fit.csr
## @return As slm.fit.csr
## @importFrom SparseM chol backsolve
slm.fit.csr.fixed <- function(x, y, ...) {
#' new version of slm.fit.csr
#'
#' SparseM's slm.fit.csr has a bug for intercept only models
#' (admittedly, these are generally a little silly to be done as a
#' sparse matrix), but in order to avoid duplicate code, if
#' everything is in a single strata, we use the intercept only model.
#' This implementation contains some workarounds to ensure that only
#' positive definite matrices are handed off to SparseM::chol()
#'
#' @param x As slm.fit.csr
#' @param y As slm.fit.csr
#' @param ... As slm.fit.csr
#' @return As slm.fit.csr
slm_fit_csr <- function(x, y, ...) {
if (is.matrix(y)) {
n <- nrow(y)
ycol <- ncol(y)
Expand All @@ -210,29 +211,99 @@ slm.fit.csr.fixed <- function(x, y, ...) {
stop("x and y don't match n")
}

fit <- .lm.fit(as.matrix(x), as.matrix(y))
coef <- fit$coefficients

## Note above: we no longer import chol or backsolve from SparseM in this function
# chol <- SparseM::chol(t(x) %*% x, ...)
# xy <- t(x) %*% y
# coef <- SparseM::backsolve(chol, xy)
temp_sol <- SparseM_solve(x, y, ...)
coef <- temp_sol[["coef"]]
chol <- temp_sol[["chol"]]

if (is.vector(coef)) {
coef <- matrix(coef, ncol = ycol, nrow = p)
}

fitted <- as.matrix(x %*% coef)
resid <- y - fitted
df <- n - p
list(
coefficients = coef,
# chol = chol,
chol = chol,
residuals = resid,
fitted = fitted, df.residual = df
)
}


#' Helper function to slm_fit_csr
#'
#' This function generates a matrix that can be used to reduce
#' the dimensions of x'x and xy such that positive definiteness is
#' ensured and more practically, that SparseM::chol will work
#'
#' @param x logical vector indicating which entries of x'x are zeroes.
#' @return SparseM matrix that will reduce the dimension of x'x and xy
#' @importFrom SparseM chol backsolve
create_SparseM_reduction_matrix <- function(zeroes)
{
if (all(zeroes))
{
stop("Diagonal of X'X is all zeroes. Unable to proceed.")
}

num_rows <- length(zeroes)
num_cols <- sum(!zeroes)
non_zero_indices <- which(!zeroes)

# Calculate the column indices for non-zero values
col_indices <- sapply(non_zero_indices,
function(i) i - sum(zeroes[1:i]))
values <- rep(1, num_cols)

# Define the row pointer array
ia <- cumsum(c(1, !zeroes))

dimension <- as.integer(c(num_rows, num_cols))
reducing_matrix <- new("matrix.csr", ra = values,
ja = as.integer(col_indices),
ia = as.integer(ia),
dimension = dimension)

return(reducing_matrix)
}


#' Helper function to slm_fit_csr
#'
#' This function performs some checks and takes action to
#' ensure positive definiteness of matrices passed to SparseM functions.
#'
#' @param x A slm.fit.csr
#' @param y A slm.fit.csr
#' @param ... A slm.fit.csr
#' @return list containing coefficients (vector or matrix) and Cholesky decomposition (of class matrix.csr.chol)
#' @importFrom SparseM chol backsolve
SparseM_solve <- function(x, y, ...)
{
xy <- t(x) %*% y
xprimex <- t(x) %*% x
diag.xx <- diag(xprimex)
zeroes <- diag.xx == 0
if (any(zeroes)) #check explicitly for zeroes here so we don't do matrix math without needing to
{ # this branch deals with issue 134
reducing_matrix <- create_SparseM_reduction_matrix(zeroes)
xpx.sub <- t(reducing_matrix) %*% xprimex %*% reducing_matrix
xy.sub <- t(reducing_matrix) %*% xy
chol.result <- SparseM::chol(xpx.sub, ...)
coef.nonzero <- SparseM::backsolve(chol.result, xy.sub)
num_rows <- length(zeroes)
coef.all <- numeric(num_rows)
coef.all[!zeroes] <- coef.nonzero
} else
{
chol.result <- SparseM::chol(xprimex, ...)
coef.all <- SparseM::backsolve(chol.result, xy)
}

return(list("coef" = coef.all,
"chol" = chol.result))
}

## slm.wfit with two fixes
##
## slm.wfit shares the intercept-only issue with slm.fit,
Expand All @@ -259,7 +330,7 @@ slm.wfit.csr <- function(x, y, weights, ...) {
w <- sqrt(weights)
wx <- as(w, "matrix.diag.csr") %*% x
wy <- y * w
fit <- slm.fit.csr.fixed(wx, wy, ...)
fit <- slm_fit_csr(wx, wy, ...)

fit$fitted <- as.matrix(x %*% fit$coef)
fit$residuals <- y - fit$fitted
Expand Down
22 changes: 22 additions & 0 deletions man/SparseM_solve.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/create_SparseM_reduction_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions man/slm_fit_csr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

71 changes: 70 additions & 1 deletion tests/testthat/test.utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("fitter for sparse designs handles intercept only design",
# we get to revert to SparseM:slm.fit.csr
lm.n <- lm.fit(matrix(1,4,1), quickY)

slm.n1 <- slm.fit.csr.fixed(nullfac.csr, quickY)
slm.n1 <- slm_fit_csr(nullfac.csr, quickY)

expect_equal(lm.n$fitted, as.vector(slm.n1$fitted))
expect_equal(lm.n$residuals, as.vector(slm.n1$residuals))
Expand All @@ -54,6 +54,75 @@ test_that("sparse design strat mean calculator returns 0 for strata w/o non-null
}
)

test_that("create_SparseM_reduction_matrix works as expected",
{
expect_true(require("SparseM"))
#let tl stand in for xprimex
tl <- diag(1, 5)
diag(tl)[c(1,2,4)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))

tl <- diag(1, 5)
diag(tl)[c(2,4)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))


tl <- diag(1, 5)
diag(tl)[c(2)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))


tl <- diag(1, 5)
diag(tl)[c(5)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))

tl <- diag(1, 5)
diag(tl)[c(1, 5)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))

tl <- diag(1, 2)
diag(tl)[c(1)] <- 0
zeroes <- diag(tl) == 0
not.zeroes <- !zeroes
stl <- tl[, not.zeroes, drop = FALSE]
sparse_matrix <- create_SparseM_reduction_matrix(zeroes)

expect_identical(stl, as.matrix(sparse_matrix))

tl <- diag(1, 2)
diag(tl)[c(1, 2)] <- 0
zeroes <- diag(tl) == 0

expect_error(create_SparseM_reduction_matrix(zeroes))

}
)

test_that("Residuals from weighted regressions w/ sparse designs",
{
nullfac <- factor(rep("a", 4))
Expand Down
Loading