-
Notifications
You must be signed in to change notification settings - Fork 3
/
old_code.R
88 lines (76 loc) · 2.56 KB
/
old_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
roysPval.PcevSingular <- function(pcevObj, shrink, distrib = c("Wishart", "TW"), index, ...) {
results <- estimatePcev(pcevObj, shrink)
n <- nrow(pcevObj$Y)
p <- ncol(pcevObj$Y)
q <- ncol(pcevObj$X)
d <- results$largestRoot
distrib <- match.arg(distrib)
if (distrib == 'Wishart'){
if(!requireNamespace("rootWishart")) {
stop("This requires the rootWishart package. You should use distrib = TW instead.")
}
resid <- results$residual
s <- q - 1
r <- n - s
trRessq <- sum(resid ^ 2)
trRes <- sum(diag(resid))
b <- (trRes/r/p)^2 / ((trRessq-trRes^2/r)/(r-1)/(r+2)/p)
pvalue <- 1-rootWishart::singleWishart(d*p*b, r, s, mprec = TRUE)
} else {
null_dist <- replicate(25, expr = {
tmp <- pcevObj
tmp$Y <- tmp$Y[sample(n), ]
tmpRes <- try(estimatePcev(tmp, shrink, index), silent=TRUE)
if(inherits(tmpRes, "try-error")) {
return(NA)
} else {
return(tmpRes$largestRoot)
}
})
# Use method of moments
muTW <- -1.2065335745820
sigmaTW <- sqrt(1.607781034581)
muS <- mean(log(null_dist))
sigmaS <- stats::sd(log(null_dist))
sigma1 <- sigmaS/sigmaTW
mu1 <- muS - sigma1 * muTW
TW <- (log(d) - mu1)/sigma1
pvalue <- RMTstat::ptw(TW, beta=1, lower.tail = FALSE, log.p = FALSE)
}
results$pvalue <- pvalue
return(results)
}
#Function to find the level in the factor variable X
#having the strongest effect on Y.
#Anova helper-function
#' @importFrom stats anova as.formula fitted.values lm
linregfn <- function(y, covars){
Dis <- covars
y <- unlist(y)
modelstring <- "y ~ covars"
fit <- lm(as.formula(modelstring))
resid <- fitted.values(fit)
sfit <- summary(fit)
anovF <- anova(fit)
nc1 <- nrow(sfit$coefficients)
nc2 <- nrow(anovF) - 1
pvals <- c(sfit$coefficients[2:nc1, 1], sfit$coefficients[2:nc1,4], anovF[1:nc2,5])
names(pvals) <- c(paste0('c_', rownames(sfit$coefficients)[2:nc1]), rownames(sfit$coefficients)[2:nc1], rownames(anovF)[1:nc2])
return(list(pvals = pvals,
resid = resid))
}
#function to find the most deviant subpopulation of samples
# @export
fimp <- function(data, covars){
pvals <- list()
l <- unique(covars)
for (i in l) {
cnew <- ifelse(covars == i, 1, 0)
resFn <- linregfn(data, as.factor(cnew))
pvals <- c(pvals, resFn$pvals['covars'])
}
allp <- linregfn(data, as.factor(covars))$pvals['covars']
minp <- which(unlist(pvals) == min(unlist(pvals)));
return(list(minp = minp, pval = pvals[minp],
lev = l[minp], allp = allp))
}