-
Notifications
You must be signed in to change notification settings - Fork 1
/
power_calculator_functions_farhat2014.R
executable file
·125 lines (114 loc) · 5 KB
/
power_calculator_functions_farhat2014.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Power calculator from:
# A phylogeny-based sampling strategy and power calculator informs genome-wide associations study design for microbial pathogens
#Farhat et al. Genome medicine 2014
# https://doi.org/10.1186/s13073-014-0101-7
######## Site Level Power Calculator Function
######## based on binomial distribution
######## Input Variables ##############
## k = Genome length in bp
## n = vector of sample size (number of strain pairs)
## s = vectors of pairwise distance between matched pairs
## np = nominal false positive rate (default 0.05) this will be bonferroni corrected
## f = vector of effect sizes to test (range 0-1)
####### Output variables ##############
## l = threshold number of mutations in ph+ vs ph- strains to call a site significant
## this is also the quantile of null distribution that protects the nominal fp rate
## power = true positive rate in percent
## fp = exact false positive rate at the threshold l
powerCalculatorSite <- function(k,s,f,n,np)
{
dimnames1<-list(NULL,c("n","s","l","f", "power","fp"))
powermatrixsite<-matrix(nrow=1, ncol=6, dimnames=dimnames1, data=NA)
if (missing(np)){
np<-0.05
}
for (e in f) {
for (m in n) {
for (r in s) {
ts<-m*r #number of snp sites to be tested assumes that k>>> n*s and so overlap between snps is unlikely under neutral evolution
fp<-np/ts ##nominal false positive rate bonferroni corrected
p<-r/k
q<-qbinom(fp,m,p, lower.tail=FALSE) ##quantile of null that protects nominal fp rate
efp<-pbinom(q,m,p, lower.tail=FALSE) ##actual fp rate
tp<-pbinom(q,m,e, lower.tail=FALSE) ##probability of true positive at the quantile given the alternative
powermatrixsite<-rbind(powermatrixsite, c(m, r, q, e, tp*100, efp))
}
}
}
powermatrixsite<-powermatrixsite[-1,]
return(as.data.frame(powermatrixsite))
}
######## Locus Level Power Calculator Function
######## based on poisson distribution
######## Input Variables ##############
## k = Genome length in bp
## n = vector of sample size (number of strain pairs)
## s = vectors of pairwise distance between matched pairs
## np = nominal false positive rate (default 0.05) this will be bonferroni corrected
## f = vector of effect sizes to test (range 0-1)
## gl = vector of locus lengths for the genome of interest
####### Output variables ##############
## l = threshold number of mutations in ph+ vs ph- strains to call a site significant
## this is also the quantile of null distribution that protects the nominal fp rate
## power = true positive rate in percent
## fp = exact false positive rate at the threshold l
powerCalculatorLocus <- function(k,s,f,n,gl,np)
{
gl<-sort(gl[[1]]) #sort loci by length
glq<-quantile(gl,probs=seq(0,1,0.01)) ##seperate loci into 100 quantiles
dimnames1<-list(NULL,c("n","s","l","f","power","fp"))
powermatrix<-matrix(nrow=1, ncol=6, dimnames=dimnames1, data=NA)
if (missing(np)){
np<-0.05
}
for (e in f) {
for (m in n) {
for (x in s) {
efp<-0
tp<-0
for (i in seq(2,length(glq),1)) {
g<-(glq[[i]]+glq[[i-1]])/2
p<-m*x*g/k
tl<-k*(1-exp(-p))/g #number of loci to be tested
fp<-0.05/tl ##bonferonni adjusted p for number of tests
q<-qpois(fp, p, lower.tail=FALSE)
tefp<-ppois(q,p, lower.tail=FALSE) ##actual fp rate
ttp<-ppois(q,m*e, lower.tail=FALSE) ##probability of true positive at the quantile given the alternative
efp<-efp+tefp*0.01 #integrating the false positive rate
tp<-tp+ttp*0.01 #integrating the true positive rate
}
powermatrix<-rbind(powermatrix, c(m, x, q, e,tp*100, efp))
}
}
}
powermatrix<-powermatrix[-1,]
return(as.data.frame(powermatrix))
}
##Plotting function for calculator results
## b vector to plot by options are f, s
## "nameby" name of the variable vector to plot by options are f, s
## fix vector of the variable to fix options are f, s
## "namefix" name of the variable to fix options are f, s
## value the numerical value to fix variable fix to eg s=0.20 ##plotter will return error if this value was not testing in the calculation
## level locus or site
plotPowerResults <-function(result,b,nameb, fix, namefix, value, level)
{
pm<-result[which(fix==value),]
b<-b[which(fix==value)]
plot(pm$n, pm$power, main=paste(level," Level Power by ", nameb, "\n", namefix, "=", value,sep=""), xlab="sample size n-pairs", ylab="power (percent)", type="n")
palette<-c("white", "black", "brown", "darkred", "red", "orange", "green", "blue")
cl<-colors()
cl<-cl[-c(153:252)]
v<-unique(b)
for (i in 1:length(v)) {
pm1<-pm[b==v[i],]
if (i>7) {
ii<-i*5
} else {
ii<-i
}
palette<-c(palette, cl[ii])
lines(pm1$n, pm1$power, col=palette[i+1])
}
legend(x=240, y=50, legend=c(paste(nameb, sep=""),v), col=palette, lty=1, cex=0.75, seg.len=0.6)
}