Skip to content

Commit

Permalink
Add visualization for linear vs. weighted interpolation
Browse files Browse the repository at this point in the history
  • Loading branch information
pyckle committed Jun 13, 2018
1 parent 2c8776a commit 6f5c685
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 0 deletions.
Binary file not shown.
126 changes: 126 additions & 0 deletions docs/t-digest-paper/weighted-vs-linear-interpolation.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
### Compare linear interpolation of centroids to weighted interpolation ###
fade = rgb(0,0,0,alpha=0.5)
dot.size = 0.7
n = 10000
set.seed(5)

pdf("weighted-vs-linear-interpolation.pdf", width=6, height=2.7, pointsize=10)
layout(matrix(c(1,2),byrow=T, ncol=2), widths=c(1.1,1))
x = sort(log(1-runif(n))) # sorted exponential distribution
F = ((0:(n-1))+0.5)/n # the y points for an x point to its percentile
par(mar=c(2.5,3,1,1))
plot(x, F, cex=dot.size, pch=21, bg=fade, col=NA, type='b', xlim=c(x[1], x[110]), ylim=c(0,0.01), xaxt='n', ylab=NA, mgp=c(1,0.5,0), xlab=NA)

axis(side=1, at=-10:-1, labels=NA)
title(xlab='x', line=0.8, cex.lab=1.5)
title(ylab='q', line=1.5, cex.lab=1.5)

left.end = min(x)

lines(c(left.end, x[100]), c(0, 0.01), lwd=2)
lines(c(left.end, left.end), c(-0.0005, 0.0005), lt=1, col='black', lwd=0.5)
lines(c(x[100], x[100]), c(0.0085, 0.015), lt=1, col='black', lwd=0.5)
text(-7, 0.006, "100")

q.to.k = function(q) {
(asin(2*q-1)/pi + 1/2)
}

k.to.q = function(k,compression) {
sin(k/compression*pi - pi/2)/2 + 0.5
}

makeChart = function(weights, titleToDisp, drawFunc, rangeMin=1, rangeMax=round(1.1*n/100)) {
xLimits = c(x[rangeMin], x[rangeMax])
yLimits = c((rangeMin-1)/n, rangeMax/n)
plot(x, F, cex=dot.size, pch=21, bg=fade, col=NA, type='b', xlim=xLimits, ylim=yLimits, xaxt='n')
title(main=titleToDisp)
axis(side=1, at=-10:-1, labels=NA)
axis(side=2, at=(0:6)/10, labels=NA)
title(xlab='x', line=0.8, cex.lab=1.5)
title(ylab='q', line=2, cex.lab=1.5)

xCoordinatesEnds = c(1, cumsum(weights), n)
xCoordinates = numeric(length(xCoordinatesEnds))
xCoordinates[1]=x[1]
for(i in 2:length(xCoordinates)) {
xCoordinates[i] = mean(x[xCoordinatesEnds[i-1]:xCoordinatesEnds[i]])
}
yCoordinates = c(0, cumsum(weights)- weights / 2, n) / sum(weights)

drawFunc(xCoordinates, yCoordinates, c(1,weights,1))
text(xCoordinates, yCoordinates + (rangeMax-rangeMin)*.06/n, round(c(1, weights, 1)))
}

drawLinear = function(xCoordinates, yCoordinates, weights) {
lines(xCoordinates, yCoordinates, type='o', lwd=1, col='blue')
}
drawWeighted = function(xCoordinates, yCoordinates, weights) {
points(xCoordinates, yCoordinates, col='blue')
for(i in 1:(length(xCoordinates)-1)) {
w1 = weights[i]
w2 = weights[i+1]
x1 = xCoordinates[i]
x2 = xCoordinates[i+1]
mean1 = yCoordinates[i]
mean2 = yCoordinates[i+1]

curve((mean1*w1*(x2-x)+mean2*w2*(x-x1))/(w1*(x2-x)+w2*(x-x1)), x1, x2, add=TRUE, col="blue")
}
}

getIdealBounds = function(compression) {
leftBounds = c(0, k.to.q(1:(compression-1), compression))
rightBounds = k.to.q(1:compression, compression)
(rightBounds - leftBounds) * n
}

weights = c(2, 8, 19, 35, 56, 81, 111)

makeChart(c(weights, n-sum(weights)), "Preset Weights", drawLinear)
makeChart(c(weights, n-sum(weights)), "Preset Weights", drawLinear)
makeChart(c(weights, n-sum(weights)), "Preset Weights", drawWeighted)

drawExampleCharts = function(distName) {
for (i in c(30,50,100,200)) {
titleToDisp = paste(distName," c=", i)
makeChart(getIdealBounds(i), titleToDisp, drawLinear)
makeChart(getIdealBounds(i), titleToDisp, drawWeighted)
}

midLen = n/20
titleToDisp = paste(distName," c=", 100)
makeChart(getIdealBounds(100), titleToDisp, drawLinear, rangeMin=round(n/2-midLen), rangeMax=round(n/2+1000))
makeChart(getIdealBounds(100), titleToDisp, drawWeighted, rangeMin=round(n/2-midLen), rangeMax=round(n/2+1000))

makeChart(getIdealBounds(100), titleToDisp, drawLinear, rangeMin=round(midLen/2), rangeMax=midLen)
makeChart(getIdealBounds(100), titleToDisp, drawWeighted, rangeMin=round(midLen/2), rangeMax=midLen)

bottomPercent = (n*1.1)/100
makeChart(getIdealBounds(100), titleToDisp, drawLinear, rangeMin=n-bottomPercent, rangeMax=n)
makeChart(getIdealBounds(100), titleToDisp, drawWeighted, rangeMin=n-bottomPercent, rangeMax=n)
}

drawExampleCharts(paste("Exp n=",n))

n = 50000
x = sort(log(1-runif(n))) # sorted exponential distribution
F = ((0:(n-1))+0.5)/n # the y points for an x point to its percentile
drawExampleCharts(paste("Exp n=",n))

n = 10000
x = sort(rnorm(n)) # sorted normal distribution
F = ((0:(n-1))+0.5)/n # the y points for an x point to its percentile
drawExampleCharts(paste("Normal n=",n))

n = 50000
x = sort(rnorm(n)) # sorted normal distribution
F = ((0:(n-1))+0.5)/n # the y points for an x point to its percentile
drawExampleCharts(paste("Normal n=",n))

n = 50000
x = sort(runif(n)) # sorted uniform distribution
F = ((0:(n-1))+0.5)/n # the y points for an x point to its percentile
drawExampleCharts(paste("Uniform n=",n))

dev.off()

0 comments on commit 6f5c685

Please sign in to comment.