Skip to content

Commit

Permalink
Interpolate merging digest percentiles by centroid weight
Browse files Browse the repository at this point in the history
  • Loading branch information
pyckle committed Jun 13, 2018
1 parent b2d0bd2 commit 08f3679
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 8 deletions.
29 changes: 24 additions & 5 deletions core/src/main/java/com/tdunning/math/stats/MergingDigest.java
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,9 @@ public double quantile(double q) {
// at the boundaries, we return min or max
if (index < weight[0] / 2) {
assert weight[0] > 0;
return min + 2 * index / weight[0] * (mean[0] - min);
double z1 = (index / totalWeight) * weight[0];
double z2 = ((weight[0] / 2 - index) / totalWeight) * getWeightForMinOrMax(weight[0]);
return weightedAverage(min, z2, mean[0], z1);
}

// in between we interpolate between centroids
Expand All @@ -687,8 +689,8 @@ public double quantile(double q) {
double dw = (weight[i] + weight[i + 1]) / 2;
if (weightSoFar + dw > index) {
// centroids i and i+1 bracket our current point
double z1 = index - weightSoFar;
double z2 = weightSoFar + dw - index;
double z1 = ((index - weightSoFar) / totalWeight) * weight[i + 1];
double z2 = ((weightSoFar + dw - index) / totalWeight) * weight[i];
return weightedAverage(mean[i], z2, mean[i + 1], z1);
}
weightSoFar += dw;
Expand All @@ -698,11 +700,28 @@ public double quantile(double q) {

// weightSoFar = totalWeight - weight[n-1]/2 (very nearly)
// so we interpolate out to max value ever seen
double z1 = index - totalWeight - weight[n - 1] / 2.0;
double z2 = weight[n - 1] / 2 - z1;
double z1 = ((totalWeight - index) / totalWeight) * weight[n - 1];
double z2 = (weight[n - 1] / 2 - z1) * getWeightForMinOrMax(weight[n - 1]);
return weightedAverage(mean[n - 1], z1, max, z2);
}

private double getWeightForMinOrMax(double maxWeight) {
return Math.min(maxWeight, centroidScaleToQuantile(1 / compression));
}

/**
* Takes a centroid scale (the centroid number) and returns the ideal
* starting percentile for it.
*
* @param centroidNumber the number of the centroid
* @return the ideal starting percentile for this centroid
*/
private double centroidScaleToQuantile(double centroidNumber) {
double k = centroidNumber / compression;
double sinePrecalc = Math.sin((k*Math.PI)/2);
return sinePrecalc * sinePrecalc;
}

@Override
public int centroidCount() {
return lastUsedCell;
Expand Down
26 changes: 23 additions & 3 deletions core/src/test/java/com/tdunning/math/stats/MergingDigestTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public void printQuantiles() throws FileNotFoundException {
td.setMinMax(0, 10);
td.add(1);
td.add(2);
td.add(5, 2);
td.add(5);
td.add(6);
td.add(9);
td.add(10);
Expand All @@ -132,7 +132,7 @@ public void printQuantiles() throws FileNotFoundException {

quantiles.printf("x,q\n");
cdfs.printf("x,q\n");
for (double q = 0; q < 1; q += 1e-3) {
for (double q = 1.0 / 12.0; q < 1; q += 1e-3) {
double x = td.quantile(q);
quantiles.printf("%.3f,%.3f\n", x, q);

Expand All @@ -145,7 +145,27 @@ public void printQuantiles() throws FileNotFoundException {
}
}

assertEquals(2.0 / 7, td.cdf(3), 1e-9);
assertEquals(0.25 + 1.0/18.0, td.cdf(3), 1e-9);
}

@Test
public void testCloseTo1QuantileBigWeights() {
MergingDigest td = new MergingDigest(200);
td.add(1, 1);
td.add(2, 100);
td.setMinMax(0, 1000);
final double quantile = td.quantile(.999);
assertTrue("Quantile val incorrect: " + quantile, quantile < 101);
}

@Test
public void testCloseToZeroQuantileBigWeights() {
MergingDigest td = new MergingDigest(200);
td.add(1000, 100);
td.add(1001, 1);
td.setMinMax(0, 1002);
final double quantile = td.quantile(.001);
assertTrue("Quantile val incorrect: " + quantile, quantile > 999);
}

@Override
Expand Down

0 comments on commit 08f3679

Please sign in to comment.