commit cda16c857e942a822df6b28f23518fc087bbc0f4 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sat May 19 21:11:49 2018 +0200
Compute percentiles in connbidirect module using linear interpolation.
So far we computed percentiles in the connbidirect module using our own formula that picked existing values for median and quartiles.
Now we're using Apache Commons Math with method R_7 which uses linear interpolation when a percentile lies between two data points.
Implements the last part of #26035. --- .../metrics/stats/connbidirect/Main.java | 31 ++++++++-- .../metrics/stats/connbidirect/MainTest.java | 71 +++++++++++++++++++--- 2 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java index b2dc1b6..468ffba 100644 --- a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java +++ b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java @@ -8,6 +8,8 @@ import org.torproject.descriptor.DescriptorReader; import org.torproject.descriptor.DescriptorSourceFactory; import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile; + import org.slf4j.Logger; import org.slf4j.LoggerFactory;
@@ -22,7 +24,6 @@ import java.io.StringReader; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -455,19 +456,39 @@ public class Main { } } final String[] quantiles = new String[] { "0.25", "0.5", "0.75" }; - final int[] centiles = new int[] { 25, 50, 75 }; + final double[] centiles = new double[] { 25.0, 50.0, 75.0 }; for (Map.Entry<String, List<Short>> e : fractionsByDateAndDirection.entrySet()) { String dateAndDirection = e.getKey(); List<Short> fractions = e.getValue(); - Collections.sort(fractions); + SortedMap<Double, Short> computedPercentiles + = computePercentiles(fractions, centiles); for (int i = 0; i < quantiles.length; i++) { String dateDirectionAndQuantile = dateAndDirection + "," + quantiles[i]; - short fraction = fractions.get((centiles[i] * fractions.size()) - / 100); + short fraction = computedPercentiles.get(centiles[i]); aggregateStats.put(dateDirectionAndQuantile, fraction); } } } + + /** Compute percentiles (greater than 0.0 and smaller than or equal 100.0) for + * the given list of values, and return a map with percentiles as keys and + * computed values as values. If the list of values is empty, the returned map + * contains all zeros. */ + static SortedMap<Double, Short> computePercentiles(List<Short> valueList, + double ... percentiles) { + SortedMap<Double, Short> computedPercentiles = new TreeMap<>(); + double[] valueArray = new double[valueList.size()]; + for (int i = 0; i < valueList.size(); i++) { + valueArray[i] = valueList.get(i).doubleValue(); + } + Percentile percentile = new Percentile() + .withEstimationType(Percentile.EstimationType.R_7); + percentile.setData(valueArray); + for (double p : percentiles) { + computedPercentiles.put(p, (short) Math.floor(percentile.evaluate(p))); + } + return computedPercentiles; + } } diff --git a/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java b/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java index 255cccf..69001da 100644 --- a/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java +++ b/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java @@ -11,6 +11,8 @@ import org.junit.Test;
import java.io.ByteArrayOutputStream; import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; @@ -242,15 +244,15 @@ public class MainTest { @Test public void testUpdateAggregateStatsTwoRawStat() { SortedMap<String, Short> expectedAggregateStats = new TreeMap<>(); - expectedAggregateStats.put("2015-08-18,read,0.25", (short) 32); - expectedAggregateStats.put("2015-08-18,read,0.5", (short) 42); - expectedAggregateStats.put("2015-08-18,read,0.75", (short) 42); - expectedAggregateStats.put("2015-08-18,write,0.25", (short) 22); - expectedAggregateStats.put("2015-08-18,write,0.5", (short) 32); - expectedAggregateStats.put("2015-08-18,write,0.75", (short) 32); - expectedAggregateStats.put("2015-08-18,both,0.25", (short) 12); - expectedAggregateStats.put("2015-08-18,both,0.5", (short) 22); - expectedAggregateStats.put("2015-08-18,both,0.75", (short) 22); + expectedAggregateStats.put("2015-08-18,read,0.25", (short) 34); + expectedAggregateStats.put("2015-08-18,read,0.5", (short) 37); + expectedAggregateStats.put("2015-08-18,read,0.75", (short) 39); + expectedAggregateStats.put("2015-08-18,write,0.25", (short) 24); + expectedAggregateStats.put("2015-08-18,write,0.5", (short) 27); + expectedAggregateStats.put("2015-08-18,write,0.75", (short) 29); + expectedAggregateStats.put("2015-08-18,both,0.25", (short) 14); + expectedAggregateStats.put("2015-08-18,both,0.5", (short) 17); + expectedAggregateStats.put("2015-08-18,both,0.75", (short) 19); SortedSet<Main.RawStat> rawStats = new TreeSet<>(); rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 32, (short) 22, (short) 12)); @@ -258,4 +260,55 @@ public class MainTest { (short) 22)); assertStatsCanBeAggregated(expectedAggregateStats, rawStats); } + + @Test + public void testComputePercentilesZeroValues() { + List<Short> valueList = new ArrayList<>(); + SortedMap<Double, Short> computedPercentiles = Main.computePercentiles( + valueList, 25.0, 50.0, 75.0); + assertEquals(0, computedPercentiles.get(25.0).shortValue()); + assertEquals(0, computedPercentiles.get(50.0).shortValue()); + assertEquals(0, computedPercentiles.get(75.0).shortValue()); + } + + @Test + public void testComputePercentilesTenValues() { + List<Short> valueList = new ArrayList<>(); + valueList.add((short) 3); + valueList.add((short) 6); + valueList.add((short) 7); + valueList.add((short) 8); + valueList.add((short) 8); + valueList.add((short) 10); + valueList.add((short) 13); + valueList.add((short) 15); + valueList.add((short) 16); + valueList.add((short) 20); + SortedMap<Double, Short> computedPercentiles = Main.computePercentiles( + valueList, 25.0, 50.0, 75.0); + assertEquals(7, computedPercentiles.get(25.0).shortValue()); + assertEquals(9, computedPercentiles.get(50.0).shortValue()); + assertEquals(14, computedPercentiles.get(75.0).shortValue()); + } + + @Test + public void testComputePercentilesElevenValues() { + List<Short> valueList = new ArrayList<>(); + valueList.add((short) 3); + valueList.add((short) 6); + valueList.add((short) 7); + valueList.add((short) 8); + valueList.add((short) 8); + valueList.add((short) 9); + valueList.add((short) 10); + valueList.add((short) 13); + valueList.add((short) 15); + valueList.add((short) 16); + valueList.add((short) 20); + SortedMap<Double, Short> computedPercentiles = Main.computePercentiles( + valueList, 25.0, 50.0, 75.0); + assertEquals(7, computedPercentiles.get(25.0).shortValue()); + assertEquals(9, computedPercentiles.get(50.0).shortValue()); + assertEquals(14, computedPercentiles.get(75.0).shortValue()); + } }
tor-commits@lists.torproject.org