commit dda2b51eaa6efdd871d5190f4b86fad008beee42 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sat May 19 21:08:42 2018 +0200
Compute percentiles in advbwdist module using linear interpolation.
So far we computed percentiles in the advbwdist module using our own formula that picked existing values for the various percentiles we provided.
Now we're using Apache Commons Math with method R_7 which uses linear interpolation when a percentile lies between two data points.
Implements another part of #26035. --- build.xml | 1 + .../torproject/metrics/stats/advbwdist/Main.java | 49 ++++++++++++--- .../metrics/stats/advbwdist/MainTest.java | 72 ++++++++++++++++++++++ 3 files changed, 112 insertions(+), 10 deletions(-)
diff --git a/build.xml b/build.xml index 3614d78..b498ea8 100644 --- a/build.xml +++ b/build.xml @@ -60,6 +60,7 @@ <patternset refid="common" /> <include name="metrics-lib-${metricslibversion}.jar"/> <include name="commons-compress-1.13.jar"/> + <include name="commons-math3-3.6.1.jar"/> <include name="postgresql-9.4.1212.jar"/> <include name="servlet-api-3.1.jar"/> <include name="xz-1.6.jar"/> diff --git a/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java b/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java index a16e7b0..7216581 100644 --- a/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java +++ b/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java @@ -10,6 +10,8 @@ import org.torproject.descriptor.NetworkStatusEntry; import org.torproject.descriptor.RelayNetworkStatusConsensus; import org.torproject.descriptor.ServerDescriptor;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile; + import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -20,7 +22,9 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.SortedMap; import java.util.TimeZone; +import java.util.TreeMap;
public class Main {
@@ -115,29 +119,54 @@ public class Main { }
/* Write advertised bandwidth percentiles of relays/exits. */ - Collections.sort(advertisedBandwidthsAllRelays); - Collections.sort(advertisedBandwidthsExitsOnly); int[] percentiles = new int[] { 0, 1, 2, 3, 5, 9, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 91, 95, 97, 98, 99, 100 }; if (!advertisedBandwidthsAllRelays.isEmpty()) { - for (int percentile : percentiles) { + for (Map.Entry<Integer, Long> e : computePercentiles( + advertisedBandwidthsAllRelays, percentiles).entrySet()) { bw.write(String.format("%s,,,%d,%d%n", validAfter, - percentile, advertisedBandwidthsAllRelays.get( - ((advertisedBandwidthsAllRelays.size() - 1) - * percentile) / 100))); + e.getKey(), e.getValue())); } } if (!advertisedBandwidthsExitsOnly.isEmpty()) { - for (int percentile : percentiles) { + for (Map.Entry<Integer, Long> e : computePercentiles( + advertisedBandwidthsExitsOnly, percentiles).entrySet()) { bw.write(String.format("%s,TRUE,,%d,%d%n", validAfter, - percentile, advertisedBandwidthsExitsOnly.get( - ((advertisedBandwidthsExitsOnly.size() - 1) - * percentile) / 100))); + e.getKey(), e.getValue())); } } } descriptorReader.saveHistoryFile(historyFile); bw.close(); } + + /** Compute percentiles (between 0 and 100) for the given list of values, and + * return a map with percentiles as keys and computed values as values. If the + * list of values is empty, the returned map contains all zeros. */ + static SortedMap<Integer, Long> computePercentiles( + List<Long> valueList, int ... percentiles) { + SortedMap<Integer, Long> computedPercentiles = new TreeMap<>(); + double[] valueArray = new double[valueList.size()]; + long minValue = Long.MAX_VALUE; + for (int i = 0; i < valueList.size(); i++) { + valueArray[i] = valueList.get(i).doubleValue(); + minValue = Math.min(minValue, valueList.get(i)); + } + if (valueList.isEmpty()) { + minValue = 0L; + } + Percentile percentile = new Percentile() + .withEstimationType(Percentile.EstimationType.R_7); + percentile.setData(valueArray); + for (int p : percentiles) { + if (0 == p) { + computedPercentiles.put(p, minValue); + } else { + computedPercentiles.put(p, + (long) Math.floor(percentile.evaluate((double) p))); + } + } + return computedPercentiles; + } }
diff --git a/src/test/java/org/torproject/metrics/stats/advbwdist/MainTest.java b/src/test/java/org/torproject/metrics/stats/advbwdist/MainTest.java new file mode 100644 index 0000000..92752ef --- /dev/null +++ b/src/test/java/org/torproject/metrics/stats/advbwdist/MainTest.java @@ -0,0 +1,72 @@ +/* Copyright 2018 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.stats.advbwdist; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; + +public class MainTest { + + @Test + public void testComputePercentilesZeroValues() { + List<Long> valueList = new ArrayList<>(); + SortedMap<Integer, Long> computedPercentiles = Main.computePercentiles( + valueList, 0, 25, 50, 75, 100); + assertEquals(0L, (long) computedPercentiles.get(0)); + assertEquals(0L, (long) computedPercentiles.get(25)); + assertEquals(0L, (long) computedPercentiles.get(50)); + assertEquals(0L, (long) computedPercentiles.get(75)); + assertEquals(0L, (long) computedPercentiles.get(100)); + } + + @Test + public void testComputePercentilesTenValues() { + List<Long> valueList = new ArrayList<>(); + valueList.add(3L); + valueList.add(6L); + valueList.add(7L); + valueList.add(8L); + valueList.add(8L); + valueList.add(10L); + valueList.add(13L); + valueList.add(15L); + valueList.add(16L); + valueList.add(20L); + SortedMap<Integer, Long> computedPercentiles = Main.computePercentiles( + valueList, 0, 25, 50, 75, 100); + assertEquals(3L, (long) computedPercentiles.get(0)); + assertEquals(7L, (long) computedPercentiles.get(25)); + assertEquals(9L, (long) computedPercentiles.get(50)); + assertEquals(14L, (long) computedPercentiles.get(75)); + assertEquals(20L, (long) computedPercentiles.get(100)); + } + + @Test + public void testComputePercentilesElevenValues() { + List<Long> valueList = new ArrayList<>(); + valueList.add(3L); + valueList.add(6L); + valueList.add(7L); + valueList.add(8L); + valueList.add(8L); + valueList.add(9L); + valueList.add(10L); + valueList.add(13L); + valueList.add(15L); + valueList.add(16L); + valueList.add(20L); + SortedMap<Integer, Long> computedPercentiles = Main.computePercentiles( + valueList, 0, 25, 50, 75, 100); + assertEquals(3L, (long) computedPercentiles.get(0)); + assertEquals(7L, (long) computedPercentiles.get(25)); + assertEquals(9L, (long) computedPercentiles.get(50)); + assertEquals(14L, (long) computedPercentiles.get(75)); + assertEquals(20L, (long) computedPercentiles.get(100)); + } +}