[tor-commits] [metrics-web/master] Compute percentiles in connbidirect module using linear interpolation.

karsten at torproject.org karsten at torproject.org
Thu Oct 25 09:33:04 UTC 2018


commit cda16c857e942a822df6b28f23518fc087bbc0f4
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sat May 19 21:11:49 2018 +0200

    Compute percentiles in connbidirect module using linear interpolation.
    
    So far we computed percentiles in the connbidirect module using our
    own formula that picked existing values for median and quartiles.
    
    Now we're using Apache Commons Math with method R_7 which uses linear
    interpolation when a percentile lies between two data points.
    
    Implements the last part of #26035.
---
 .../metrics/stats/connbidirect/Main.java           | 31 ++++++++--
 .../metrics/stats/connbidirect/MainTest.java       | 71 +++++++++++++++++++---
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
index b2dc1b6..468ffba 100644
--- a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
@@ -8,6 +8,8 @@ import org.torproject.descriptor.DescriptorReader;
 import org.torproject.descriptor.DescriptorSourceFactory;
 import org.torproject.descriptor.ExtraInfoDescriptor;
 
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -22,7 +24,6 @@ import java.io.StringReader;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
@@ -455,19 +456,39 @@ public class Main {
       }
     }
     final String[] quantiles = new String[] { "0.25", "0.5", "0.75" };
-    final int[] centiles = new int[] { 25, 50, 75 };
+    final double[] centiles = new double[] { 25.0, 50.0, 75.0 };
     for (Map.Entry<String, List<Short>> e
         : fractionsByDateAndDirection.entrySet()) {
       String dateAndDirection = e.getKey();
       List<Short> fractions = e.getValue();
-      Collections.sort(fractions);
+      SortedMap<Double, Short> computedPercentiles
+          = computePercentiles(fractions, centiles);
       for (int i = 0; i < quantiles.length; i++) {
         String dateDirectionAndQuantile = dateAndDirection + ","
             + quantiles[i];
-        short fraction = fractions.get((centiles[i] * fractions.size())
-            / 100);
+        short fraction = computedPercentiles.get(centiles[i]);
         aggregateStats.put(dateDirectionAndQuantile, fraction);
       }
     }
   }
+
+  /** Compute percentiles (greater than 0.0 and smaller than or equal 100.0) for
+   * the given list of values, and return a map with percentiles as keys and
+   * computed values as values. If the list of values is empty, the returned map
+   * contains all zeros. */
+  static SortedMap<Double, Short> computePercentiles(List<Short> valueList,
+      double ... percentiles) {
+    SortedMap<Double, Short> computedPercentiles = new TreeMap<>();
+    double[] valueArray = new double[valueList.size()];
+    for (int i = 0; i < valueList.size(); i++) {
+      valueArray[i] = valueList.get(i).doubleValue();
+    }
+    Percentile percentile = new Percentile()
+        .withEstimationType(Percentile.EstimationType.R_7);
+    percentile.setData(valueArray);
+    for (double p : percentiles) {
+      computedPercentiles.put(p, (short) Math.floor(percentile.evaluate(p)));
+    }
+    return computedPercentiles;
+  }
 }
diff --git a/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java b/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java
index 255cccf..69001da 100644
--- a/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java
+++ b/src/test/java/org/torproject/metrics/stats/connbidirect/MainTest.java
@@ -11,6 +11,8 @@ import org.junit.Test;
 
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeMap;
@@ -242,15 +244,15 @@ public class MainTest {
   @Test
   public void testUpdateAggregateStatsTwoRawStat() {
     SortedMap<String, Short> expectedAggregateStats = new TreeMap<>();
-    expectedAggregateStats.put("2015-08-18,read,0.25", (short) 32);
-    expectedAggregateStats.put("2015-08-18,read,0.5", (short) 42);
-    expectedAggregateStats.put("2015-08-18,read,0.75", (short) 42);
-    expectedAggregateStats.put("2015-08-18,write,0.25", (short) 22);
-    expectedAggregateStats.put("2015-08-18,write,0.5", (short) 32);
-    expectedAggregateStats.put("2015-08-18,write,0.75", (short) 32);
-    expectedAggregateStats.put("2015-08-18,both,0.25", (short) 12);
-    expectedAggregateStats.put("2015-08-18,both,0.5", (short) 22);
-    expectedAggregateStats.put("2015-08-18,both,0.75", (short) 22);
+    expectedAggregateStats.put("2015-08-18,read,0.25", (short) 34);
+    expectedAggregateStats.put("2015-08-18,read,0.5", (short) 37);
+    expectedAggregateStats.put("2015-08-18,read,0.75", (short) 39);
+    expectedAggregateStats.put("2015-08-18,write,0.25", (short) 24);
+    expectedAggregateStats.put("2015-08-18,write,0.5", (short) 27);
+    expectedAggregateStats.put("2015-08-18,write,0.75", (short) 29);
+    expectedAggregateStats.put("2015-08-18,both,0.25", (short) 14);
+    expectedAggregateStats.put("2015-08-18,both,0.5", (short) 17);
+    expectedAggregateStats.put("2015-08-18,both,0.75", (short) 19);
     SortedSet<Main.RawStat> rawStats = new TreeSet<>();
     rawStats.add(new Main.RawStat(DATE_A, FPR_A, (short) 32, (short) 22,
         (short) 12));
@@ -258,4 +260,55 @@ public class MainTest {
         (short) 22));
     assertStatsCanBeAggregated(expectedAggregateStats, rawStats);
   }
+
+  @Test
+  public void testComputePercentilesZeroValues() {
+    List<Short> valueList = new ArrayList<>();
+    SortedMap<Double, Short> computedPercentiles = Main.computePercentiles(
+        valueList, 25.0, 50.0, 75.0);
+    assertEquals(0, computedPercentiles.get(25.0).shortValue());
+    assertEquals(0, computedPercentiles.get(50.0).shortValue());
+    assertEquals(0, computedPercentiles.get(75.0).shortValue());
+  }
+
+  @Test
+  public void testComputePercentilesTenValues() {
+    List<Short> valueList = new ArrayList<>();
+    valueList.add((short) 3);
+    valueList.add((short) 6);
+    valueList.add((short) 7);
+    valueList.add((short) 8);
+    valueList.add((short) 8);
+    valueList.add((short) 10);
+    valueList.add((short) 13);
+    valueList.add((short) 15);
+    valueList.add((short) 16);
+    valueList.add((short) 20);
+    SortedMap<Double, Short> computedPercentiles = Main.computePercentiles(
+        valueList, 25.0, 50.0, 75.0);
+    assertEquals(7, computedPercentiles.get(25.0).shortValue());
+    assertEquals(9, computedPercentiles.get(50.0).shortValue());
+    assertEquals(14, computedPercentiles.get(75.0).shortValue());
+  }
+
+  @Test
+  public void testComputePercentilesElevenValues() {
+    List<Short> valueList = new ArrayList<>();
+    valueList.add((short) 3);
+    valueList.add((short) 6);
+    valueList.add((short) 7);
+    valueList.add((short) 8);
+    valueList.add((short) 8);
+    valueList.add((short) 9);
+    valueList.add((short) 10);
+    valueList.add((short) 13);
+    valueList.add((short) 15);
+    valueList.add((short) 16);
+    valueList.add((short) 20);
+    SortedMap<Double, Short> computedPercentiles = Main.computePercentiles(
+        valueList, 25.0, 50.0, 75.0);
+    assertEquals(7, computedPercentiles.get(25.0).shortValue());
+    assertEquals(9, computedPercentiles.get(50.0).shortValue());
+    assertEquals(14, computedPercentiles.get(75.0).shortValue());
+  }
 }



More information about the tor-commits mailing list